1# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
   2
   3from __future__ import annotations
   4
   5from copy import copy
   6from typing import Any, List, Type, Union, Iterable, Optional, cast
   7from functools import partial
   8from typing_extensions import Literal, overload
   9
  10import httpx
  11
  12from ... import _legacy_response
  13from ..._types import NOT_GIVEN, Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
  14from ..._utils import is_given, maybe_transform, async_maybe_transform
  15from ..._compat import cached_property
  16from ..._resource import SyncAPIResource, AsyncAPIResource
  17from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
  18from .input_items import (
  19    InputItems,
  20    AsyncInputItems,
  21    InputItemsWithRawResponse,
  22    AsyncInputItemsWithRawResponse,
  23    InputItemsWithStreamingResponse,
  24    AsyncInputItemsWithStreamingResponse,
  25)
  26from ..._streaming import Stream, AsyncStream
  27from ...lib._tools import PydanticFunctionTool, ResponsesPydanticFunctionTool
  28from .input_tokens import (
  29    InputTokens,
  30    AsyncInputTokens,
  31    InputTokensWithRawResponse,
  32    AsyncInputTokensWithRawResponse,
  33    InputTokensWithStreamingResponse,
  34    AsyncInputTokensWithStreamingResponse,
  35)
  36from ..._base_client import make_request_options
  37from ...types.responses import (
  38    response_create_params,
  39    response_compact_params,
  40    response_retrieve_params,
  41)
  42from ...lib._parsing._responses import (
  43    TextFormatT,
  44    parse_response,
  45    type_to_text_format_param as _type_to_text_format_param,
  46)
  47from ...types.responses.response import Response
  48from ...types.responses.tool_param import ToolParam, ParseableToolParam
  49from ...types.shared_params.metadata import Metadata
  50from ...types.shared_params.reasoning import Reasoning
  51from ...types.responses.parsed_response import ParsedResponse
  52from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager
  53from ...types.responses.compacted_response import CompactedResponse
  54from ...types.responses.response_includable import ResponseIncludable
  55from ...types.shared_params.responses_model import ResponsesModel
  56from ...types.responses.response_input_param import ResponseInputParam
  57from ...types.responses.response_prompt_param import ResponsePromptParam
  58from ...types.responses.response_stream_event import ResponseStreamEvent
  59from ...types.responses.response_input_item_param import ResponseInputItemParam
  60from ...types.responses.response_text_config_param import ResponseTextConfigParam
  61
  62__all__ = ["Responses", "AsyncResponses"]
  63
  64
  65class Responses(SyncAPIResource):
  66    @cached_property
  67    def input_items(self) -> InputItems:
  68        return InputItems(self._client)
  69
  70    @cached_property
  71    def input_tokens(self) -> InputTokens:
  72        return InputTokens(self._client)
  73
  74    @cached_property
  75    def with_raw_response(self) -> ResponsesWithRawResponse:
  76        """
  77        This property can be used as a prefix for any HTTP method call to return
  78        the raw response object instead of the parsed content.
  79
  80        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
  81        """
  82        return ResponsesWithRawResponse(self)
  83
  84    @cached_property
  85    def with_streaming_response(self) -> ResponsesWithStreamingResponse:
  86        """
  87        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
  88
  89        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
  90        """
  91        return ResponsesWithStreamingResponse(self)
  92
  93    @overload
  94    def create(
  95        self,
  96        *,
  97        background: Optional[bool] | Omit = omit,
  98        conversation: Optional[response_create_params.Conversation] | Omit = omit,
  99        include: Optional[List[ResponseIncludable]] | Omit = omit,
 100        input: Union[str, ResponseInputParam] | Omit = omit,
 101        instructions: Optional[str] | Omit = omit,
 102        max_output_tokens: Optional[int] | Omit = omit,
 103        max_tool_calls: Optional[int] | Omit = omit,
 104        metadata: Optional[Metadata] | Omit = omit,
 105        model: ResponsesModel | Omit = omit,
 106        parallel_tool_calls: Optional[bool] | Omit = omit,
 107        previous_response_id: Optional[str] | Omit = omit,
 108        prompt: Optional[ResponsePromptParam] | Omit = omit,
 109        prompt_cache_key: str | Omit = omit,
 110        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
 111        reasoning: Optional[Reasoning] | Omit = omit,
 112        safety_identifier: str | Omit = omit,
 113        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
 114        store: Optional[bool] | Omit = omit,
 115        stream: Optional[Literal[False]] | Omit = omit,
 116        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
 117        temperature: Optional[float] | Omit = omit,
 118        text: ResponseTextConfigParam | Omit = omit,
 119        tool_choice: response_create_params.ToolChoice | Omit = omit,
 120        tools: Iterable[ToolParam] | Omit = omit,
 121        top_logprobs: Optional[int] | Omit = omit,
 122        top_p: Optional[float] | Omit = omit,
 123        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
 124        user: str | Omit = omit,
 125        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 126        # The extra values given here take precedence over values defined on the client or passed to this method.
 127        extra_headers: Headers | None = None,
 128        extra_query: Query | None = None,
 129        extra_body: Body | None = None,
 130        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 131    ) -> Response:
 132        """Creates a model response.
 133
 134        Provide
 135        [text](https://platform.openai.com/docs/guides/text) or
 136        [image](https://platform.openai.com/docs/guides/images) inputs to generate
 137        [text](https://platform.openai.com/docs/guides/text) or
 138        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
 139        the model call your own
 140        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
 141        built-in [tools](https://platform.openai.com/docs/guides/tools) like
 142        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
 143        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
 144        your own data as input for the model's response.
 145
 146        Args:
 147          background: Whether to run the model response in the background.
 148              [Learn more](https://platform.openai.com/docs/guides/background).
 149
 150          conversation: The conversation that this response belongs to. Items from this conversation are
 151              prepended to `input_items` for this response request. Input items and output
 152              items from this response are automatically added to this conversation after this
 153              response completes.
 154
 155          include: Specify additional output data to include in the model response. Currently
 156              supported values are:
 157
 158              - `web_search_call.action.sources`: Include the sources of the web search tool
 159                call.
 160              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
 161                in code interpreter tool call items.
 162              - `computer_call_output.output.image_url`: Include image urls from the computer
 163                call output.
 164              - `file_search_call.results`: Include the search results of the file search tool
 165                call.
 166              - `message.input_image.image_url`: Include image urls from the input message.
 167              - `message.output_text.logprobs`: Include logprobs with assistant messages.
 168              - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
 169                tokens in reasoning item outputs. This enables reasoning items to be used in
 170                multi-turn conversations when using the Responses API statelessly (like when
 171                the `store` parameter is set to `false`, or when an organization is enrolled
 172                in the zero data retention program).
 173
 174          input: Text, image, or file inputs to the model, used to generate a response.
 175
 176              Learn more:
 177
 178              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
 179              - [Image inputs](https://platform.openai.com/docs/guides/images)
 180              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
 181              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
 182              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
 183
 184          instructions: A system (or developer) message inserted into the model's context.
 185
 186              When using along with `previous_response_id`, the instructions from a previous
 187              response will not be carried over to the next response. This makes it simple to
 188              swap out system (or developer) messages in new responses.
 189
 190          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
 191              including visible output tokens and
 192              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
 193
 194          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
 195              response. This maximum number applies across all built-in tool calls, not per
 196              individual tool. Any further attempts to call a tool by the model will be
 197              ignored.
 198
 199          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
 200              for storing additional information about the object in a structured format, and
 201              querying for objects via API or the dashboard.
 202
 203              Keys are strings with a maximum length of 64 characters. Values are strings with
 204              a maximum length of 512 characters.
 205
 206          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
 207              wide range of models with different capabilities, performance characteristics,
 208              and price points. Refer to the
 209              [model guide](https://platform.openai.com/docs/models) to browse and compare
 210              available models.
 211
 212          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
 213
 214          previous_response_id: The unique ID of the previous response to the model. Use this to create
 215              multi-turn conversations. Learn more about
 216              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
 217              Cannot be used in conjunction with `conversation`.
 218
 219          prompt: Reference to a prompt template and its variables.
 220              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
 221
 222          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
 223              hit rates. Replaces the `user` field.
 224              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
 225
 226          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
 227              prompt caching, which keeps cached prefixes active for longer, up to a maximum
 228              of 24 hours.
 229              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
 230
 231          reasoning: **gpt-5 and o-series models only**
 232
 233              Configuration options for
 234              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 235
 236          safety_identifier: A stable identifier used to help detect users of your application that may be
 237              violating OpenAI's usage policies. The IDs should be a string that uniquely
 238              identifies each user. We recommend hashing their username or email address, in
 239              order to avoid sending us any identifying information.
 240              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
 241
 242          service_tier: Specifies the processing type used for serving the request.
 243
 244              - If set to 'auto', then the request will be processed with the service tier
 245                configured in the Project settings. Unless otherwise configured, the Project
 246                will use 'default'.
 247              - If set to 'default', then the request will be processed with the standard
 248                pricing and performance for the selected model.
 249              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
 250                '[priority](https://openai.com/api-priority-processing/)', then the request
 251                will be processed with the corresponding service tier.
 252              - When not set, the default behavior is 'auto'.
 253
 254              When the `service_tier` parameter is set, the response body will include the
 255              `service_tier` value based on the processing mode actually used to serve the
 256              request. This response value may be different from the value set in the
 257              parameter.
 258
 259          store: Whether to store the generated model response for later retrieval via API.
 260
 261          stream: If set to true, the model response data will be streamed to the client as it is
 262              generated using
 263              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
 264              See the
 265              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
 266              for more information.
 267
 268          stream_options: Options for streaming responses. Only set this when you set `stream: true`.
 269
 270          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
 271              make the output more random, while lower values like 0.2 will make it more
 272              focused and deterministic. We generally recommend altering this or `top_p` but
 273              not both.
 274
 275          text: Configuration options for a text response from the model. Can be plain text or
 276              structured JSON data. Learn more:
 277
 278              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
 279              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
 280
 281          tool_choice: How the model should select which tool (or tools) to use when generating a
 282              response. See the `tools` parameter to see how to specify which tools the model
 283              can call.
 284
 285          tools: An array of tools the model may call while generating a response. You can
 286              specify which tool to use by setting the `tool_choice` parameter.
 287
 288              We support the following categories of tools:
 289
 290              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
 291                capabilities, like
 292                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
 293                [file search](https://platform.openai.com/docs/guides/tools-file-search).
 294                Learn more about
 295                [built-in tools](https://platform.openai.com/docs/guides/tools).
 296              - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
 297                predefined connectors such as Google Drive and SharePoint. Learn more about
 298                [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
 299              - **Function calls (custom tools)**: Functions that are defined by you, enabling
 300                the model to call your own code with strongly typed arguments and outputs.
 301                Learn more about
 302                [function calling](https://platform.openai.com/docs/guides/function-calling).
 303                You can also use custom tools to call your own code.
 304
 305          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
 306              return at each token position, each with an associated log probability.
 307
 308          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
 309              model considers the results of the tokens with top_p probability mass. So 0.1
 310              means only the tokens comprising the top 10% probability mass are considered.
 311
 312              We generally recommend altering this or `temperature` but not both.
 313
 314          truncation: The truncation strategy to use for the model response.
 315
 316              - `auto`: If the input to this Response exceeds the model's context window size,
 317                the model will truncate the response to fit the context window by dropping
 318                items from the beginning of the conversation.
 319              - `disabled` (default): If the input size will exceed the context window size
 320                for a model, the request will fail with a 400 error.
 321
 322          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
 323              `prompt_cache_key` instead to maintain caching optimizations. A stable
 324              identifier for your end-users. Used to boost cache hit rates by better bucketing
 325              similar requests and to help OpenAI detect and prevent abuse.
 326              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
 327
 328          extra_headers: Send extra headers
 329
 330          extra_query: Add additional query parameters to the request
 331
 332          extra_body: Add additional JSON properties to the request
 333
 334          timeout: Override the client-level default timeout for this request, in seconds
 335        """
 336        ...
 337
 338    @overload
 339    def create(
 340        self,
 341        *,
 342        stream: Literal[True],
 343        background: Optional[bool] | Omit = omit,
 344        conversation: Optional[response_create_params.Conversation] | Omit = omit,
 345        include: Optional[List[ResponseIncludable]] | Omit = omit,
 346        input: Union[str, ResponseInputParam] | Omit = omit,
 347        instructions: Optional[str] | Omit = omit,
 348        max_output_tokens: Optional[int] | Omit = omit,
 349        max_tool_calls: Optional[int] | Omit = omit,
 350        metadata: Optional[Metadata] | Omit = omit,
 351        model: ResponsesModel | Omit = omit,
 352        parallel_tool_calls: Optional[bool] | Omit = omit,
 353        previous_response_id: Optional[str] | Omit = omit,
 354        prompt: Optional[ResponsePromptParam] | Omit = omit,
 355        prompt_cache_key: str | Omit = omit,
 356        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
 357        reasoning: Optional[Reasoning] | Omit = omit,
 358        safety_identifier: str | Omit = omit,
 359        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
 360        store: Optional[bool] | Omit = omit,
 361        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
 362        temperature: Optional[float] | Omit = omit,
 363        text: ResponseTextConfigParam | Omit = omit,
 364        tool_choice: response_create_params.ToolChoice | Omit = omit,
 365        tools: Iterable[ToolParam] | Omit = omit,
 366        top_logprobs: Optional[int] | Omit = omit,
 367        top_p: Optional[float] | Omit = omit,
 368        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
 369        user: str | Omit = omit,
 370        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 371        # The extra values given here take precedence over values defined on the client or passed to this method.
 372        extra_headers: Headers | None = None,
 373        extra_query: Query | None = None,
 374        extra_body: Body | None = None,
 375        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 376    ) -> Stream[ResponseStreamEvent]:
 377        """Creates a model response.
 378
 379        Provide
 380        [text](https://platform.openai.com/docs/guides/text) or
 381        [image](https://platform.openai.com/docs/guides/images) inputs to generate
 382        [text](https://platform.openai.com/docs/guides/text) or
 383        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
 384        the model call your own
 385        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
 386        built-in [tools](https://platform.openai.com/docs/guides/tools) like
 387        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
 388        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
 389        your own data as input for the model's response.
 390
 391        Args:
 392          stream: If set to true, the model response data will be streamed to the client as it is
 393              generated using
 394              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
 395              See the
 396              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
 397              for more information.
 398
 399          background: Whether to run the model response in the background.
 400              [Learn more](https://platform.openai.com/docs/guides/background).
 401
 402          conversation: The conversation that this response belongs to. Items from this conversation are
 403              prepended to `input_items` for this response request. Input items and output
 404              items from this response are automatically added to this conversation after this
 405              response completes.
 406
 407          include: Specify additional output data to include in the model response. Currently
 408              supported values are:
 409
 410              - `web_search_call.action.sources`: Include the sources of the web search tool
 411                call.
 412              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
 413                in code interpreter tool call items.
 414              - `computer_call_output.output.image_url`: Include image urls from the computer
 415                call output.
 416              - `file_search_call.results`: Include the search results of the file search tool
 417                call.
 418              - `message.input_image.image_url`: Include image urls from the input message.
 419              - `message.output_text.logprobs`: Include logprobs with assistant messages.
 420              - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
 421                tokens in reasoning item outputs. This enables reasoning items to be used in
 422                multi-turn conversations when using the Responses API statelessly (like when
 423                the `store` parameter is set to `false`, or when an organization is enrolled
 424                in the zero data retention program).
 425
 426          input: Text, image, or file inputs to the model, used to generate a response.
 427
 428              Learn more:
 429
 430              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
 431              - [Image inputs](https://platform.openai.com/docs/guides/images)
 432              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
 433              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
 434              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
 435
 436          instructions: A system (or developer) message inserted into the model's context.
 437
 438              When using along with `previous_response_id`, the instructions from a previous
 439              response will not be carried over to the next response. This makes it simple to
 440              swap out system (or developer) messages in new responses.
 441
 442          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
 443              including visible output tokens and
 444              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
 445
 446          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
 447              response. This maximum number applies across all built-in tool calls, not per
 448              individual tool. Any further attempts to call a tool by the model will be
 449              ignored.
 450
 451          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
 452              for storing additional information about the object in a structured format, and
 453              querying for objects via API or the dashboard.
 454
 455              Keys are strings with a maximum length of 64 characters. Values are strings with
 456              a maximum length of 512 characters.
 457
 458          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
 459              wide range of models with different capabilities, performance characteristics,
 460              and price points. Refer to the
 461              [model guide](https://platform.openai.com/docs/models) to browse and compare
 462              available models.
 463
 464          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
 465
 466          previous_response_id: The unique ID of the previous response to the model. Use this to create
 467              multi-turn conversations. Learn more about
 468              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
 469              Cannot be used in conjunction with `conversation`.
 470
 471          prompt: Reference to a prompt template and its variables.
 472              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
 473
 474          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
 475              hit rates. Replaces the `user` field.
 476              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
 477
 478          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
 479              prompt caching, which keeps cached prefixes active for longer, up to a maximum
 480              of 24 hours.
 481              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
 482
 483          reasoning: **gpt-5 and o-series models only**
 484
 485              Configuration options for
 486              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 487
 488          safety_identifier: A stable identifier used to help detect users of your application that may be
 489              violating OpenAI's usage policies. The IDs should be a string that uniquely
 490              identifies each user. We recommend hashing their username or email address, in
 491              order to avoid sending us any identifying information.
 492              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
 493
 494          service_tier: Specifies the processing type used for serving the request.
 495
 496              - If set to 'auto', then the request will be processed with the service tier
 497                configured in the Project settings. Unless otherwise configured, the Project
 498                will use 'default'.
 499              - If set to 'default', then the request will be processed with the standard
 500                pricing and performance for the selected model.
 501              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
 502                '[priority](https://openai.com/api-priority-processing/)', then the request
 503                will be processed with the corresponding service tier.
 504              - When not set, the default behavior is 'auto'.
 505
 506              When the `service_tier` parameter is set, the response body will include the
 507              `service_tier` value based on the processing mode actually used to serve the
 508              request. This response value may be different from the value set in the
 509              parameter.
 510
 511          store: Whether to store the generated model response for later retrieval via API.
 512
 513          stream_options: Options for streaming responses. Only set this when you set `stream: true`.
 514
 515          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
 516              make the output more random, while lower values like 0.2 will make it more
 517              focused and deterministic. We generally recommend altering this or `top_p` but
 518              not both.
 519
 520          text: Configuration options for a text response from the model. Can be plain text or
 521              structured JSON data. Learn more:
 522
 523              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
 524              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
 525
 526          tool_choice: How the model should select which tool (or tools) to use when generating a
 527              response. See the `tools` parameter to see how to specify which tools the model
 528              can call.
 529
 530          tools: An array of tools the model may call while generating a response. You can
 531              specify which tool to use by setting the `tool_choice` parameter.
 532
 533              We support the following categories of tools:
 534
 535              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
 536                capabilities, like
 537                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
 538                [file search](https://platform.openai.com/docs/guides/tools-file-search).
 539                Learn more about
 540                [built-in tools](https://platform.openai.com/docs/guides/tools).
 541              - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
 542                predefined connectors such as Google Drive and SharePoint. Learn more about
 543                [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
 544              - **Function calls (custom tools)**: Functions that are defined by you, enabling
 545                the model to call your own code with strongly typed arguments and outputs.
 546                Learn more about
 547                [function calling](https://platform.openai.com/docs/guides/function-calling).
 548                You can also use custom tools to call your own code.
 549
 550          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
 551              return at each token position, each with an associated log probability.
 552
 553          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
 554              model considers the results of the tokens with top_p probability mass. So 0.1
 555              means only the tokens comprising the top 10% probability mass are considered.
 556
 557              We generally recommend altering this or `temperature` but not both.
 558
 559          truncation: The truncation strategy to use for the model response.
 560
 561              - `auto`: If the input to this Response exceeds the model's context window size,
 562                the model will truncate the response to fit the context window by dropping
 563                items from the beginning of the conversation.
 564              - `disabled` (default): If the input size will exceed the context window size
 565                for a model, the request will fail with a 400 error.
 566
 567          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
 568              `prompt_cache_key` instead to maintain caching optimizations. A stable
 569              identifier for your end-users. Used to boost cache hit rates by better bucketing
 570              similar requests and to help OpenAI detect and prevent abuse.
 571              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
 572
 573          extra_headers: Send extra headers
 574
 575          extra_query: Add additional query parameters to the request
 576
 577          extra_body: Add additional JSON properties to the request
 578
 579          timeout: Override the client-level default timeout for this request, in seconds
 580        """
 581        ...
 582
 583    @overload
 584    def create(
 585        self,
 586        *,
 587        stream: bool,
 588        background: Optional[bool] | Omit = omit,
 589        conversation: Optional[response_create_params.Conversation] | Omit = omit,
 590        include: Optional[List[ResponseIncludable]] | Omit = omit,
 591        input: Union[str, ResponseInputParam] | Omit = omit,
 592        instructions: Optional[str] | Omit = omit,
 593        max_output_tokens: Optional[int] | Omit = omit,
 594        max_tool_calls: Optional[int] | Omit = omit,
 595        metadata: Optional[Metadata] | Omit = omit,
 596        model: ResponsesModel | Omit = omit,
 597        parallel_tool_calls: Optional[bool] | Omit = omit,
 598        previous_response_id: Optional[str] | Omit = omit,
 599        prompt: Optional[ResponsePromptParam] | Omit = omit,
 600        prompt_cache_key: str | Omit = omit,
 601        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
 602        reasoning: Optional[Reasoning] | Omit = omit,
 603        safety_identifier: str | Omit = omit,
 604        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
 605        store: Optional[bool] | Omit = omit,
 606        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
 607        temperature: Optional[float] | Omit = omit,
 608        text: ResponseTextConfigParam | Omit = omit,
 609        tool_choice: response_create_params.ToolChoice | Omit = omit,
 610        tools: Iterable[ToolParam] | Omit = omit,
 611        top_logprobs: Optional[int] | Omit = omit,
 612        top_p: Optional[float] | Omit = omit,
 613        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
 614        user: str | Omit = omit,
 615        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 616        # The extra values given here take precedence over values defined on the client or passed to this method.
 617        extra_headers: Headers | None = None,
 618        extra_query: Query | None = None,
 619        extra_body: Body | None = None,
 620        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 621    ) -> Response | Stream[ResponseStreamEvent]:
 622        """Creates a model response.
 623
 624        Provide
 625        [text](https://platform.openai.com/docs/guides/text) or
 626        [image](https://platform.openai.com/docs/guides/images) inputs to generate
 627        [text](https://platform.openai.com/docs/guides/text) or
 628        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
 629        the model call your own
 630        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
 631        built-in [tools](https://platform.openai.com/docs/guides/tools) like
 632        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
 633        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
 634        your own data as input for the model's response.
 635
 636        Args:
 637          stream: If set to true, the model response data will be streamed to the client as it is
 638              generated using
 639              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
 640              See the
 641              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
 642              for more information.
 643
 644          background: Whether to run the model response in the background.
 645              [Learn more](https://platform.openai.com/docs/guides/background).
 646
 647          conversation: The conversation that this response belongs to. Items from this conversation are
 648              prepended to `input_items` for this response request. Input items and output
 649              items from this response are automatically added to this conversation after this
 650              response completes.
 651
 652          include: Specify additional output data to include in the model response. Currently
 653              supported values are:
 654
 655              - `web_search_call.action.sources`: Include the sources of the web search tool
 656                call.
 657              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
 658                in code interpreter tool call items.
 659              - `computer_call_output.output.image_url`: Include image urls from the computer
 660                call output.
 661              - `file_search_call.results`: Include the search results of the file search tool
 662                call.
 663              - `message.input_image.image_url`: Include image urls from the input message.
 664              - `message.output_text.logprobs`: Include logprobs with assistant messages.
 665              - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
 666                tokens in reasoning item outputs. This enables reasoning items to be used in
 667                multi-turn conversations when using the Responses API statelessly (like when
 668                the `store` parameter is set to `false`, or when an organization is enrolled
 669                in the zero data retention program).
 670
 671          input: Text, image, or file inputs to the model, used to generate a response.
 672
 673              Learn more:
 674
 675              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
 676              - [Image inputs](https://platform.openai.com/docs/guides/images)
 677              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
 678              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
 679              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
 680
 681          instructions: A system (or developer) message inserted into the model's context.
 682
 683              When using along with `previous_response_id`, the instructions from a previous
 684              response will not be carried over to the next response. This makes it simple to
 685              swap out system (or developer) messages in new responses.
 686
 687          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
 688              including visible output tokens and
 689              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
 690
 691          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
 692              response. This maximum number applies across all built-in tool calls, not per
 693              individual tool. Any further attempts to call a tool by the model will be
 694              ignored.
 695
 696          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
 697              for storing additional information about the object in a structured format, and
 698              querying for objects via API or the dashboard.
 699
 700              Keys are strings with a maximum length of 64 characters. Values are strings with
 701              a maximum length of 512 characters.
 702
 703          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
 704              wide range of models with different capabilities, performance characteristics,
 705              and price points. Refer to the
 706              [model guide](https://platform.openai.com/docs/models) to browse and compare
 707              available models.
 708
 709          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
 710
 711          previous_response_id: The unique ID of the previous response to the model. Use this to create
 712              multi-turn conversations. Learn more about
 713              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
 714              Cannot be used in conjunction with `conversation`.
 715
 716          prompt: Reference to a prompt template and its variables.
 717              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
 718
 719          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
 720              hit rates. Replaces the `user` field.
 721              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
 722
 723          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
 724              prompt caching, which keeps cached prefixes active for longer, up to a maximum
 725              of 24 hours.
 726              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
 727
 728          reasoning: **gpt-5 and o-series models only**
 729
 730              Configuration options for
 731              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
 732
 733          safety_identifier: A stable identifier used to help detect users of your application that may be
 734              violating OpenAI's usage policies. The IDs should be a string that uniquely
 735              identifies each user. We recommend hashing their username or email address, in
 736              order to avoid sending us any identifying information.
 737              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
 738
 739          service_tier: Specifies the processing type used for serving the request.
 740
 741              - If set to 'auto', then the request will be processed with the service tier
 742                configured in the Project settings. Unless otherwise configured, the Project
 743                will use 'default'.
 744              - If set to 'default', then the request will be processed with the standard
 745                pricing and performance for the selected model.
 746              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
 747                '[priority](https://openai.com/api-priority-processing/)', then the request
 748                will be processed with the corresponding service tier.
 749              - When not set, the default behavior is 'auto'.
 750
 751              When the `service_tier` parameter is set, the response body will include the
 752              `service_tier` value based on the processing mode actually used to serve the
 753              request. This response value may be different from the value set in the
 754              parameter.
 755
 756          store: Whether to store the generated model response for later retrieval via API.
 757
 758          stream_options: Options for streaming responses. Only set this when you set `stream: true`.
 759
 760          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
 761              make the output more random, while lower values like 0.2 will make it more
 762              focused and deterministic. We generally recommend altering this or `top_p` but
 763              not both.
 764
 765          text: Configuration options for a text response from the model. Can be plain text or
 766              structured JSON data. Learn more:
 767
 768              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
 769              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
 770
 771          tool_choice: How the model should select which tool (or tools) to use when generating a
 772              response. See the `tools` parameter to see how to specify which tools the model
 773              can call.
 774
 775          tools: An array of tools the model may call while generating a response. You can
 776              specify which tool to use by setting the `tool_choice` parameter.
 777
 778              We support the following categories of tools:
 779
 780              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
 781                capabilities, like
 782                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
 783                [file search](https://platform.openai.com/docs/guides/tools-file-search).
 784                Learn more about
 785                [built-in tools](https://platform.openai.com/docs/guides/tools).
 786              - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
 787                predefined connectors such as Google Drive and SharePoint. Learn more about
 788                [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
 789              - **Function calls (custom tools)**: Functions that are defined by you, enabling
 790                the model to call your own code with strongly typed arguments and outputs.
 791                Learn more about
 792                [function calling](https://platform.openai.com/docs/guides/function-calling).
 793                You can also use custom tools to call your own code.
 794
 795          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
 796              return at each token position, each with an associated log probability.
 797
 798          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
 799              model considers the results of the tokens with top_p probability mass. So 0.1
 800              means only the tokens comprising the top 10% probability mass are considered.
 801
 802              We generally recommend altering this or `temperature` but not both.
 803
 804          truncation: The truncation strategy to use for the model response.
 805
 806              - `auto`: If the input to this Response exceeds the model's context window size,
 807                the model will truncate the response to fit the context window by dropping
 808                items from the beginning of the conversation.
 809              - `disabled` (default): If the input size will exceed the context window size
 810                for a model, the request will fail with a 400 error.
 811
 812          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
 813              `prompt_cache_key` instead to maintain caching optimizations. A stable
 814              identifier for your end-users. Used to boost cache hit rates by better bucketing
 815              similar requests and to help OpenAI detect and prevent abuse.
 816              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
 817
 818          extra_headers: Send extra headers
 819
 820          extra_query: Add additional query parameters to the request
 821
 822          extra_body: Add additional JSON properties to the request
 823
 824          timeout: Override the client-level default timeout for this request, in seconds
 825        """
 826        ...
 827
 828    def create(
 829        self,
 830        *,
 831        background: Optional[bool] | Omit = omit,
 832        conversation: Optional[response_create_params.Conversation] | Omit = omit,
 833        include: Optional[List[ResponseIncludable]] | Omit = omit,
 834        input: Union[str, ResponseInputParam] | Omit = omit,
 835        instructions: Optional[str] | Omit = omit,
 836        max_output_tokens: Optional[int] | Omit = omit,
 837        max_tool_calls: Optional[int] | Omit = omit,
 838        metadata: Optional[Metadata] | Omit = omit,
 839        model: ResponsesModel | Omit = omit,
 840        parallel_tool_calls: Optional[bool] | Omit = omit,
 841        previous_response_id: Optional[str] | Omit = omit,
 842        prompt: Optional[ResponsePromptParam] | Omit = omit,
 843        prompt_cache_key: str | Omit = omit,
 844        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
 845        reasoning: Optional[Reasoning] | Omit = omit,
 846        safety_identifier: str | Omit = omit,
 847        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
 848        store: Optional[bool] | Omit = omit,
 849        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
 850        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
 851        temperature: Optional[float] | Omit = omit,
 852        text: ResponseTextConfigParam | Omit = omit,
 853        tool_choice: response_create_params.ToolChoice | Omit = omit,
 854        tools: Iterable[ToolParam] | Omit = omit,
 855        top_logprobs: Optional[int] | Omit = omit,
 856        top_p: Optional[float] | Omit = omit,
 857        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
 858        user: str | Omit = omit,
 859        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 860        # The extra values given here take precedence over values defined on the client or passed to this method.
 861        extra_headers: Headers | None = None,
 862        extra_query: Query | None = None,
 863        extra_body: Body | None = None,
 864        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 865    ) -> Response | Stream[ResponseStreamEvent]:
 866        return self._post(
 867            "/responses",
 868            body=maybe_transform(
 869                {
 870                    "background": background,
 871                    "conversation": conversation,
 872                    "include": include,
 873                    "input": input,
 874                    "instructions": instructions,
 875                    "max_output_tokens": max_output_tokens,
 876                    "max_tool_calls": max_tool_calls,
 877                    "metadata": metadata,
 878                    "model": model,
 879                    "parallel_tool_calls": parallel_tool_calls,
 880                    "previous_response_id": previous_response_id,
 881                    "prompt": prompt,
 882                    "prompt_cache_key": prompt_cache_key,
 883                    "prompt_cache_retention": prompt_cache_retention,
 884                    "reasoning": reasoning,
 885                    "safety_identifier": safety_identifier,
 886                    "service_tier": service_tier,
 887                    "store": store,
 888                    "stream": stream,
 889                    "stream_options": stream_options,
 890                    "temperature": temperature,
 891                    "text": text,
 892                    "tool_choice": tool_choice,
 893                    "tools": tools,
 894                    "top_logprobs": top_logprobs,
 895                    "top_p": top_p,
 896                    "truncation": truncation,
 897                    "user": user,
 898                },
 899                response_create_params.ResponseCreateParamsStreaming
 900                if stream
 901                else response_create_params.ResponseCreateParamsNonStreaming,
 902            ),
 903            options=make_request_options(
 904                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
 905            ),
 906            cast_to=Response,
 907            stream=stream or False,
 908            stream_cls=Stream[ResponseStreamEvent],
 909        )
 910
 911    @overload
 912    def stream(
 913        self,
 914        *,
 915        response_id: str,
 916        text_format: type[TextFormatT] | Omit = omit,
 917        starting_after: int | Omit = omit,
 918        tools: Iterable[ParseableToolParam] | Omit = omit,
 919        # The extra values given here take precedence over values defined on the client or passed to this method.
 920        extra_headers: Headers | None = None,
 921        extra_query: Query | None = None,
 922        extra_body: Body | None = None,
 923        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
 924    ) -> ResponseStreamManager[TextFormatT]: ...
 925
 926    @overload
 927    def stream(
 928        self,
 929        *,
 930        input: Union[str, ResponseInputParam],
 931        model: ResponsesModel,
 932        background: Optional[bool] | Omit = omit,
 933        text_format: type[TextFormatT] | Omit = omit,
 934        tools: Iterable[ParseableToolParam] | Omit = omit,
 935        conversation: Optional[response_create_params.Conversation] | Omit = omit,
 936        include: Optional[List[ResponseIncludable]] | Omit = omit,
 937        instructions: Optional[str] | Omit = omit,
 938        max_output_tokens: Optional[int] | Omit = omit,
 939        max_tool_calls: Optional[int] | Omit = omit,
 940        metadata: Optional[Metadata] | Omit = omit,
 941        parallel_tool_calls: Optional[bool] | Omit = omit,
 942        previous_response_id: Optional[str] | Omit = omit,
 943        prompt: Optional[ResponsePromptParam] | Omit = omit,
 944        prompt_cache_key: str | Omit = omit,
 945        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
 946        reasoning: Optional[Reasoning] | Omit = omit,
 947        safety_identifier: str | Omit = omit,
 948        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
 949        store: Optional[bool] | Omit = omit,
 950        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
 951        temperature: Optional[float] | Omit = omit,
 952        text: ResponseTextConfigParam | Omit = omit,
 953        tool_choice: response_create_params.ToolChoice | Omit = omit,
 954        top_logprobs: Optional[int] | Omit = omit,
 955        top_p: Optional[float] | Omit = omit,
 956        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
 957        user: str | Omit = omit,
 958        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 959        # The extra values given here take precedence over values defined on the client or passed to this method.
 960        extra_headers: Headers | None = None,
 961        extra_query: Query | None = None,
 962        extra_body: Body | None = None,
 963        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
 964    ) -> ResponseStreamManager[TextFormatT]: ...
 965
 966    def stream(
 967        self,
 968        *,
 969        response_id: str | Omit = omit,
 970        input: Union[str, ResponseInputParam] | Omit = omit,
 971        model: ResponsesModel | Omit = omit,
 972        background: Optional[bool] | Omit = omit,
 973        text_format: type[TextFormatT] | Omit = omit,
 974        tools: Iterable[ParseableToolParam] | Omit = omit,
 975        conversation: Optional[response_create_params.Conversation] | Omit = omit,
 976        include: Optional[List[ResponseIncludable]] | Omit = omit,
 977        instructions: Optional[str] | Omit = omit,
 978        max_output_tokens: Optional[int] | Omit = omit,
 979        max_tool_calls: Optional[int] | Omit = omit,
 980        metadata: Optional[Metadata] | Omit = omit,
 981        parallel_tool_calls: Optional[bool] | Omit = omit,
 982        previous_response_id: Optional[str] | Omit = omit,
 983        prompt: Optional[ResponsePromptParam] | Omit = omit,
 984        prompt_cache_key: str | Omit = omit,
 985        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
 986        reasoning: Optional[Reasoning] | Omit = omit,
 987        safety_identifier: str | Omit = omit,
 988        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
 989        store: Optional[bool] | Omit = omit,
 990        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
 991        temperature: Optional[float] | Omit = omit,
 992        text: ResponseTextConfigParam | Omit = omit,
 993        tool_choice: response_create_params.ToolChoice | Omit = omit,
 994        top_logprobs: Optional[int] | Omit = omit,
 995        top_p: Optional[float] | Omit = omit,
 996        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
 997        user: str | Omit = omit,
 998        starting_after: int | Omit = omit,
 999        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1000        # The extra values given here take precedence over values defined on the client or passed to this method.
1001        extra_headers: Headers | None = None,
1002        extra_query: Query | None = None,
1003        extra_body: Body | None = None,
1004        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
1005    ) -> ResponseStreamManager[TextFormatT]:
1006        new_response_args = {
1007            "input": input,
1008            "model": model,
1009            "conversation": conversation,
1010            "include": include,
1011            "instructions": instructions,
1012            "max_output_tokens": max_output_tokens,
1013            "max_tool_calls": max_tool_calls,
1014            "metadata": metadata,
1015            "parallel_tool_calls": parallel_tool_calls,
1016            "previous_response_id": previous_response_id,
1017            "prompt": prompt,
1018            "prompt_cache_key": prompt_cache_key,
1019            "prompt_cache_retention": prompt_cache_retention,
1020            "reasoning": reasoning,
1021            "safety_identifier": safety_identifier,
1022            "service_tier": service_tier,
1023            "store": store,
1024            "stream_options": stream_options,
1025            "temperature": temperature,
1026            "text": text,
1027            "tool_choice": tool_choice,
1028            "top_logprobs": top_logprobs,
1029            "top_p": top_p,
1030            "truncation": truncation,
1031            "user": user,
1032            "background": background,
1033        }
1034        new_response_args_names = [k for k, v in new_response_args.items() if is_given(v)]
1035
1036        if (is_given(response_id) or is_given(starting_after)) and len(new_response_args_names) > 0:
1037            raise ValueError(
1038                "Cannot provide both response_id/starting_after can't be provided together with "
1039                + ", ".join(new_response_args_names)
1040            )
1041        tools = _make_tools(tools)
1042        if len(new_response_args_names) > 0:
1043            if not is_given(input):
1044                raise ValueError("input must be provided when creating a new response")
1045
1046            if not is_given(model):
1047                raise ValueError("model must be provided when creating a new response")
1048
1049            if is_given(text_format):
1050                if not text:
1051                    text = {}
1052
1053                if "format" in text:
1054                    raise TypeError("Cannot mix and match text.format with text_format")
1055
1056                text = copy(text)
1057                text["format"] = _type_to_text_format_param(text_format)
1058
1059            api_request: partial[Stream[ResponseStreamEvent]] = partial(
1060                self.create,
1061                input=input,
1062                model=model,
1063                tools=tools,
1064                conversation=conversation,
1065                include=include,
1066                instructions=instructions,
1067                max_output_tokens=max_output_tokens,
1068                max_tool_calls=max_tool_calls,
1069                metadata=metadata,
1070                parallel_tool_calls=parallel_tool_calls,
1071                previous_response_id=previous_response_id,
1072                prompt=prompt,
1073                prompt_cache_key=prompt_cache_key,
1074                prompt_cache_retention=prompt_cache_retention,
1075                store=store,
1076                stream_options=stream_options,
1077                stream=True,
1078                temperature=temperature,
1079                text=text,
1080                tool_choice=tool_choice,
1081                reasoning=reasoning,
1082                safety_identifier=safety_identifier,
1083                service_tier=service_tier,
1084                top_logprobs=top_logprobs,
1085                top_p=top_p,
1086                truncation=truncation,
1087                user=user,
1088                background=background,
1089                extra_headers=extra_headers,
1090                extra_query=extra_query,
1091                extra_body=extra_body,
1092                timeout=timeout,
1093            )
1094
1095            return ResponseStreamManager(api_request, text_format=text_format, input_tools=tools, starting_after=None)
1096        else:
1097            if not is_given(response_id):
1098                raise ValueError("id must be provided when streaming an existing response")
1099
1100            return ResponseStreamManager(
1101                lambda: self.retrieve(
1102                    response_id=response_id,
1103                    stream=True,
1104                    include=include or [],
1105                    extra_headers=extra_headers,
1106                    extra_query=extra_query,
1107                    extra_body=extra_body,
1108                    starting_after=omit,
1109                    timeout=timeout,
1110                ),
1111                text_format=text_format,
1112                input_tools=tools,
1113                starting_after=starting_after if is_given(starting_after) else None,
1114            )
1115
1116    def parse(
1117        self,
1118        *,
1119        text_format: type[TextFormatT] | Omit = omit,
1120        background: Optional[bool] | Omit = omit,
1121        conversation: Optional[response_create_params.Conversation] | Omit = omit,
1122        include: Optional[List[ResponseIncludable]] | Omit = omit,
1123        input: Union[str, ResponseInputParam] | Omit = omit,
1124        instructions: Optional[str] | Omit = omit,
1125        max_output_tokens: Optional[int] | Omit = omit,
1126        max_tool_calls: Optional[int] | Omit = omit,
1127        metadata: Optional[Metadata] | Omit = omit,
1128        model: ResponsesModel | Omit = omit,
1129        parallel_tool_calls: Optional[bool] | Omit = omit,
1130        previous_response_id: Optional[str] | Omit = omit,
1131        prompt: Optional[ResponsePromptParam] | Omit = omit,
1132        prompt_cache_key: str | Omit = omit,
1133        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
1134        reasoning: Optional[Reasoning] | Omit = omit,
1135        safety_identifier: str | Omit = omit,
1136        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
1137        store: Optional[bool] | Omit = omit,
1138        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
1139        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
1140        temperature: Optional[float] | Omit = omit,
1141        text: ResponseTextConfigParam | Omit = omit,
1142        tool_choice: response_create_params.ToolChoice | Omit = omit,
1143        tools: Iterable[ParseableToolParam] | Omit = omit,
1144        top_logprobs: Optional[int] | Omit = omit,
1145        top_p: Optional[float] | Omit = omit,
1146        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
1147        user: str | Omit = omit,
1148        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
1149        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1150        # The extra values given here take precedence over values defined on the client or passed to this method.
1151        extra_headers: Headers | None = None,
1152        extra_query: Query | None = None,
1153        extra_body: Body | None = None,
1154        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
1155    ) -> ParsedResponse[TextFormatT]:
1156        if is_given(text_format):
1157            if not text:
1158                text = {}
1159
1160            if "format" in text:
1161                raise TypeError("Cannot mix and match text.format with text_format")
1162            text = copy(text)
1163            text["format"] = _type_to_text_format_param(text_format)
1164
1165        tools = _make_tools(tools)
1166
1167        def parser(raw_response: Response) -> ParsedResponse[TextFormatT]:
1168            return parse_response(
1169                input_tools=tools,
1170                text_format=text_format,
1171                response=raw_response,
1172            )
1173
1174        return self._post(
1175            "/responses",
1176            body=maybe_transform(
1177                {
1178                    "background": background,
1179                    "conversation": conversation,
1180                    "include": include,
1181                    "input": input,
1182                    "instructions": instructions,
1183                    "max_output_tokens": max_output_tokens,
1184                    "max_tool_calls": max_tool_calls,
1185                    "metadata": metadata,
1186                    "model": model,
1187                    "parallel_tool_calls": parallel_tool_calls,
1188                    "previous_response_id": previous_response_id,
1189                    "prompt": prompt,
1190                    "prompt_cache_key": prompt_cache_key,
1191                    "prompt_cache_retention": prompt_cache_retention,
1192                    "reasoning": reasoning,
1193                    "safety_identifier": safety_identifier,
1194                    "service_tier": service_tier,
1195                    "store": store,
1196                    "stream": stream,
1197                    "stream_options": stream_options,
1198                    "temperature": temperature,
1199                    "text": text,
1200                    "tool_choice": tool_choice,
1201                    "tools": tools,
1202                    "top_logprobs": top_logprobs,
1203                    "top_p": top_p,
1204                    "truncation": truncation,
1205                    "user": user,
1206                    "verbosity": verbosity,
1207                },
1208                response_create_params.ResponseCreateParams,
1209            ),
1210            options=make_request_options(
1211                extra_headers=extra_headers,
1212                extra_query=extra_query,
1213                extra_body=extra_body,
1214                timeout=timeout,
1215                post_parser=parser,
1216            ),
1217            # we turn the `Response` instance into a `ParsedResponse`
1218            # in the `parser` function above
1219            cast_to=cast(Type[ParsedResponse[TextFormatT]], Response),
1220        )
1221
1222    @overload
1223    def retrieve(
1224        self,
1225        response_id: str,
1226        *,
1227        include: List[ResponseIncludable] | Omit = omit,
1228        include_obfuscation: bool | Omit = omit,
1229        starting_after: int | Omit = omit,
1230        stream: Literal[False] | Omit = omit,
1231        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1232        # The extra values given here take precedence over values defined on the client or passed to this method.
1233        extra_headers: Headers | None = None,
1234        extra_query: Query | None = None,
1235        extra_body: Body | None = None,
1236        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1237    ) -> Response: ...
1238
1239    @overload
1240    def retrieve(
1241        self,
1242        response_id: str,
1243        *,
1244        stream: Literal[True],
1245        include: List[ResponseIncludable] | Omit = omit,
1246        starting_after: int | Omit = omit,
1247        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1248        # The extra values given here take precedence over values defined on the client or passed to this method.
1249        extra_headers: Headers | None = None,
1250        extra_query: Query | None = None,
1251        extra_body: Body | None = None,
1252        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
1253    ) -> Stream[ResponseStreamEvent]: ...
1254
1255    @overload
1256    def retrieve(
1257        self,
1258        response_id: str,
1259        *,
1260        stream: bool,
1261        include: List[ResponseIncludable] | Omit = omit,
1262        starting_after: int | Omit = omit,
1263        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1264        # The extra values given here take precedence over values defined on the client or passed to this method.
1265        extra_headers: Headers | None = None,
1266        extra_query: Query | None = None,
1267        extra_body: Body | None = None,
1268        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
1269    ) -> Response | Stream[ResponseStreamEvent]: ...
1270
1271    @overload
1272    def retrieve(
1273        self,
1274        response_id: str,
1275        *,
1276        stream: bool = False,
1277        include: List[ResponseIncludable] | Omit = omit,
1278        starting_after: int | Omit = omit,
1279        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1280        # The extra values given here take precedence over values defined on the client or passed to this method.
1281        extra_headers: Headers | None = None,
1282        extra_query: Query | None = None,
1283        extra_body: Body | None = None,
1284        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
1285    ) -> Response | Stream[ResponseStreamEvent]:
1286        """
1287        Retrieves a model response with the given ID.
1288
1289        Args:
1290          include: Additional fields to include in the response. See the `include` parameter for
1291              Response creation above for more information.
1292
1293          include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random
1294              characters to an `obfuscation` field on streaming delta events to normalize
1295              payload sizes as a mitigation to certain side-channel attacks. These obfuscation
1296              fields are included by default, but add a small amount of overhead to the data
1297              stream. You can set `include_obfuscation` to false to optimize for bandwidth if
1298              you trust the network links between your application and the OpenAI API.
1299
1300          starting_after: The sequence number of the event after which to start streaming.
1301
1302          stream: If set to true, the model response data will be streamed to the client as it is
1303              generated using
1304              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
1305              See the
1306              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
1307              for more information.
1308
1309          extra_headers: Send extra headers
1310
1311          extra_query: Add additional query parameters to the request
1312
1313          extra_body: Add additional JSON properties to the request
1314
1315          timeout: Override the client-level default timeout for this request, in seconds
1316        """
1317        ...
1318
1319    @overload
1320    def retrieve(
1321        self,
1322        response_id: str,
1323        *,
1324        stream: Literal[True],
1325        include: List[ResponseIncludable] | Omit = omit,
1326        include_obfuscation: bool | Omit = omit,
1327        starting_after: int | Omit = omit,
1328        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1329        # The extra values given here take precedence over values defined on the client or passed to this method.
1330        extra_headers: Headers | None = None,
1331        extra_query: Query | None = None,
1332        extra_body: Body | None = None,
1333        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1334    ) -> Stream[ResponseStreamEvent]:
1335        """
1336        Retrieves a model response with the given ID.
1337
1338        Args:
1339          stream: If set to true, the model response data will be streamed to the client as it is
1340              generated using
1341              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
1342              See the
1343              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
1344              for more information.
1345
1346          include: Additional fields to include in the response. See the `include` parameter for
1347              Response creation above for more information.
1348
1349          include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random
1350              characters to an `obfuscation` field on streaming delta events to normalize
1351              payload sizes as a mitigation to certain side-channel attacks. These obfuscation
1352              fields are included by default, but add a small amount of overhead to the data
1353              stream. You can set `include_obfuscation` to false to optimize for bandwidth if
1354              you trust the network links between your application and the OpenAI API.
1355
1356          starting_after: The sequence number of the event after which to start streaming.
1357
1358          extra_headers: Send extra headers
1359
1360          extra_query: Add additional query parameters to the request
1361
1362          extra_body: Add additional JSON properties to the request
1363
1364          timeout: Override the client-level default timeout for this request, in seconds
1365        """
1366        ...
1367
1368    @overload
1369    def retrieve(
1370        self,
1371        response_id: str,
1372        *,
1373        stream: bool,
1374        include: List[ResponseIncludable] | Omit = omit,
1375        include_obfuscation: bool | Omit = omit,
1376        starting_after: int | Omit = omit,
1377        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1378        # The extra values given here take precedence over values defined on the client or passed to this method.
1379        extra_headers: Headers | None = None,
1380        extra_query: Query | None = None,
1381        extra_body: Body | None = None,
1382        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1383    ) -> Response | Stream[ResponseStreamEvent]:
1384        """
1385        Retrieves a model response with the given ID.
1386
1387        Args:
1388          stream: If set to true, the model response data will be streamed to the client as it is
1389              generated using
1390              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
1391              See the
1392              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
1393              for more information.
1394
1395          include: Additional fields to include in the response. See the `include` parameter for
1396              Response creation above for more information.
1397
1398          include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random
1399              characters to an `obfuscation` field on streaming delta events to normalize
1400              payload sizes as a mitigation to certain side-channel attacks. These obfuscation
1401              fields are included by default, but add a small amount of overhead to the data
1402              stream. You can set `include_obfuscation` to false to optimize for bandwidth if
1403              you trust the network links between your application and the OpenAI API.
1404
1405          starting_after: The sequence number of the event after which to start streaming.
1406
1407          extra_headers: Send extra headers
1408
1409          extra_query: Add additional query parameters to the request
1410
1411          extra_body: Add additional JSON properties to the request
1412
1413          timeout: Override the client-level default timeout for this request, in seconds
1414        """
1415        ...
1416
1417    def retrieve(
1418        self,
1419        response_id: str,
1420        *,
1421        include: List[ResponseIncludable] | Omit = omit,
1422        include_obfuscation: bool | Omit = omit,
1423        starting_after: int | Omit = omit,
1424        stream: Literal[False] | Literal[True] | Omit = omit,
1425        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1426        # The extra values given here take precedence over values defined on the client or passed to this method.
1427        extra_headers: Headers | None = None,
1428        extra_query: Query | None = None,
1429        extra_body: Body | None = None,
1430        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1431    ) -> Response | Stream[ResponseStreamEvent]:
1432        if not response_id:
1433            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
1434        return self._get(
1435            f"/responses/{response_id}",
1436            options=make_request_options(
1437                extra_headers=extra_headers,
1438                extra_query=extra_query,
1439                extra_body=extra_body,
1440                timeout=timeout,
1441                query=maybe_transform(
1442                    {
1443                        "include": include,
1444                        "include_obfuscation": include_obfuscation,
1445                        "starting_after": starting_after,
1446                        "stream": stream,
1447                    },
1448                    response_retrieve_params.ResponseRetrieveParams,
1449                ),
1450            ),
1451            cast_to=Response,
1452            stream=stream or False,
1453            stream_cls=Stream[ResponseStreamEvent],
1454        )
1455
1456    def delete(
1457        self,
1458        response_id: str,
1459        *,
1460        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1461        # The extra values given here take precedence over values defined on the client or passed to this method.
1462        extra_headers: Headers | None = None,
1463        extra_query: Query | None = None,
1464        extra_body: Body | None = None,
1465        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1466    ) -> None:
1467        """
1468        Deletes a model response with the given ID.
1469
1470        Args:
1471          extra_headers: Send extra headers
1472
1473          extra_query: Add additional query parameters to the request
1474
1475          extra_body: Add additional JSON properties to the request
1476
1477          timeout: Override the client-level default timeout for this request, in seconds
1478        """
1479        if not response_id:
1480            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
1481        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
1482        return self._delete(
1483            f"/responses/{response_id}",
1484            options=make_request_options(
1485                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1486            ),
1487            cast_to=NoneType,
1488        )
1489
1490    def cancel(
1491        self,
1492        response_id: str,
1493        *,
1494        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1495        # The extra values given here take precedence over values defined on the client or passed to this method.
1496        extra_headers: Headers | None = None,
1497        extra_query: Query | None = None,
1498        extra_body: Body | None = None,
1499        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1500    ) -> Response:
1501        """Cancels a model response with the given ID.
1502
1503        Only responses created with the
1504        `background` parameter set to `true` can be cancelled.
1505        [Learn more](https://platform.openai.com/docs/guides/background).
1506
1507        Args:
1508          extra_headers: Send extra headers
1509
1510          extra_query: Add additional query parameters to the request
1511
1512          extra_body: Add additional JSON properties to the request
1513
1514          timeout: Override the client-level default timeout for this request, in seconds
1515        """
1516        if not response_id:
1517            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
1518        return self._post(
1519            f"/responses/{response_id}/cancel",
1520            options=make_request_options(
1521                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1522            ),
1523            cast_to=Response,
1524        )
1525
1526    def compact(
1527        self,
1528        *,
1529        input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
1530        instructions: Optional[str] | Omit = omit,
1531        model: Union[
1532            Literal[
1533                "gpt-5.1",
1534                "gpt-5.1-2025-11-13",
1535                "gpt-5.1-codex",
1536                "gpt-5.1-mini",
1537                "gpt-5.1-chat-latest",
1538                "gpt-5",
1539                "gpt-5-mini",
1540                "gpt-5-nano",
1541                "gpt-5-2025-08-07",
1542                "gpt-5-mini-2025-08-07",
1543                "gpt-5-nano-2025-08-07",
1544                "gpt-5-chat-latest",
1545                "gpt-4.1",
1546                "gpt-4.1-mini",
1547                "gpt-4.1-nano",
1548                "gpt-4.1-2025-04-14",
1549                "gpt-4.1-mini-2025-04-14",
1550                "gpt-4.1-nano-2025-04-14",
1551                "o4-mini",
1552                "o4-mini-2025-04-16",
1553                "o3",
1554                "o3-2025-04-16",
1555                "o3-mini",
1556                "o3-mini-2025-01-31",
1557                "o1",
1558                "o1-2024-12-17",
1559                "o1-preview",
1560                "o1-preview-2024-09-12",
1561                "o1-mini",
1562                "o1-mini-2024-09-12",
1563                "gpt-4o",
1564                "gpt-4o-2024-11-20",
1565                "gpt-4o-2024-08-06",
1566                "gpt-4o-2024-05-13",
1567                "gpt-4o-audio-preview",
1568                "gpt-4o-audio-preview-2024-10-01",
1569                "gpt-4o-audio-preview-2024-12-17",
1570                "gpt-4o-audio-preview-2025-06-03",
1571                "gpt-4o-mini-audio-preview",
1572                "gpt-4o-mini-audio-preview-2024-12-17",
1573                "gpt-4o-search-preview",
1574                "gpt-4o-mini-search-preview",
1575                "gpt-4o-search-preview-2025-03-11",
1576                "gpt-4o-mini-search-preview-2025-03-11",
1577                "chatgpt-4o-latest",
1578                "codex-mini-latest",
1579                "gpt-4o-mini",
1580                "gpt-4o-mini-2024-07-18",
1581                "gpt-4-turbo",
1582                "gpt-4-turbo-2024-04-09",
1583                "gpt-4-0125-preview",
1584                "gpt-4-turbo-preview",
1585                "gpt-4-1106-preview",
1586                "gpt-4-vision-preview",
1587                "gpt-4",
1588                "gpt-4-0314",
1589                "gpt-4-0613",
1590                "gpt-4-32k",
1591                "gpt-4-32k-0314",
1592                "gpt-4-32k-0613",
1593                "gpt-3.5-turbo",
1594                "gpt-3.5-turbo-16k",
1595                "gpt-3.5-turbo-0301",
1596                "gpt-3.5-turbo-0613",
1597                "gpt-3.5-turbo-1106",
1598                "gpt-3.5-turbo-0125",
1599                "gpt-3.5-turbo-16k-0613",
1600                "o1-pro",
1601                "o1-pro-2025-03-19",
1602                "o3-pro",
1603                "o3-pro-2025-06-10",
1604                "o3-deep-research",
1605                "o3-deep-research-2025-06-26",
1606                "o4-mini-deep-research",
1607                "o4-mini-deep-research-2025-06-26",
1608                "computer-use-preview",
1609                "computer-use-preview-2025-03-11",
1610                "gpt-5-codex",
1611                "gpt-5-pro",
1612                "gpt-5-pro-2025-10-06",
1613                "gpt-5.1-codex-max",
1614            ],
1615            str,
1616            None,
1617        ]
1618        | Omit = omit,
1619        previous_response_id: Optional[str] | Omit = omit,
1620        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1621        # The extra values given here take precedence over values defined on the client or passed to this method.
1622        extra_headers: Headers | None = None,
1623        extra_query: Query | None = None,
1624        extra_body: Body | None = None,
1625        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1626    ) -> CompactedResponse:
1627        """
1628        Compact conversation
1629
1630        Args:
1631          input: Text, image, or file inputs to the model, used to generate a response
1632
1633          instructions: A system (or developer) message inserted into the model's context. When used
1634              along with `previous_response_id`, the instructions from a previous response
1635              will not be carried over to the next response. This makes it simple to swap out
1636              system (or developer) messages in new responses.
1637
1638          model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a
1639              wide range of models with different capabilities, performance characteristics,
1640              and price points. Refer to the
1641              [model guide](https://platform.openai.com/docs/models) to browse and compare
1642              available models.
1643
1644          previous_response_id: The unique ID of the previous response to the model. Use this to create
1645              multi-turn conversations. Learn more about
1646              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
1647              Cannot be used in conjunction with `conversation`.
1648
1649          extra_headers: Send extra headers
1650
1651          extra_query: Add additional query parameters to the request
1652
1653          extra_body: Add additional JSON properties to the request
1654
1655          timeout: Override the client-level default timeout for this request, in seconds
1656        """
1657        return self._post(
1658            "/responses/compact",
1659            body=maybe_transform(
1660                {
1661                    "input": input,
1662                    "instructions": instructions,
1663                    "model": model,
1664                    "previous_response_id": previous_response_id,
1665                },
1666                response_compact_params.ResponseCompactParams,
1667            ),
1668            options=make_request_options(
1669                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1670            ),
1671            cast_to=CompactedResponse,
1672        )
1673
1674
1675class AsyncResponses(AsyncAPIResource):
1676    @cached_property
1677    def input_items(self) -> AsyncInputItems:
1678        return AsyncInputItems(self._client)
1679
1680    @cached_property
1681    def input_tokens(self) -> AsyncInputTokens:
1682        return AsyncInputTokens(self._client)
1683
1684    @cached_property
1685    def with_raw_response(self) -> AsyncResponsesWithRawResponse:
1686        """
1687        This property can be used as a prefix for any HTTP method call to return
1688        the raw response object instead of the parsed content.
1689
1690        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
1691        """
1692        return AsyncResponsesWithRawResponse(self)
1693
1694    @cached_property
1695    def with_streaming_response(self) -> AsyncResponsesWithStreamingResponse:
1696        """
1697        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
1698
1699        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
1700        """
1701        return AsyncResponsesWithStreamingResponse(self)
1702
1703    @overload
1704    async def create(
1705        self,
1706        *,
1707        background: Optional[bool] | Omit = omit,
1708        conversation: Optional[response_create_params.Conversation] | Omit = omit,
1709        include: Optional[List[ResponseIncludable]] | Omit = omit,
1710        input: Union[str, ResponseInputParam] | Omit = omit,
1711        instructions: Optional[str] | Omit = omit,
1712        max_output_tokens: Optional[int] | Omit = omit,
1713        max_tool_calls: Optional[int] | Omit = omit,
1714        metadata: Optional[Metadata] | Omit = omit,
1715        model: ResponsesModel | Omit = omit,
1716        parallel_tool_calls: Optional[bool] | Omit = omit,
1717        previous_response_id: Optional[str] | Omit = omit,
1718        prompt: Optional[ResponsePromptParam] | Omit = omit,
1719        prompt_cache_key: str | Omit = omit,
1720        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
1721        reasoning: Optional[Reasoning] | Omit = omit,
1722        safety_identifier: str | Omit = omit,
1723        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
1724        store: Optional[bool] | Omit = omit,
1725        stream: Optional[Literal[False]] | Omit = omit,
1726        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
1727        temperature: Optional[float] | Omit = omit,
1728        text: ResponseTextConfigParam | Omit = omit,
1729        tool_choice: response_create_params.ToolChoice | Omit = omit,
1730        tools: Iterable[ToolParam] | Omit = omit,
1731        top_logprobs: Optional[int] | Omit = omit,
1732        top_p: Optional[float] | Omit = omit,
1733        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
1734        user: str | Omit = omit,
1735        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1736        # The extra values given here take precedence over values defined on the client or passed to this method.
1737        extra_headers: Headers | None = None,
1738        extra_query: Query | None = None,
1739        extra_body: Body | None = None,
1740        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1741    ) -> Response:
1742        """Creates a model response.
1743
1744        Provide
1745        [text](https://platform.openai.com/docs/guides/text) or
1746        [image](https://platform.openai.com/docs/guides/images) inputs to generate
1747        [text](https://platform.openai.com/docs/guides/text) or
1748        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
1749        the model call your own
1750        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
1751        built-in [tools](https://platform.openai.com/docs/guides/tools) like
1752        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
1753        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
1754        your own data as input for the model's response.
1755
1756        Args:
1757          background: Whether to run the model response in the background.
1758              [Learn more](https://platform.openai.com/docs/guides/background).
1759
1760          conversation: The conversation that this response belongs to. Items from this conversation are
1761              prepended to `input_items` for this response request. Input items and output
1762              items from this response are automatically added to this conversation after this
1763              response completes.
1764
1765          include: Specify additional output data to include in the model response. Currently
1766              supported values are:
1767
1768              - `web_search_call.action.sources`: Include the sources of the web search tool
1769                call.
1770              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
1771                in code interpreter tool call items.
1772              - `computer_call_output.output.image_url`: Include image urls from the computer
1773                call output.
1774              - `file_search_call.results`: Include the search results of the file search tool
1775                call.
1776              - `message.input_image.image_url`: Include image urls from the input message.
1777              - `message.output_text.logprobs`: Include logprobs with assistant messages.
1778              - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
1779                tokens in reasoning item outputs. This enables reasoning items to be used in
1780                multi-turn conversations when using the Responses API statelessly (like when
1781                the `store` parameter is set to `false`, or when an organization is enrolled
1782                in the zero data retention program).
1783
1784          input: Text, image, or file inputs to the model, used to generate a response.
1785
1786              Learn more:
1787
1788              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
1789              - [Image inputs](https://platform.openai.com/docs/guides/images)
1790              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
1791              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
1792              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
1793
1794          instructions: A system (or developer) message inserted into the model's context.
1795
1796              When using along with `previous_response_id`, the instructions from a previous
1797              response will not be carried over to the next response. This makes it simple to
1798              swap out system (or developer) messages in new responses.
1799
1800          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
1801              including visible output tokens and
1802              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
1803
1804          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
1805              response. This maximum number applies across all built-in tool calls, not per
1806              individual tool. Any further attempts to call a tool by the model will be
1807              ignored.
1808
1809          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
1810              for storing additional information about the object in a structured format, and
1811              querying for objects via API or the dashboard.
1812
1813              Keys are strings with a maximum length of 64 characters. Values are strings with
1814              a maximum length of 512 characters.
1815
1816          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
1817              wide range of models with different capabilities, performance characteristics,
1818              and price points. Refer to the
1819              [model guide](https://platform.openai.com/docs/models) to browse and compare
1820              available models.
1821
1822          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
1823
1824          previous_response_id: The unique ID of the previous response to the model. Use this to create
1825              multi-turn conversations. Learn more about
1826              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
1827              Cannot be used in conjunction with `conversation`.
1828
1829          prompt: Reference to a prompt template and its variables.
1830              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
1831
1832          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
1833              hit rates. Replaces the `user` field.
1834              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
1835
1836          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
1837              prompt caching, which keeps cached prefixes active for longer, up to a maximum
1838              of 24 hours.
1839              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
1840
1841          reasoning: **gpt-5 and o-series models only**
1842
1843              Configuration options for
1844              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
1845
1846          safety_identifier: A stable identifier used to help detect users of your application that may be
1847              violating OpenAI's usage policies. The IDs should be a string that uniquely
1848              identifies each user. We recommend hashing their username or email address, in
1849              order to avoid sending us any identifying information.
1850              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
1851
1852          service_tier: Specifies the processing type used for serving the request.
1853
1854              - If set to 'auto', then the request will be processed with the service tier
1855                configured in the Project settings. Unless otherwise configured, the Project
1856                will use 'default'.
1857              - If set to 'default', then the request will be processed with the standard
1858                pricing and performance for the selected model.
1859              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
1860                '[priority](https://openai.com/api-priority-processing/)', then the request
1861                will be processed with the corresponding service tier.
1862              - When not set, the default behavior is 'auto'.
1863
1864              When the `service_tier` parameter is set, the response body will include the
1865              `service_tier` value based on the processing mode actually used to serve the
1866              request. This response value may be different from the value set in the
1867              parameter.
1868
1869          store: Whether to store the generated model response for later retrieval via API.
1870
1871          stream: If set to true, the model response data will be streamed to the client as it is
1872              generated using
1873              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
1874              See the
1875              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
1876              for more information.
1877
1878          stream_options: Options for streaming responses. Only set this when you set `stream: true`.
1879
1880          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
1881              make the output more random, while lower values like 0.2 will make it more
1882              focused and deterministic. We generally recommend altering this or `top_p` but
1883              not both.
1884
1885          text: Configuration options for a text response from the model. Can be plain text or
1886              structured JSON data. Learn more:
1887
1888              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
1889              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
1890
1891          tool_choice: How the model should select which tool (or tools) to use when generating a
1892              response. See the `tools` parameter to see how to specify which tools the model
1893              can call.
1894
1895          tools: An array of tools the model may call while generating a response. You can
1896              specify which tool to use by setting the `tool_choice` parameter.
1897
1898              We support the following categories of tools:
1899
1900              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
1901                capabilities, like
1902                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
1903                [file search](https://platform.openai.com/docs/guides/tools-file-search).
1904                Learn more about
1905                [built-in tools](https://platform.openai.com/docs/guides/tools).
1906              - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
1907                predefined connectors such as Google Drive and SharePoint. Learn more about
1908                [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
1909              - **Function calls (custom tools)**: Functions that are defined by you, enabling
1910                the model to call your own code with strongly typed arguments and outputs.
1911                Learn more about
1912                [function calling](https://platform.openai.com/docs/guides/function-calling).
1913                You can also use custom tools to call your own code.
1914
1915          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
1916              return at each token position, each with an associated log probability.
1917
1918          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
1919              model considers the results of the tokens with top_p probability mass. So 0.1
1920              means only the tokens comprising the top 10% probability mass are considered.
1921
1922              We generally recommend altering this or `temperature` but not both.
1923
1924          truncation: The truncation strategy to use for the model response.
1925
1926              - `auto`: If the input to this Response exceeds the model's context window size,
1927                the model will truncate the response to fit the context window by dropping
1928                items from the beginning of the conversation.
1929              - `disabled` (default): If the input size will exceed the context window size
1930                for a model, the request will fail with a 400 error.
1931
1932          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
1933              `prompt_cache_key` instead to maintain caching optimizations. A stable
1934              identifier for your end-users. Used to boost cache hit rates by better bucketing
1935              similar requests and to help OpenAI detect and prevent abuse.
1936              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
1937
1938          extra_headers: Send extra headers
1939
1940          extra_query: Add additional query parameters to the request
1941
1942          extra_body: Add additional JSON properties to the request
1943
1944          timeout: Override the client-level default timeout for this request, in seconds
1945        """
1946        ...
1947
1948    @overload
1949    async def create(
1950        self,
1951        *,
1952        stream: Literal[True],
1953        background: Optional[bool] | Omit = omit,
1954        conversation: Optional[response_create_params.Conversation] | Omit = omit,
1955        include: Optional[List[ResponseIncludable]] | Omit = omit,
1956        input: Union[str, ResponseInputParam] | Omit = omit,
1957        instructions: Optional[str] | Omit = omit,
1958        max_output_tokens: Optional[int] | Omit = omit,
1959        max_tool_calls: Optional[int] | Omit = omit,
1960        metadata: Optional[Metadata] | Omit = omit,
1961        model: ResponsesModel | Omit = omit,
1962        parallel_tool_calls: Optional[bool] | Omit = omit,
1963        previous_response_id: Optional[str] | Omit = omit,
1964        prompt: Optional[ResponsePromptParam] | Omit = omit,
1965        prompt_cache_key: str | Omit = omit,
1966        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
1967        reasoning: Optional[Reasoning] | Omit = omit,
1968        safety_identifier: str | Omit = omit,
1969        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
1970        store: Optional[bool] | Omit = omit,
1971        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
1972        temperature: Optional[float] | Omit = omit,
1973        text: ResponseTextConfigParam | Omit = omit,
1974        tool_choice: response_create_params.ToolChoice | Omit = omit,
1975        tools: Iterable[ToolParam] | Omit = omit,
1976        top_logprobs: Optional[int] | Omit = omit,
1977        top_p: Optional[float] | Omit = omit,
1978        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
1979        user: str | Omit = omit,
1980        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1981        # The extra values given here take precedence over values defined on the client or passed to this method.
1982        extra_headers: Headers | None = None,
1983        extra_query: Query | None = None,
1984        extra_body: Body | None = None,
1985        timeout: float | httpx.Timeout | None | NotGiven = not_given,
1986    ) -> AsyncStream[ResponseStreamEvent]:
1987        """Creates a model response.
1988
1989        Provide
1990        [text](https://platform.openai.com/docs/guides/text) or
1991        [image](https://platform.openai.com/docs/guides/images) inputs to generate
1992        [text](https://platform.openai.com/docs/guides/text) or
1993        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
1994        the model call your own
1995        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
1996        built-in [tools](https://platform.openai.com/docs/guides/tools) like
1997        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
1998        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
1999        your own data as input for the model's response.
2000
2001        Args:
2002          stream: If set to true, the model response data will be streamed to the client as it is
2003              generated using
2004              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
2005              See the
2006              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
2007              for more information.
2008
2009          background: Whether to run the model response in the background.
2010              [Learn more](https://platform.openai.com/docs/guides/background).
2011
2012          conversation: The conversation that this response belongs to. Items from this conversation are
2013              prepended to `input_items` for this response request. Input items and output
2014              items from this response are automatically added to this conversation after this
2015              response completes.
2016
2017          include: Specify additional output data to include in the model response. Currently
2018              supported values are:
2019
2020              - `web_search_call.action.sources`: Include the sources of the web search tool
2021                call.
2022              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
2023                in code interpreter tool call items.
2024              - `computer_call_output.output.image_url`: Include image urls from the computer
2025                call output.
2026              - `file_search_call.results`: Include the search results of the file search tool
2027                call.
2028              - `message.input_image.image_url`: Include image urls from the input message.
2029              - `message.output_text.logprobs`: Include logprobs with assistant messages.
2030              - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
2031                tokens in reasoning item outputs. This enables reasoning items to be used in
2032                multi-turn conversations when using the Responses API statelessly (like when
2033                the `store` parameter is set to `false`, or when an organization is enrolled
2034                in the zero data retention program).
2035
2036          input: Text, image, or file inputs to the model, used to generate a response.
2037
2038              Learn more:
2039
2040              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
2041              - [Image inputs](https://platform.openai.com/docs/guides/images)
2042              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
2043              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
2044              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
2045
2046          instructions: A system (or developer) message inserted into the model's context.
2047
2048              When using along with `previous_response_id`, the instructions from a previous
2049              response will not be carried over to the next response. This makes it simple to
2050              swap out system (or developer) messages in new responses.
2051
2052          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
2053              including visible output tokens and
2054              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
2055
2056          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
2057              response. This maximum number applies across all built-in tool calls, not per
2058              individual tool. Any further attempts to call a tool by the model will be
2059              ignored.
2060
2061          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
2062              for storing additional information about the object in a structured format, and
2063              querying for objects via API or the dashboard.
2064
2065              Keys are strings with a maximum length of 64 characters. Values are strings with
2066              a maximum length of 512 characters.
2067
2068          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
2069              wide range of models with different capabilities, performance characteristics,
2070              and price points. Refer to the
2071              [model guide](https://platform.openai.com/docs/models) to browse and compare
2072              available models.
2073
2074          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
2075
2076          previous_response_id: The unique ID of the previous response to the model. Use this to create
2077              multi-turn conversations. Learn more about
2078              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
2079              Cannot be used in conjunction with `conversation`.
2080
2081          prompt: Reference to a prompt template and its variables.
2082              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
2083
2084          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
2085              hit rates. Replaces the `user` field.
2086              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
2087
2088          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
2089              prompt caching, which keeps cached prefixes active for longer, up to a maximum
2090              of 24 hours.
2091              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
2092
2093          reasoning: **gpt-5 and o-series models only**
2094
2095              Configuration options for
2096              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
2097
2098          safety_identifier: A stable identifier used to help detect users of your application that may be
2099              violating OpenAI's usage policies. The IDs should be a string that uniquely
2100              identifies each user. We recommend hashing their username or email address, in
2101              order to avoid sending us any identifying information.
2102              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
2103
2104          service_tier: Specifies the processing type used for serving the request.
2105
2106              - If set to 'auto', then the request will be processed with the service tier
2107                configured in the Project settings. Unless otherwise configured, the Project
2108                will use 'default'.
2109              - If set to 'default', then the request will be processed with the standard
2110                pricing and performance for the selected model.
2111              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
2112                '[priority](https://openai.com/api-priority-processing/)', then the request
2113                will be processed with the corresponding service tier.
2114              - When not set, the default behavior is 'auto'.
2115
2116              When the `service_tier` parameter is set, the response body will include the
2117              `service_tier` value based on the processing mode actually used to serve the
2118              request. This response value may be different from the value set in the
2119              parameter.
2120
2121          store: Whether to store the generated model response for later retrieval via API.
2122
2123          stream_options: Options for streaming responses. Only set this when you set `stream: true`.
2124
2125          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
2126              make the output more random, while lower values like 0.2 will make it more
2127              focused and deterministic. We generally recommend altering this or `top_p` but
2128              not both.
2129
2130          text: Configuration options for a text response from the model. Can be plain text or
2131              structured JSON data. Learn more:
2132
2133              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
2134              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
2135
2136          tool_choice: How the model should select which tool (or tools) to use when generating a
2137              response. See the `tools` parameter to see how to specify which tools the model
2138              can call.
2139
2140          tools: An array of tools the model may call while generating a response. You can
2141              specify which tool to use by setting the `tool_choice` parameter.
2142
2143              We support the following categories of tools:
2144
2145              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
2146                capabilities, like
2147                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
2148                [file search](https://platform.openai.com/docs/guides/tools-file-search).
2149                Learn more about
2150                [built-in tools](https://platform.openai.com/docs/guides/tools).
2151              - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
2152                predefined connectors such as Google Drive and SharePoint. Learn more about
2153                [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
2154              - **Function calls (custom tools)**: Functions that are defined by you, enabling
2155                the model to call your own code with strongly typed arguments and outputs.
2156                Learn more about
2157                [function calling](https://platform.openai.com/docs/guides/function-calling).
2158                You can also use custom tools to call your own code.
2159
2160          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
2161              return at each token position, each with an associated log probability.
2162
2163          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
2164              model considers the results of the tokens with top_p probability mass. So 0.1
2165              means only the tokens comprising the top 10% probability mass are considered.
2166
2167              We generally recommend altering this or `temperature` but not both.
2168
2169          truncation: The truncation strategy to use for the model response.
2170
2171              - `auto`: If the input to this Response exceeds the model's context window size,
2172                the model will truncate the response to fit the context window by dropping
2173                items from the beginning of the conversation.
2174              - `disabled` (default): If the input size will exceed the context window size
2175                for a model, the request will fail with a 400 error.
2176
2177          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
2178              `prompt_cache_key` instead to maintain caching optimizations. A stable
2179              identifier for your end-users. Used to boost cache hit rates by better bucketing
2180              similar requests and to help OpenAI detect and prevent abuse.
2181              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
2182
2183          extra_headers: Send extra headers
2184
2185          extra_query: Add additional query parameters to the request
2186
2187          extra_body: Add additional JSON properties to the request
2188
2189          timeout: Override the client-level default timeout for this request, in seconds
2190        """
2191        ...
2192
2193    @overload
2194    async def create(
2195        self,
2196        *,
2197        stream: bool,
2198        background: Optional[bool] | Omit = omit,
2199        conversation: Optional[response_create_params.Conversation] | Omit = omit,
2200        include: Optional[List[ResponseIncludable]] | Omit = omit,
2201        input: Union[str, ResponseInputParam] | Omit = omit,
2202        instructions: Optional[str] | Omit = omit,
2203        max_output_tokens: Optional[int] | Omit = omit,
2204        max_tool_calls: Optional[int] | Omit = omit,
2205        metadata: Optional[Metadata] | Omit = omit,
2206        model: ResponsesModel | Omit = omit,
2207        parallel_tool_calls: Optional[bool] | Omit = omit,
2208        previous_response_id: Optional[str] | Omit = omit,
2209        prompt: Optional[ResponsePromptParam] | Omit = omit,
2210        prompt_cache_key: str | Omit = omit,
2211        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
2212        reasoning: Optional[Reasoning] | Omit = omit,
2213        safety_identifier: str | Omit = omit,
2214        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
2215        store: Optional[bool] | Omit = omit,
2216        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
2217        temperature: Optional[float] | Omit = omit,
2218        text: ResponseTextConfigParam | Omit = omit,
2219        tool_choice: response_create_params.ToolChoice | Omit = omit,
2220        tools: Iterable[ToolParam] | Omit = omit,
2221        top_logprobs: Optional[int] | Omit = omit,
2222        top_p: Optional[float] | Omit = omit,
2223        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
2224        user: str | Omit = omit,
2225        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2226        # The extra values given here take precedence over values defined on the client or passed to this method.
2227        extra_headers: Headers | None = None,
2228        extra_query: Query | None = None,
2229        extra_body: Body | None = None,
2230        timeout: float | httpx.Timeout | None | NotGiven = not_given,
2231    ) -> Response | AsyncStream[ResponseStreamEvent]:
2232        """Creates a model response.
2233
2234        Provide
2235        [text](https://platform.openai.com/docs/guides/text) or
2236        [image](https://platform.openai.com/docs/guides/images) inputs to generate
2237        [text](https://platform.openai.com/docs/guides/text) or
2238        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
2239        the model call your own
2240        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
2241        built-in [tools](https://platform.openai.com/docs/guides/tools) like
2242        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
2243        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
2244        your own data as input for the model's response.
2245
2246        Args:
2247          stream: If set to true, the model response data will be streamed to the client as it is
2248              generated using
2249              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
2250              See the
2251              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
2252              for more information.
2253
2254          background: Whether to run the model response in the background.
2255              [Learn more](https://platform.openai.com/docs/guides/background).
2256
2257          conversation: The conversation that this response belongs to. Items from this conversation are
2258              prepended to `input_items` for this response request. Input items and output
2259              items from this response are automatically added to this conversation after this
2260              response completes.
2261
2262          include: Specify additional output data to include in the model response. Currently
2263              supported values are:
2264
2265              - `web_search_call.action.sources`: Include the sources of the web search tool
2266                call.
2267              - `code_interpreter_call.outputs`: Includes the outputs of python code execution
2268                in code interpreter tool call items.
2269              - `computer_call_output.output.image_url`: Include image urls from the computer
2270                call output.
2271              - `file_search_call.results`: Include the search results of the file search tool
2272                call.
2273              - `message.input_image.image_url`: Include image urls from the input message.
2274              - `message.output_text.logprobs`: Include logprobs with assistant messages.
2275              - `reasoning.encrypted_content`: Includes an encrypted version of reasoning
2276                tokens in reasoning item outputs. This enables reasoning items to be used in
2277                multi-turn conversations when using the Responses API statelessly (like when
2278                the `store` parameter is set to `false`, or when an organization is enrolled
2279                in the zero data retention program).
2280
2281          input: Text, image, or file inputs to the model, used to generate a response.
2282
2283              Learn more:
2284
2285              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
2286              - [Image inputs](https://platform.openai.com/docs/guides/images)
2287              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
2288              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
2289              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
2290
2291          instructions: A system (or developer) message inserted into the model's context.
2292
2293              When using along with `previous_response_id`, the instructions from a previous
2294              response will not be carried over to the next response. This makes it simple to
2295              swap out system (or developer) messages in new responses.
2296
2297          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
2298              including visible output tokens and
2299              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
2300
2301          max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
2302              response. This maximum number applies across all built-in tool calls, not per
2303              individual tool. Any further attempts to call a tool by the model will be
2304              ignored.
2305
2306          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
2307              for storing additional information about the object in a structured format, and
2308              querying for objects via API or the dashboard.
2309
2310              Keys are strings with a maximum length of 64 characters. Values are strings with
2311              a maximum length of 512 characters.
2312
2313          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
2314              wide range of models with different capabilities, performance characteristics,
2315              and price points. Refer to the
2316              [model guide](https://platform.openai.com/docs/models) to browse and compare
2317              available models.
2318
2319          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
2320
2321          previous_response_id: The unique ID of the previous response to the model. Use this to create
2322              multi-turn conversations. Learn more about
2323              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
2324              Cannot be used in conjunction with `conversation`.
2325
2326          prompt: Reference to a prompt template and its variables.
2327              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
2328
2329          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
2330              hit rates. Replaces the `user` field.
2331              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
2332
2333          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
2334              prompt caching, which keeps cached prefixes active for longer, up to a maximum
2335              of 24 hours.
2336              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
2337
2338          reasoning: **gpt-5 and o-series models only**
2339
2340              Configuration options for
2341              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
2342
2343          safety_identifier: A stable identifier used to help detect users of your application that may be
2344              violating OpenAI's usage policies. The IDs should be a string that uniquely
2345              identifies each user. We recommend hashing their username or email address, in
2346              order to avoid sending us any identifying information.
2347              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
2348
2349          service_tier: Specifies the processing type used for serving the request.
2350
2351              - If set to 'auto', then the request will be processed with the service tier
2352                configured in the Project settings. Unless otherwise configured, the Project
2353                will use 'default'.
2354              - If set to 'default', then the request will be processed with the standard
2355                pricing and performance for the selected model.
2356              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
2357                '[priority](https://openai.com/api-priority-processing/)', then the request
2358                will be processed with the corresponding service tier.
2359              - When not set, the default behavior is 'auto'.
2360
2361              When the `service_tier` parameter is set, the response body will include the
2362              `service_tier` value based on the processing mode actually used to serve the
2363              request. This response value may be different from the value set in the
2364              parameter.
2365
2366          store: Whether to store the generated model response for later retrieval via API.
2367
2368          stream_options: Options for streaming responses. Only set this when you set `stream: true`.
2369
2370          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
2371              make the output more random, while lower values like 0.2 will make it more
2372              focused and deterministic. We generally recommend altering this or `top_p` but
2373              not both.
2374
2375          text: Configuration options for a text response from the model. Can be plain text or
2376              structured JSON data. Learn more:
2377
2378              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
2379              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
2380
2381          tool_choice: How the model should select which tool (or tools) to use when generating a
2382              response. See the `tools` parameter to see how to specify which tools the model
2383              can call.
2384
2385          tools: An array of tools the model may call while generating a response. You can
2386              specify which tool to use by setting the `tool_choice` parameter.
2387
2388              We support the following categories of tools:
2389
2390              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
2391                capabilities, like
2392                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
2393                [file search](https://platform.openai.com/docs/guides/tools-file-search).
2394                Learn more about
2395                [built-in tools](https://platform.openai.com/docs/guides/tools).
2396              - **MCP Tools**: Integrations with third-party systems via custom MCP servers or
2397                predefined connectors such as Google Drive and SharePoint. Learn more about
2398                [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
2399              - **Function calls (custom tools)**: Functions that are defined by you, enabling
2400                the model to call your own code with strongly typed arguments and outputs.
2401                Learn more about
2402                [function calling](https://platform.openai.com/docs/guides/function-calling).
2403                You can also use custom tools to call your own code.
2404
2405          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
2406              return at each token position, each with an associated log probability.
2407
2408          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
2409              model considers the results of the tokens with top_p probability mass. So 0.1
2410              means only the tokens comprising the top 10% probability mass are considered.
2411
2412              We generally recommend altering this or `temperature` but not both.
2413
2414          truncation: The truncation strategy to use for the model response.
2415
2416              - `auto`: If the input to this Response exceeds the model's context window size,
2417                the model will truncate the response to fit the context window by dropping
2418                items from the beginning of the conversation.
2419              - `disabled` (default): If the input size will exceed the context window size
2420                for a model, the request will fail with a 400 error.
2421
2422          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
2423              `prompt_cache_key` instead to maintain caching optimizations. A stable
2424              identifier for your end-users. Used to boost cache hit rates by better bucketing
2425              similar requests and to help OpenAI detect and prevent abuse.
2426              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
2427
2428          extra_headers: Send extra headers
2429
2430          extra_query: Add additional query parameters to the request
2431
2432          extra_body: Add additional JSON properties to the request
2433
2434          timeout: Override the client-level default timeout for this request, in seconds
2435        """
2436        ...
2437
2438    async def create(
2439        self,
2440        *,
2441        background: Optional[bool] | Omit = omit,
2442        conversation: Optional[response_create_params.Conversation] | Omit = omit,
2443        include: Optional[List[ResponseIncludable]] | Omit = omit,
2444        input: Union[str, ResponseInputParam] | Omit = omit,
2445        instructions: Optional[str] | Omit = omit,
2446        max_output_tokens: Optional[int] | Omit = omit,
2447        max_tool_calls: Optional[int] | Omit = omit,
2448        metadata: Optional[Metadata] | Omit = omit,
2449        model: ResponsesModel | Omit = omit,
2450        parallel_tool_calls: Optional[bool] | Omit = omit,
2451        previous_response_id: Optional[str] | Omit = omit,
2452        prompt: Optional[ResponsePromptParam] | Omit = omit,
2453        prompt_cache_key: str | Omit = omit,
2454        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
2455        reasoning: Optional[Reasoning] | Omit = omit,
2456        safety_identifier: str | Omit = omit,
2457        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
2458        store: Optional[bool] | Omit = omit,
2459        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
2460        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
2461        temperature: Optional[float] | Omit = omit,
2462        text: ResponseTextConfigParam | Omit = omit,
2463        tool_choice: response_create_params.ToolChoice | Omit = omit,
2464        tools: Iterable[ToolParam] | Omit = omit,
2465        top_logprobs: Optional[int] | Omit = omit,
2466        top_p: Optional[float] | Omit = omit,
2467        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
2468        user: str | Omit = omit,
2469        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2470        # The extra values given here take precedence over values defined on the client or passed to this method.
2471        extra_headers: Headers | None = None,
2472        extra_query: Query | None = None,
2473        extra_body: Body | None = None,
2474        timeout: float | httpx.Timeout | None | NotGiven = not_given,
2475    ) -> Response | AsyncStream[ResponseStreamEvent]:
2476        return await self._post(
2477            "/responses",
2478            body=await async_maybe_transform(
2479                {
2480                    "background": background,
2481                    "conversation": conversation,
2482                    "include": include,
2483                    "input": input,
2484                    "instructions": instructions,
2485                    "max_output_tokens": max_output_tokens,
2486                    "max_tool_calls": max_tool_calls,
2487                    "metadata": metadata,
2488                    "model": model,
2489                    "parallel_tool_calls": parallel_tool_calls,
2490                    "previous_response_id": previous_response_id,
2491                    "prompt": prompt,
2492                    "prompt_cache_key": prompt_cache_key,
2493                    "prompt_cache_retention": prompt_cache_retention,
2494                    "reasoning": reasoning,
2495                    "safety_identifier": safety_identifier,
2496                    "service_tier": service_tier,
2497                    "store": store,
2498                    "stream": stream,
2499                    "stream_options": stream_options,
2500                    "temperature": temperature,
2501                    "text": text,
2502                    "tool_choice": tool_choice,
2503                    "tools": tools,
2504                    "top_logprobs": top_logprobs,
2505                    "top_p": top_p,
2506                    "truncation": truncation,
2507                    "user": user,
2508                },
2509                response_create_params.ResponseCreateParamsStreaming
2510                if stream
2511                else response_create_params.ResponseCreateParamsNonStreaming,
2512            ),
2513            options=make_request_options(
2514                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
2515            ),
2516            cast_to=Response,
2517            stream=stream or False,
2518            stream_cls=AsyncStream[ResponseStreamEvent],
2519        )
2520
2521    @overload
2522    def stream(
2523        self,
2524        *,
2525        response_id: str,
2526        text_format: type[TextFormatT] | Omit = omit,
2527        starting_after: int | Omit = omit,
2528        tools: Iterable[ParseableToolParam] | Omit = omit,
2529        # The extra values given here take precedence over values defined on the client or passed to this method.
2530        extra_headers: Headers | None = None,
2531        extra_query: Query | None = None,
2532        extra_body: Body | None = None,
2533        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
2534    ) -> AsyncResponseStreamManager[TextFormatT]: ...
2535
2536    @overload
2537    def stream(
2538        self,
2539        *,
2540        input: Union[str, ResponseInputParam],
2541        model: ResponsesModel,
2542        background: Optional[bool] | Omit = omit,
2543        text_format: type[TextFormatT] | Omit = omit,
2544        tools: Iterable[ParseableToolParam] | Omit = omit,
2545        conversation: Optional[response_create_params.Conversation] | Omit = omit,
2546        include: Optional[List[ResponseIncludable]] | Omit = omit,
2547        instructions: Optional[str] | Omit = omit,
2548        max_output_tokens: Optional[int] | Omit = omit,
2549        max_tool_calls: Optional[int] | Omit = omit,
2550        metadata: Optional[Metadata] | Omit = omit,
2551        parallel_tool_calls: Optional[bool] | Omit = omit,
2552        previous_response_id: Optional[str] | Omit = omit,
2553        prompt: Optional[ResponsePromptParam] | Omit = omit,
2554        prompt_cache_key: str | Omit = omit,
2555        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
2556        reasoning: Optional[Reasoning] | Omit = omit,
2557        safety_identifier: str | Omit = omit,
2558        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
2559        store: Optional[bool] | Omit = omit,
2560        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
2561        temperature: Optional[float] | Omit = omit,
2562        text: ResponseTextConfigParam | Omit = omit,
2563        tool_choice: response_create_params.ToolChoice | Omit = omit,
2564        top_logprobs: Optional[int] | Omit = omit,
2565        top_p: Optional[float] | Omit = omit,
2566        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
2567        user: str | Omit = omit,
2568        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2569        # The extra values given here take precedence over values defined on the client or passed to this method.
2570        extra_headers: Headers | None = None,
2571        extra_query: Query | None = None,
2572        extra_body: Body | None = None,
2573        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
2574    ) -> AsyncResponseStreamManager[TextFormatT]: ...
2575
2576    def stream(
2577        self,
2578        *,
2579        response_id: str | Omit = omit,
2580        input: Union[str, ResponseInputParam] | Omit = omit,
2581        model: ResponsesModel | Omit = omit,
2582        background: Optional[bool] | Omit = omit,
2583        text_format: type[TextFormatT] | Omit = omit,
2584        tools: Iterable[ParseableToolParam] | Omit = omit,
2585        conversation: Optional[response_create_params.Conversation] | Omit = omit,
2586        include: Optional[List[ResponseIncludable]] | Omit = omit,
2587        instructions: Optional[str] | Omit = omit,
2588        max_output_tokens: Optional[int] | Omit = omit,
2589        max_tool_calls: Optional[int] | Omit = omit,
2590        metadata: Optional[Metadata] | Omit = omit,
2591        parallel_tool_calls: Optional[bool] | Omit = omit,
2592        previous_response_id: Optional[str] | Omit = omit,
2593        prompt: Optional[ResponsePromptParam] | Omit = omit,
2594        prompt_cache_key: str | Omit = omit,
2595        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
2596        reasoning: Optional[Reasoning] | Omit = omit,
2597        safety_identifier: str | Omit = omit,
2598        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
2599        store: Optional[bool] | Omit = omit,
2600        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
2601        temperature: Optional[float] | Omit = omit,
2602        text: ResponseTextConfigParam | Omit = omit,
2603        tool_choice: response_create_params.ToolChoice | Omit = omit,
2604        top_logprobs: Optional[int] | Omit = omit,
2605        top_p: Optional[float] | Omit = omit,
2606        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
2607        user: str | Omit = omit,
2608        starting_after: int | Omit = omit,
2609        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2610        # The extra values given here take precedence over values defined on the client or passed to this method.
2611        extra_headers: Headers | None = None,
2612        extra_query: Query | None = None,
2613        extra_body: Body | None = None,
2614        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
2615    ) -> AsyncResponseStreamManager[TextFormatT]:
2616        new_response_args = {
2617            "input": input,
2618            "model": model,
2619            "conversation": conversation,
2620            "include": include,
2621            "instructions": instructions,
2622            "max_output_tokens": max_output_tokens,
2623            "max_tool_calls": max_tool_calls,
2624            "metadata": metadata,
2625            "parallel_tool_calls": parallel_tool_calls,
2626            "previous_response_id": previous_response_id,
2627            "prompt": prompt,
2628            "prompt_cache_key": prompt_cache_key,
2629            "prompt_cache_retention": prompt_cache_retention,
2630            "reasoning": reasoning,
2631            "safety_identifier": safety_identifier,
2632            "service_tier": service_tier,
2633            "store": store,
2634            "stream_options": stream_options,
2635            "temperature": temperature,
2636            "text": text,
2637            "tool_choice": tool_choice,
2638            "top_logprobs": top_logprobs,
2639            "top_p": top_p,
2640            "truncation": truncation,
2641            "user": user,
2642            "background": background,
2643        }
2644        new_response_args_names = [k for k, v in new_response_args.items() if is_given(v)]
2645
2646        if (is_given(response_id) or is_given(starting_after)) and len(new_response_args_names) > 0:
2647            raise ValueError(
2648                "Cannot provide both response_id/starting_after can't be provided together with "
2649                + ", ".join(new_response_args_names)
2650            )
2651
2652        tools = _make_tools(tools)
2653        if len(new_response_args_names) > 0:
2654            if isinstance(input, NotGiven):
2655                raise ValueError("input must be provided when creating a new response")
2656
2657            if not is_given(model):
2658                raise ValueError("model must be provided when creating a new response")
2659
2660            if is_given(text_format):
2661                if not text:
2662                    text = {}
2663
2664                if "format" in text:
2665                    raise TypeError("Cannot mix and match text.format with text_format")
2666                text = copy(text)
2667                text["format"] = _type_to_text_format_param(text_format)
2668
2669            api_request = self.create(
2670                input=input,
2671                model=model,
2672                stream=True,
2673                tools=tools,
2674                conversation=conversation,
2675                include=include,
2676                instructions=instructions,
2677                max_output_tokens=max_output_tokens,
2678                max_tool_calls=max_tool_calls,
2679                metadata=metadata,
2680                parallel_tool_calls=parallel_tool_calls,
2681                previous_response_id=previous_response_id,
2682                prompt=prompt,
2683                prompt_cache_key=prompt_cache_key,
2684                prompt_cache_retention=prompt_cache_retention,
2685                store=store,
2686                stream_options=stream_options,
2687                temperature=temperature,
2688                text=text,
2689                tool_choice=tool_choice,
2690                reasoning=reasoning,
2691                safety_identifier=safety_identifier,
2692                service_tier=service_tier,
2693                top_logprobs=top_logprobs,
2694                top_p=top_p,
2695                truncation=truncation,
2696                user=user,
2697                background=background,
2698                extra_headers=extra_headers,
2699                extra_query=extra_query,
2700                extra_body=extra_body,
2701                timeout=timeout,
2702            )
2703
2704            return AsyncResponseStreamManager(
2705                api_request,
2706                text_format=text_format,
2707                input_tools=tools,
2708                starting_after=None,
2709            )
2710        else:
2711            if isinstance(response_id, Omit):
2712                raise ValueError("response_id must be provided when streaming an existing response")
2713
2714            api_request = self.retrieve(
2715                response_id,
2716                stream=True,
2717                include=include or [],
2718                extra_headers=extra_headers,
2719                extra_query=extra_query,
2720                extra_body=extra_body,
2721                timeout=timeout,
2722            )
2723            return AsyncResponseStreamManager(
2724                api_request,
2725                text_format=text_format,
2726                input_tools=tools,
2727                starting_after=starting_after if is_given(starting_after) else None,
2728            )
2729
2730    async def parse(
2731        self,
2732        *,
2733        text_format: type[TextFormatT] | Omit = omit,
2734        background: Optional[bool] | Omit = omit,
2735        conversation: Optional[response_create_params.Conversation] | Omit = omit,
2736        include: Optional[List[ResponseIncludable]] | Omit = omit,
2737        input: Union[str, ResponseInputParam] | Omit = omit,
2738        instructions: Optional[str] | Omit = omit,
2739        max_output_tokens: Optional[int] | Omit = omit,
2740        max_tool_calls: Optional[int] | Omit = omit,
2741        metadata: Optional[Metadata] | Omit = omit,
2742        model: ResponsesModel | Omit = omit,
2743        parallel_tool_calls: Optional[bool] | Omit = omit,
2744        previous_response_id: Optional[str] | Omit = omit,
2745        prompt: Optional[ResponsePromptParam] | Omit = omit,
2746        prompt_cache_key: str | Omit = omit,
2747        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
2748        reasoning: Optional[Reasoning] | Omit = omit,
2749        safety_identifier: str | Omit = omit,
2750        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
2751        store: Optional[bool] | Omit = omit,
2752        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
2753        stream_options: Optional[response_create_params.StreamOptions] | Omit = omit,
2754        temperature: Optional[float] | Omit = omit,
2755        text: ResponseTextConfigParam | Omit = omit,
2756        tool_choice: response_create_params.ToolChoice | Omit = omit,
2757        tools: Iterable[ParseableToolParam] | Omit = omit,
2758        top_logprobs: Optional[int] | Omit = omit,
2759        top_p: Optional[float] | Omit = omit,
2760        truncation: Optional[Literal["auto", "disabled"]] | Omit = omit,
2761        user: str | Omit = omit,
2762        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
2763        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2764        # The extra values given here take precedence over values defined on the client or passed to this method.
2765        extra_headers: Headers | None = None,
2766        extra_query: Query | None = None,
2767        extra_body: Body | None = None,
2768        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
2769    ) -> ParsedResponse[TextFormatT]:
2770        if is_given(text_format):
2771            if not text:
2772                text = {}
2773
2774            if "format" in text:
2775                raise TypeError("Cannot mix and match text.format with text_format")
2776            text = copy(text)
2777            text["format"] = _type_to_text_format_param(text_format)
2778
2779        tools = _make_tools(tools)
2780
2781        def parser(raw_response: Response) -> ParsedResponse[TextFormatT]:
2782            return parse_response(
2783                input_tools=tools,
2784                text_format=text_format,
2785                response=raw_response,
2786            )
2787
2788        return await self._post(
2789            "/responses",
2790            body=maybe_transform(
2791                {
2792                    "background": background,
2793                    "conversation": conversation,
2794                    "include": include,
2795                    "input": input,
2796                    "instructions": instructions,
2797                    "max_output_tokens": max_output_tokens,
2798                    "max_tool_calls": max_tool_calls,
2799                    "metadata": metadata,
2800                    "model": model,
2801                    "parallel_tool_calls": parallel_tool_calls,
2802                    "previous_response_id": previous_response_id,
2803                    "prompt": prompt,
2804                    "prompt_cache_key": prompt_cache_key,
2805                    "prompt_cache_retention": prompt_cache_retention,
2806                    "reasoning": reasoning,
2807                    "safety_identifier": safety_identifier,
2808                    "service_tier": service_tier,
2809                    "store": store,
2810                    "stream": stream,
2811                    "stream_options": stream_options,
2812                    "temperature": temperature,
2813                    "text": text,
2814                    "tool_choice": tool_choice,
2815                    "tools": tools,
2816                    "top_logprobs": top_logprobs,
2817                    "top_p": top_p,
2818                    "truncation": truncation,
2819                    "user": user,
2820                    "verbosity": verbosity,
2821                },
2822                response_create_params.ResponseCreateParams,
2823            ),
2824            options=make_request_options(
2825                extra_headers=extra_headers,
2826                extra_query=extra_query,
2827                extra_body=extra_body,
2828                timeout=timeout,
2829                post_parser=parser,
2830            ),
2831            # we turn the `Response` instance into a `ParsedResponse`
2832            # in the `parser` function above
2833            cast_to=cast(Type[ParsedResponse[TextFormatT]], Response),
2834        )
2835
2836    @overload
2837    async def retrieve(
2838        self,
2839        response_id: str,
2840        *,
2841        include: List[ResponseIncludable] | Omit = omit,
2842        include_obfuscation: bool | Omit = omit,
2843        starting_after: int | Omit = omit,
2844        stream: Literal[False] | Omit = omit,
2845        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2846        # The extra values given here take precedence over values defined on the client or passed to this method.
2847        extra_headers: Headers | None = None,
2848        extra_query: Query | None = None,
2849        extra_body: Body | None = None,
2850        timeout: float | httpx.Timeout | None | NotGiven = not_given,
2851    ) -> Response: ...
2852
2853    @overload
2854    async def retrieve(
2855        self,
2856        response_id: str,
2857        *,
2858        stream: Literal[True],
2859        include: List[ResponseIncludable] | Omit = omit,
2860        starting_after: int | Omit = omit,
2861        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2862        # The extra values given here take precedence over values defined on the client or passed to this method.
2863        extra_headers: Headers | None = None,
2864        extra_query: Query | None = None,
2865        extra_body: Body | None = None,
2866        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
2867    ) -> AsyncStream[ResponseStreamEvent]: ...
2868
2869    @overload
2870    async def retrieve(
2871        self,
2872        response_id: str,
2873        *,
2874        stream: bool,
2875        include: List[ResponseIncludable] | Omit = omit,
2876        starting_after: int | Omit = omit,
2877        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2878        # The extra values given here take precedence over values defined on the client or passed to this method.
2879        extra_headers: Headers | None = None,
2880        extra_query: Query | None = None,
2881        extra_body: Body | None = None,
2882        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
2883    ) -> Response | AsyncStream[ResponseStreamEvent]: ...
2884
2885    @overload
2886    async def retrieve(
2887        self,
2888        response_id: str,
2889        *,
2890        stream: bool = False,
2891        include: List[ResponseIncludable] | Omit = omit,
2892        starting_after: int | Omit = omit,
2893        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2894        # The extra values given here take precedence over values defined on the client or passed to this method.
2895        extra_headers: Headers | None = None,
2896        extra_query: Query | None = None,
2897        extra_body: Body | None = None,
2898        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
2899    ) -> Response | AsyncStream[ResponseStreamEvent]:
2900        """
2901        Retrieves a model response with the given ID.
2902
2903        Args:
2904          include: Additional fields to include in the response. See the `include` parameter for
2905              Response creation above for more information.
2906
2907          include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random
2908              characters to an `obfuscation` field on streaming delta events to normalize
2909              payload sizes as a mitigation to certain side-channel attacks. These obfuscation
2910              fields are included by default, but add a small amount of overhead to the data
2911              stream. You can set `include_obfuscation` to false to optimize for bandwidth if
2912              you trust the network links between your application and the OpenAI API.
2913
2914          starting_after: The sequence number of the event after which to start streaming.
2915
2916          stream: If set to true, the model response data will be streamed to the client as it is
2917              generated using
2918              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
2919              See the
2920              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
2921              for more information.
2922
2923          extra_headers: Send extra headers
2924
2925          extra_query: Add additional query parameters to the request
2926
2927          extra_body: Add additional JSON properties to the request
2928
2929          timeout: Override the client-level default timeout for this request, in seconds
2930        """
2931        ...
2932
2933    @overload
2934    async def retrieve(
2935        self,
2936        response_id: str,
2937        *,
2938        stream: Literal[True],
2939        include: List[ResponseIncludable] | Omit = omit,
2940        include_obfuscation: bool | Omit = omit,
2941        starting_after: int | Omit = omit,
2942        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2943        # The extra values given here take precedence over values defined on the client or passed to this method.
2944        extra_headers: Headers | None = None,
2945        extra_query: Query | None = None,
2946        extra_body: Body | None = None,
2947        timeout: float | httpx.Timeout | None | NotGiven = not_given,
2948    ) -> AsyncStream[ResponseStreamEvent]:
2949        """
2950        Retrieves a model response with the given ID.
2951
2952        Args:
2953          stream: If set to true, the model response data will be streamed to the client as it is
2954              generated using
2955              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
2956              See the
2957              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
2958              for more information.
2959
2960          include: Additional fields to include in the response. See the `include` parameter for
2961              Response creation above for more information.
2962
2963          include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random
2964              characters to an `obfuscation` field on streaming delta events to normalize
2965              payload sizes as a mitigation to certain side-channel attacks. These obfuscation
2966              fields are included by default, but add a small amount of overhead to the data
2967              stream. You can set `include_obfuscation` to false to optimize for bandwidth if
2968              you trust the network links between your application and the OpenAI API.
2969
2970          starting_after: The sequence number of the event after which to start streaming.
2971
2972          extra_headers: Send extra headers
2973
2974          extra_query: Add additional query parameters to the request
2975
2976          extra_body: Add additional JSON properties to the request
2977
2978          timeout: Override the client-level default timeout for this request, in seconds
2979        """
2980        ...
2981
2982    @overload
2983    async def retrieve(
2984        self,
2985        response_id: str,
2986        *,
2987        stream: bool,
2988        include: List[ResponseIncludable] | Omit = omit,
2989        include_obfuscation: bool | Omit = omit,
2990        starting_after: int | Omit = omit,
2991        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2992        # The extra values given here take precedence over values defined on the client or passed to this method.
2993        extra_headers: Headers | None = None,
2994        extra_query: Query | None = None,
2995        extra_body: Body | None = None,
2996        timeout: float | httpx.Timeout | None | NotGiven = not_given,
2997    ) -> Response | AsyncStream[ResponseStreamEvent]:
2998        """
2999        Retrieves a model response with the given ID.
3000
3001        Args:
3002          stream: If set to true, the model response data will be streamed to the client as it is
3003              generated using
3004              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
3005              See the
3006              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
3007              for more information.
3008
3009          include: Additional fields to include in the response. See the `include` parameter for
3010              Response creation above for more information.
3011
3012          include_obfuscation: When true, stream obfuscation will be enabled. Stream obfuscation adds random
3013              characters to an `obfuscation` field on streaming delta events to normalize
3014              payload sizes as a mitigation to certain side-channel attacks. These obfuscation
3015              fields are included by default, but add a small amount of overhead to the data
3016              stream. You can set `include_obfuscation` to false to optimize for bandwidth if
3017              you trust the network links between your application and the OpenAI API.
3018
3019          starting_after: The sequence number of the event after which to start streaming.
3020
3021          extra_headers: Send extra headers
3022
3023          extra_query: Add additional query parameters to the request
3024
3025          extra_body: Add additional JSON properties to the request
3026
3027          timeout: Override the client-level default timeout for this request, in seconds
3028        """
3029        ...
3030
3031    async def retrieve(
3032        self,
3033        response_id: str,
3034        *,
3035        include: List[ResponseIncludable] | Omit = omit,
3036        include_obfuscation: bool | Omit = omit,
3037        starting_after: int | Omit = omit,
3038        stream: Literal[False] | Literal[True] | Omit = omit,
3039        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
3040        # The extra values given here take precedence over values defined on the client or passed to this method.
3041        extra_headers: Headers | None = None,
3042        extra_query: Query | None = None,
3043        extra_body: Body | None = None,
3044        timeout: float | httpx.Timeout | None | NotGiven = not_given,
3045    ) -> Response | AsyncStream[ResponseStreamEvent]:
3046        if not response_id:
3047            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
3048        return await self._get(
3049            f"/responses/{response_id}",
3050            options=make_request_options(
3051                extra_headers=extra_headers,
3052                extra_query=extra_query,
3053                extra_body=extra_body,
3054                timeout=timeout,
3055                query=await async_maybe_transform(
3056                    {
3057                        "include": include,
3058                        "include_obfuscation": include_obfuscation,
3059                        "starting_after": starting_after,
3060                        "stream": stream,
3061                    },
3062                    response_retrieve_params.ResponseRetrieveParams,
3063                ),
3064            ),
3065            cast_to=Response,
3066            stream=stream or False,
3067            stream_cls=AsyncStream[ResponseStreamEvent],
3068        )
3069
3070    async def delete(
3071        self,
3072        response_id: str,
3073        *,
3074        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
3075        # The extra values given here take precedence over values defined on the client or passed to this method.
3076        extra_headers: Headers | None = None,
3077        extra_query: Query | None = None,
3078        extra_body: Body | None = None,
3079        timeout: float | httpx.Timeout | None | NotGiven = not_given,
3080    ) -> None:
3081        """
3082        Deletes a model response with the given ID.
3083
3084        Args:
3085          extra_headers: Send extra headers
3086
3087          extra_query: Add additional query parameters to the request
3088
3089          extra_body: Add additional JSON properties to the request
3090
3091          timeout: Override the client-level default timeout for this request, in seconds
3092        """
3093        if not response_id:
3094            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
3095        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
3096        return await self._delete(
3097            f"/responses/{response_id}",
3098            options=make_request_options(
3099                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
3100            ),
3101            cast_to=NoneType,
3102        )
3103
3104    async def cancel(
3105        self,
3106        response_id: str,
3107        *,
3108        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
3109        # The extra values given here take precedence over values defined on the client or passed to this method.
3110        extra_headers: Headers | None = None,
3111        extra_query: Query | None = None,
3112        extra_body: Body | None = None,
3113        timeout: float | httpx.Timeout | None | NotGiven = not_given,
3114    ) -> Response:
3115        """Cancels a model response with the given ID.
3116
3117        Only responses created with the
3118        `background` parameter set to `true` can be cancelled.
3119        [Learn more](https://platform.openai.com/docs/guides/background).
3120
3121        Args:
3122          extra_headers: Send extra headers
3123
3124          extra_query: Add additional query parameters to the request
3125
3126          extra_body: Add additional JSON properties to the request
3127
3128          timeout: Override the client-level default timeout for this request, in seconds
3129        """
3130        if not response_id:
3131            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
3132        return await self._post(
3133            f"/responses/{response_id}/cancel",
3134            options=make_request_options(
3135                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
3136            ),
3137            cast_to=Response,
3138        )
3139
3140    async def compact(
3141        self,
3142        *,
3143        input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
3144        instructions: Optional[str] | Omit = omit,
3145        model: Union[
3146            Literal[
3147                "gpt-5.1",
3148                "gpt-5.1-2025-11-13",
3149                "gpt-5.1-codex",
3150                "gpt-5.1-mini",
3151                "gpt-5.1-chat-latest",
3152                "gpt-5",
3153                "gpt-5-mini",
3154                "gpt-5-nano",
3155                "gpt-5-2025-08-07",
3156                "gpt-5-mini-2025-08-07",
3157                "gpt-5-nano-2025-08-07",
3158                "gpt-5-chat-latest",
3159                "gpt-4.1",
3160                "gpt-4.1-mini",
3161                "gpt-4.1-nano",
3162                "gpt-4.1-2025-04-14",
3163                "gpt-4.1-mini-2025-04-14",
3164                "gpt-4.1-nano-2025-04-14",
3165                "o4-mini",
3166                "o4-mini-2025-04-16",
3167                "o3",
3168                "o3-2025-04-16",
3169                "o3-mini",
3170                "o3-mini-2025-01-31",
3171                "o1",
3172                "o1-2024-12-17",
3173                "o1-preview",
3174                "o1-preview-2024-09-12",
3175                "o1-mini",
3176                "o1-mini-2024-09-12",
3177                "gpt-4o",
3178                "gpt-4o-2024-11-20",
3179                "gpt-4o-2024-08-06",
3180                "gpt-4o-2024-05-13",
3181                "gpt-4o-audio-preview",
3182                "gpt-4o-audio-preview-2024-10-01",
3183                "gpt-4o-audio-preview-2024-12-17",
3184                "gpt-4o-audio-preview-2025-06-03",
3185                "gpt-4o-mini-audio-preview",
3186                "gpt-4o-mini-audio-preview-2024-12-17",
3187                "gpt-4o-search-preview",
3188                "gpt-4o-mini-search-preview",
3189                "gpt-4o-search-preview-2025-03-11",
3190                "gpt-4o-mini-search-preview-2025-03-11",
3191                "chatgpt-4o-latest",
3192                "codex-mini-latest",
3193                "gpt-4o-mini",
3194                "gpt-4o-mini-2024-07-18",
3195                "gpt-4-turbo",
3196                "gpt-4-turbo-2024-04-09",
3197                "gpt-4-0125-preview",
3198                "gpt-4-turbo-preview",
3199                "gpt-4-1106-preview",
3200                "gpt-4-vision-preview",
3201                "gpt-4",
3202                "gpt-4-0314",
3203                "gpt-4-0613",
3204                "gpt-4-32k",
3205                "gpt-4-32k-0314",
3206                "gpt-4-32k-0613",
3207                "gpt-3.5-turbo",
3208                "gpt-3.5-turbo-16k",
3209                "gpt-3.5-turbo-0301",
3210                "gpt-3.5-turbo-0613",
3211                "gpt-3.5-turbo-1106",
3212                "gpt-3.5-turbo-0125",
3213                "gpt-3.5-turbo-16k-0613",
3214                "o1-pro",
3215                "o1-pro-2025-03-19",
3216                "o3-pro",
3217                "o3-pro-2025-06-10",
3218                "o3-deep-research",
3219                "o3-deep-research-2025-06-26",
3220                "o4-mini-deep-research",
3221                "o4-mini-deep-research-2025-06-26",
3222                "computer-use-preview",
3223                "computer-use-preview-2025-03-11",
3224                "gpt-5-codex",
3225                "gpt-5-pro",
3226                "gpt-5-pro-2025-10-06",
3227                "gpt-5.1-codex-max",
3228            ],
3229            str,
3230            None,
3231        ]
3232        | Omit = omit,
3233        previous_response_id: Optional[str] | Omit = omit,
3234        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
3235        # The extra values given here take precedence over values defined on the client or passed to this method.
3236        extra_headers: Headers | None = None,
3237        extra_query: Query | None = None,
3238        extra_body: Body | None = None,
3239        timeout: float | httpx.Timeout | None | NotGiven = not_given,
3240    ) -> CompactedResponse:
3241        """
3242        Compact conversation
3243
3244        Args:
3245          input: Text, image, or file inputs to the model, used to generate a response
3246
3247          instructions: A system (or developer) message inserted into the model's context. When used
3248              along with `previous_response_id`, the instructions from a previous response
3249              will not be carried over to the next response. This makes it simple to swap out
3250              system (or developer) messages in new responses.
3251
3252          model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a
3253              wide range of models with different capabilities, performance characteristics,
3254              and price points. Refer to the
3255              [model guide](https://platform.openai.com/docs/models) to browse and compare
3256              available models.
3257
3258          previous_response_id: The unique ID of the previous response to the model. Use this to create
3259              multi-turn conversations. Learn more about
3260              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
3261              Cannot be used in conjunction with `conversation`.
3262
3263          extra_headers: Send extra headers
3264
3265          extra_query: Add additional query parameters to the request
3266
3267          extra_body: Add additional JSON properties to the request
3268
3269          timeout: Override the client-level default timeout for this request, in seconds
3270        """
3271        return await self._post(
3272            "/responses/compact",
3273            body=await async_maybe_transform(
3274                {
3275                    "input": input,
3276                    "instructions": instructions,
3277                    "model": model,
3278                    "previous_response_id": previous_response_id,
3279                },
3280                response_compact_params.ResponseCompactParams,
3281            ),
3282            options=make_request_options(
3283                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
3284            ),
3285            cast_to=CompactedResponse,
3286        )
3287
3288
3289class ResponsesWithRawResponse:
3290    def __init__(self, responses: Responses) -> None:
3291        self._responses = responses
3292
3293        self.create = _legacy_response.to_raw_response_wrapper(
3294            responses.create,
3295        )
3296        self.retrieve = _legacy_response.to_raw_response_wrapper(
3297            responses.retrieve,
3298        )
3299        self.delete = _legacy_response.to_raw_response_wrapper(
3300            responses.delete,
3301        )
3302        self.cancel = _legacy_response.to_raw_response_wrapper(
3303            responses.cancel,
3304        )
3305        self.compact = _legacy_response.to_raw_response_wrapper(
3306            responses.compact,
3307        )
3308        self.parse = _legacy_response.to_raw_response_wrapper(
3309            responses.parse,
3310        )
3311
3312    @cached_property
3313    def input_items(self) -> InputItemsWithRawResponse:
3314        return InputItemsWithRawResponse(self._responses.input_items)
3315
3316    @cached_property
3317    def input_tokens(self) -> InputTokensWithRawResponse:
3318        return InputTokensWithRawResponse(self._responses.input_tokens)
3319
3320
3321class AsyncResponsesWithRawResponse:
3322    def __init__(self, responses: AsyncResponses) -> None:
3323        self._responses = responses
3324
3325        self.create = _legacy_response.async_to_raw_response_wrapper(
3326            responses.create,
3327        )
3328        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
3329            responses.retrieve,
3330        )
3331        self.delete = _legacy_response.async_to_raw_response_wrapper(
3332            responses.delete,
3333        )
3334        self.cancel = _legacy_response.async_to_raw_response_wrapper(
3335            responses.cancel,
3336        )
3337        self.compact = _legacy_response.async_to_raw_response_wrapper(
3338            responses.compact,
3339        )
3340        self.parse = _legacy_response.async_to_raw_response_wrapper(
3341            responses.parse,
3342        )
3343
3344    @cached_property
3345    def input_items(self) -> AsyncInputItemsWithRawResponse:
3346        return AsyncInputItemsWithRawResponse(self._responses.input_items)
3347
3348    @cached_property
3349    def input_tokens(self) -> AsyncInputTokensWithRawResponse:
3350        return AsyncInputTokensWithRawResponse(self._responses.input_tokens)
3351
3352
3353class ResponsesWithStreamingResponse:
3354    def __init__(self, responses: Responses) -> None:
3355        self._responses = responses
3356
3357        self.create = to_streamed_response_wrapper(
3358            responses.create,
3359        )
3360        self.retrieve = to_streamed_response_wrapper(
3361            responses.retrieve,
3362        )
3363        self.delete = to_streamed_response_wrapper(
3364            responses.delete,
3365        )
3366        self.cancel = to_streamed_response_wrapper(
3367            responses.cancel,
3368        )
3369        self.compact = to_streamed_response_wrapper(
3370            responses.compact,
3371        )
3372
3373    @cached_property
3374    def input_items(self) -> InputItemsWithStreamingResponse:
3375        return InputItemsWithStreamingResponse(self._responses.input_items)
3376
3377    @cached_property
3378    def input_tokens(self) -> InputTokensWithStreamingResponse:
3379        return InputTokensWithStreamingResponse(self._responses.input_tokens)
3380
3381
3382class AsyncResponsesWithStreamingResponse:
3383    def __init__(self, responses: AsyncResponses) -> None:
3384        self._responses = responses
3385
3386        self.create = async_to_streamed_response_wrapper(
3387            responses.create,
3388        )
3389        self.retrieve = async_to_streamed_response_wrapper(
3390            responses.retrieve,
3391        )
3392        self.delete = async_to_streamed_response_wrapper(
3393            responses.delete,
3394        )
3395        self.cancel = async_to_streamed_response_wrapper(
3396            responses.cancel,
3397        )
3398        self.compact = async_to_streamed_response_wrapper(
3399            responses.compact,
3400        )
3401
3402    @cached_property
3403    def input_items(self) -> AsyncInputItemsWithStreamingResponse:
3404        return AsyncInputItemsWithStreamingResponse(self._responses.input_items)
3405
3406    @cached_property
3407    def input_tokens(self) -> AsyncInputTokensWithStreamingResponse:
3408        return AsyncInputTokensWithStreamingResponse(self._responses.input_tokens)
3409
3410
3411def _make_tools(tools: Iterable[ParseableToolParam] | Omit) -> List[ToolParam] | Omit:
3412    if not is_given(tools):
3413        return omit
3414
3415    converted_tools: List[ToolParam] = []
3416    for tool in tools:
3417        if tool["type"] != "function":
3418            converted_tools.append(tool)
3419            continue
3420
3421        if "function" not in tool:
3422            # standard Responses API case
3423            converted_tools.append(tool)
3424            continue
3425
3426        function = cast(Any, tool)["function"]  # pyright: ignore[reportUnnecessaryCast]
3427        if not isinstance(function, PydanticFunctionTool):
3428            raise Exception(
3429                "Expected Chat Completions function tool shape to be created using `openai.pydantic_function_tool()`"
3430            )
3431
3432        assert "parameters" in function
3433        new_tool = ResponsesPydanticFunctionTool(
3434            {
3435                "type": "function",
3436                "name": function["name"],
3437                "description": function.get("description"),
3438                "parameters": function["parameters"],
3439                "strict": function.get("strict") or False,
3440            },
3441            function.model,
3442        )
3443
3444        converted_tools.append(new_tool.cast())
3445
3446    return converted_tools