Commit 653dfec4

stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
2025-03-20 04:39:23
feat(api): o1-pro now available through the API (#2228)
1 parent bff8da9
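The headline change: `o1-pro` (and its dated snapshot) can now be passed as the `model` for the Responses API with static type-checker support. A minimal usage sketch, assuming an `OPENAI_API_KEY` in the environment (plain model strings were already accepted at runtime; the new alias adds the literals for type checking and autocomplete):

from openai import OpenAI

client = OpenAI()

# o1-pro is available through the Responses API, not Chat Completions.
response = client.responses.create(
    model="o1-pro",
    input="Summarize this commit in one sentence.",
)
print(response.output_text)  # convenience accessor for the text output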
src/openai/resources/responses/responses.py
@@ -44,6 +44,7 @@ from ...types.shared_params.reasoning import Reasoning
 from ...types.responses.parsed_response import ParsedResponse
 from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager
 from ...types.responses.response_includable import ResponseIncludable
+from ...types.shared_params.responses_model import ResponsesModel
 from ...types.responses.response_input_param import ResponseInputParam
 from ...types.responses.response_stream_event import ResponseStreamEvent
 from ...types.responses.response_text_config_param import ResponseTextConfigParam
@@ -80,7 +81,7 @@ class Responses(SyncAPIResource):
         self,
         *,
         input: Union[str, ResponseInputParam],
-        model: Union[str, ChatModel],
+        model: ResponsesModel,
         include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
@@ -245,7 +246,7 @@ class Responses(SyncAPIResource):
         self,
         *,
         input: Union[str, ResponseInputParam],
-        model: Union[str, ChatModel],
+        model: ResponsesModel,
         stream: Literal[True],
         include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -410,7 +411,7 @@ class Responses(SyncAPIResource):
         self,
         *,
         input: Union[str, ResponseInputParam],
-        model: Union[str, ChatModel],
+        model: ResponsesModel,
         stream: bool,
         include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -575,7 +576,7 @@ class Responses(SyncAPIResource):
         self,
         *,
         input: Union[str, ResponseInputParam],
-        model: Union[str, ChatModel],
+        model: ResponsesModel,
         include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
@@ -892,7 +893,7 @@ class AsyncResponses(AsyncAPIResource):
         self,
         *,
         input: Union[str, ResponseInputParam],
-        model: Union[str, ChatModel],
+        model: ResponsesModel,
         include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
@@ -1057,7 +1058,7 @@ class AsyncResponses(AsyncAPIResource):
         self,
         *,
         input: Union[str, ResponseInputParam],
-        model: Union[str, ChatModel],
+        model: ResponsesModel,
         stream: Literal[True],
         include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -1222,7 +1223,7 @@ class AsyncResponses(AsyncAPIResource):
         self,
         *,
         input: Union[str, ResponseInputParam],
-        model: Union[str, ChatModel],
+        model: ResponsesModel,
         stream: bool,
         include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
@@ -1387,7 +1388,7 @@ class AsyncResponses(AsyncAPIResource):
         self,
         *,
         input: Union[str, ResponseInputParam],
-        model: Union[str, ChatModel],
+        model: ResponsesModel,
         include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
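Every `create` overload in both the sync and async resources swaps `Union[str, ChatModel]` for the new shared `ResponsesModel` alias. A sketch of the alias's shape, not the generated source (the literal set is abbreviated here and the exact members are an assumption):

from typing import Union
from typing_extensions import Literal, TypeAlias

# Abbreviated stand-in; the real ChatModel literal set is much larger.
ChatModel: TypeAlias = Literal["gpt-4o", "o1"]

ResponsesModel: TypeAlias = Union[
    str,        # arbitrary model IDs remain valid at runtime
    ChatModel,  # chat-capable models
    # Responses-only models added by this release (assumed spelling):
    Literal["o1-pro", "o1-pro-2025-03-19", "computer-use-preview", "computer-use-preview-2025-03-11"],
]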
src/openai/resources/responses/responses.py.orig
@@ -0,0 +1,1796 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Any, List, Type, Union, Iterable, Optional, cast
+from functools import partial
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
+from ..._utils import (
+    is_given,
+    required_args,
+    maybe_transform,
+    async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .input_items import (
+    InputItems,
+    AsyncInputItems,
+    InputItemsWithRawResponse,
+    AsyncInputItemsWithRawResponse,
+    InputItemsWithStreamingResponse,
+    AsyncInputItemsWithStreamingResponse,
+)
+from ..._streaming import Stream, AsyncStream
+from ...lib._tools import PydanticFunctionTool, ResponsesPydanticFunctionTool
+from ..._base_client import make_request_options
+from ...types.responses import response_create_params, response_retrieve_params
+from ...lib._parsing._responses import (
+    TextFormatT,
+    parse_response,
+    type_to_text_format_param as _type_to_text_format_param,
+)
+from ...types.shared.chat_model import ChatModel
+from ...types.responses.response import Response
+from ...types.responses.tool_param import ToolParam, ParseableToolParam
+from ...types.shared_params.metadata import Metadata
+from ...types.shared_params.reasoning import Reasoning
+from ...types.responses.parsed_response import ParsedResponse
+from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager
+from ...types.responses.response_includable import ResponseIncludable
+from ...types.shared_params.responses_model import ResponsesModel
+from ...types.responses.response_input_param import ResponseInputParam
+from ...types.responses.response_stream_event import ResponseStreamEvent
+from ...types.responses.response_text_config_param import ResponseTextConfigParam
+
+__all__ = ["Responses", "AsyncResponses"]
+
+
+class Responses(SyncAPIResource):
+    @cached_property
+    def input_items(self) -> InputItems:
+        return InputItems(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> ResponsesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return ResponsesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ResponsesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return ResponsesWithStreamingResponse(self)
+
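A hedged sketch of the two wrappers described above (assumes a configured `client`):

# Raw response: exposes headers before parsing the body.
raw = client.responses.with_raw_response.create(model="gpt-4o", input="hi")
print(raw.headers.get("x-request-id"))
response = raw.parse()  # the typed Response object

# Streaming response: does not eagerly read the body.
with client.responses.with_streaming_response.create(model="gpt-4o", input="hi") as streamed:
    for line in streamed.iter_lines():
        ...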
+    @overload
+    def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image urls from the input message.
+              - `computer_call_output.output.image_url`: Include image urls from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter to see how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
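To make the parameter surface above concrete, a hedged call sketch for this non-streaming overload (the values are illustrative):

response = client.responses.create(
    model="gpt-4o",
    input="Draft a haiku about type annotations.",
    instructions="You are a terse poet.",  # inserted as the first system/developer item
    max_output_tokens=256,
    temperature=0.2,             # alter this or top_p, not both
    metadata={"team": "docs"},   # up to 16 pairs; keys <= 64 chars, values <= 512
    truncation="disabled",       # the default: overflow fails with a 400
)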
+    @overload
+    def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        stream: Literal[True],
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[ResponseStreamEvent]:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image urls from the input message.
+              - `computer_call_output.output.image_url`: Include image urls from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter to see how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        stream: bool,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | Stream[ResponseStreamEvent]:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image urls from the input message.
+              - `computer_call_output.output.image_url`: Include image urls from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter to see how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["input", "model"], ["input", "model", "stream"])
+    def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | Stream[ResponseStreamEvent]:
+        return self._post(
+            "/responses",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "include": include,
+                    "instructions": instructions,
+                    "max_output_tokens": max_output_tokens,
+                    "metadata": metadata,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation": truncation,
+                    "user": user,
+                },
+                response_create_params.ResponseCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Response,
+            stream=stream or False,
+            stream_cls=Stream[ResponseStreamEvent],
+        )
+
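The single implementation above dispatches on `stream`; a usage sketch of the streaming path (assumes a configured `client`):

stream = client.responses.create(
    model="o1-pro",
    input="Count to five.",
    stream=True,  # selects the Stream[ResponseStreamEvent] return type
)
for event in stream:
    if event.type == "response.output_text.delta":
        print(event.delta, end="")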
+    def stream(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: Union[str, ChatModel],
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ResponseStreamManager[TextFormatT]:
+        if is_given(text_format):
+            if not text:
+                text = {}
+
+            if "format" in text:
+                raise TypeError("Cannot mix and match text.format with text_format")
+
+            text["format"] = _type_to_text_format_param(text_format)
+
+        tools = _make_tools(tools)
+
+        api_request: partial[Stream[ResponseStreamEvent]] = partial(
+            self.create,
+            input=input,
+            model=model,
+            tools=tools,
+            include=include,
+            instructions=instructions,
+            max_output_tokens=max_output_tokens,
+            metadata=metadata,
+            parallel_tool_calls=parallel_tool_calls,
+            previous_response_id=previous_response_id,
+            store=store,
+            stream=True,
+            temperature=temperature,
+            text=text,
+            tool_choice=tool_choice,
+            reasoning=reasoning,
+            top_p=top_p,
+            truncation=truncation,
+            user=user,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+
+        return ResponseStreamManager(
+            api_request,
+            text_format=text_format,
+            input_tools=tools,
+        )
+
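The `stream()` helper wraps `create(stream=True)` in a context manager and optionally parses output against `text_format`. A sketch, assuming the manager mirrors the chat-completions streaming helper (the `Summary` model is hypothetical):

from pydantic import BaseModel

class Summary(BaseModel):
    title: str
    bullets: list[str]

with client.responses.stream(
    model="gpt-4o",
    input="Summarize the Responses API in three bullets.",
    text_format=Summary,  # merged into text["format"]; mixing both raises TypeError
) as stream:
    for event in stream:
        ...  # incremental ResponseStreamEvents
    final = stream.get_final_response()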
+    def parse(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: Union[str, ChatModel],
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ParsedResponse[TextFormatT]:
+        if is_given(text_format):
+            if not text:
+                text = {}
+
+            if "format" in text:
+                raise TypeError("Cannot mix and match text.format with text_format")
+
+            text["format"] = _type_to_text_format_param(text_format)
+
+        tools = _make_tools(tools)
+
+        def parser(raw_response: Response) -> ParsedResponse[TextFormatT]:
+            return parse_response(
+                input_tools=tools,
+                text_format=text_format,
+                response=raw_response,
+            )
+
+        return self._post(
+            "/responses",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "include": include,
+                    "instructions": instructions,
+                    "max_output_tokens": max_output_tokens,
+                    "metadata": metadata,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation": truncation,
+                    "user": user,
+                },
+                response_create_params.ResponseCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            # we turn the `Response` instance into a `ParsedResponse`
+            # in the `parser` function above
+            cast_to=cast(Type[ParsedResponse[TextFormatT]], Response),
+        )
+
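`parse()` posts the same body as `create()` but runs a `post_parser` that upgrades the `Response` into a `ParsedResponse`. A sketch with a hypothetical Pydantic target:

from pydantic import BaseModel

class CalendarEvent(BaseModel):
    name: str
    date: str

parsed = client.responses.parse(
    model="gpt-4o",
    input="Alice and Bob meet on Friday.",
    text_format=CalendarEvent,
)
print(parsed.output_parsed)  # assuming the ParsedResponse convenience accessor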
+    def retrieve(
+        self,
+        response_id: str,
+        *,
+        include: List[ResponseIncludable] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response:
+        """
+        Retrieves a model response with the given ID.
+
+        Args:
+          include: Additional fields to include in the response. See the `include` parameter for
+              Response creation above for more information.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        return self._get(
+            f"/responses/{response_id}",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform({"include": include}, response_retrieve_params.ResponseRetrieveParams),
+            ),
+            cast_to=Response,
+        )
+
+    def delete(
+        self,
+        response_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> None:
+        """
+        Deletes a model response with the given ID.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+        return self._delete(
+            f"/responses/{response_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=NoneType,
+        )
+
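A round-trip sketch for the two methods above, assuming `response` came from an earlier `create()` call:

fetched = client.responses.retrieve(response.id)
assert fetched.id == response.id
client.responses.delete(response.id)  # returns None; empty IDs raise ValueError client-side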
+
+class AsyncResponses(AsyncAPIResource):
+    @cached_property
+    def input_items(self) -> AsyncInputItems:
+        return AsyncInputItems(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncResponsesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncResponsesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncResponsesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncResponsesWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image urls from the input message.
+              - `computer_call_output.output.image_url`: Include image urls from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter to see how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
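
For orientation, a minimal sketch of calling this non-streaming overload; the prompt, and the assumption that `OPENAI_API_KEY` is set in the environment, are illustrative rather than part of the diff:

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()  # assumes OPENAI_API_KEY is set in the environment

async def main() -> None:
    # Without `stream`, this resolves to the overload above and returns a Response.
    response = await client.responses.create(
        model="o1-pro",  # accepted now that `model` is typed as ResponsesModel
        input="Write one sentence about type aliases.",
    )
    print(response.output_text)  # convenience accessor for the text output

asyncio.run(main())
```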
+
+    @overload
+    async def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        stream: Literal[True],
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[ResponseStreamEvent]:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter for how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        stream: bool,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | AsyncStream[ResponseStreamEvent]:
+        """Creates a model response.
+
+        Provide
+        [text](https://platform.openai.com/docs/guides/text) or
+        [image](https://platform.openai.com/docs/guides/images) inputs to generate
+        [text](https://platform.openai.com/docs/guides/text) or
+        [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have
+        the model call your own
+        [custom code](https://platform.openai.com/docs/guides/function-calling) or use
+        built-in [tools](https://platform.openai.com/docs/guides/tools) like
+        [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+        [file search](https://platform.openai.com/docs/guides/tools-file-search) to use
+        your own data as input for the model's response.
+
+        Args:
+          input: Text, image, or file inputs to the model, used to generate a response.
+
+              Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Image inputs](https://platform.openai.com/docs/guides/images)
+              - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+              - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+              - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          include: Specify additional output data to include in the model response. Currently
+              supported values are:
+
+              - `file_search_call.results`: Include the search results of the file search tool
+                call.
+              - `message.input_image.image_url`: Include image URLs from the input message.
+              - `computer_call_output.output.image_url`: Include image URLs from the computer
+                call output.
+
+          instructions: Inserts a system (or developer) message as the first item in the model's
+              context.
+
+              When used along with `previous_response_id`, the instructions from a previous
+              response will not be carried over to the next response. This makes it simple
+              to swap out system (or developer) messages in new responses.
+
+          max_output_tokens: An upper bound for the number of tokens that can be generated for a response,
+              including visible output tokens and
+              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format, and
+              querying for objects via API or the dashboard.
+
+              Keys are strings with a maximum length of 64 characters. Values are strings with
+              a maximum length of 512 characters.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+
+          reasoning: **o-series models only**
+
+              Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          store: Whether to store the generated model response for later retrieval via API.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or `top_p` but
+              not both.
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter for how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+              The two categories of tools you can provide the model are:
+
+              - **Built-in tools**: Tools that are provided by OpenAI that extend the model's
+                capabilities, like
+                [web search](https://platform.openai.com/docs/guides/tools-web-search) or
+                [file search](https://platform.openai.com/docs/guides/tools-file-search).
+                Learn more about
+                [built-in tools](https://platform.openai.com/docs/guides/tools).
+              - **Function calls (custom tools)**: Functions that are defined by you, enabling
+                the model to call your own code. Learn more about
+                [function calling](https://platform.openai.com/docs/guides/function-calling).
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          truncation: The truncation strategy to use for the model response.
+
+              - `auto`: If the context of this response and previous ones exceeds the model's
+                context window size, the model will truncate the response to fit the context
+                window by dropping input items in the middle of the conversation.
+              - `disabled` (default): If a model response will exceed the context window size
+                for a model, the request will fail with a 400 error.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["input", "model"], ["input", "model", "stream"])
+    async def create(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | AsyncStream[ResponseStreamEvent]:
+        return await self._post(
+            "/responses",
+            body=await async_maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "include": include,
+                    "instructions": instructions,
+                    "max_output_tokens": max_output_tokens,
+                    "metadata": metadata,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation": truncation,
+                    "user": user,
+                },
+                response_create_params.ResponseCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Response,
+            stream=stream or False,
+            stream_cls=AsyncStream[ResponseStreamEvent],
+        )
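
With the concrete implementation in place, the `stream=True` path returns an `AsyncStream[ResponseStreamEvent]` that can be iterated directly; a sketch (the prompt is invented, and the event-type filter follows the Responses streaming event names):

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()

async def main() -> None:
    # `stream=True` selects the AsyncStream overload above.
    stream = await client.responses.create(
        model="gpt-4o",
        input="Count to five.",
        stream=True,
    )
    async for event in stream:
        # Each event is a discriminated-union member; filter on its `type` tag.
        if event.type == "response.output_text.delta":
            print(event.delta, end="", flush=True)

asyncio.run(main())
```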
+
+    def stream(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncResponseStreamManager[TextFormatT]:
+        if is_given(text_format):
+            if not text:
+                text = {}
+
+            if "format" in text:
+                raise TypeError("Cannot mix and match text.format with text_format")
+
+            text["format"] = _type_to_text_format_param(text_format)
+
+        tools = _make_tools(tools)
+
+        api_request = self.create(
+            input=input,
+            model=model,
+            tools=tools,
+            include=include,
+            instructions=instructions,
+            max_output_tokens=max_output_tokens,
+            metadata=metadata,
+            parallel_tool_calls=parallel_tool_calls,
+            previous_response_id=previous_response_id,
+            store=store,
+            stream=True,
+            temperature=temperature,
+            text=text,
+            tool_choice=tool_choice,
+            reasoning=reasoning,
+            top_p=top_p,
+            truncation=truncation,
+            user=user,
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+
+        return AsyncResponseStreamManager(
+            api_request,
+            text_format=text_format,
+            input_tools=tools,
+        )
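
Compared to raw `create(stream=True)`, the `stream()` helper above wraps the request in an `AsyncResponseStreamManager` meant for `async with` usage; a hedged sketch (assuming the manager exposes `get_final_response()` like its sync counterpart):

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()

async def main() -> None:
    async with client.responses.stream(
        model="gpt-4o",
        input="Name three colors.",
    ) as stream:
        async for event in stream:
            if event.type == "response.output_text.delta":
                print(event.delta, end="", flush=True)
        # Once the event stream drains, the accumulated response is available.
        final = await stream.get_final_response()
        print("\ntotal output items:", len(final.output))

asyncio.run(main())
```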
+
+    async def parse(
+        self,
+        *,
+        input: Union[str, ResponseInputParam],
+        model: ResponsesModel,
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ParsedResponse[TextFormatT]:
+        if is_given(text_format):
+            if not text:
+                text = {}
+
+            if "format" in text:
+                raise TypeError("Cannot mix and match text.format with text_format")
+
+            text["format"] = _type_to_text_format_param(text_format)
+
+        tools = _make_tools(tools)
+
+        def parser(raw_response: Response) -> ParsedResponse[TextFormatT]:
+            return parse_response(
+                input_tools=tools,
+                text_format=text_format,
+                response=raw_response,
+            )
+
+        return await self._post(
+            "/responses",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "include": include,
+                    "instructions": instructions,
+                    "max_output_tokens": max_output_tokens,
+                    "metadata": metadata,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_p": top_p,
+                    "truncation": truncation,
+                    "user": user,
+                },
+                response_create_params.ResponseCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                post_parser=parser,
+            ),
+            # we turn the `Response` instance into a `ParsedResponse`
+            # in the `parser` function above
+            cast_to=cast(Type[ParsedResponse[TextFormatT]], Response),
+        )
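
Because `parse()` converts `text_format` via `_type_to_text_format_param`, a Pydantic model can be used to request and validate Structured Outputs; a sketch with an invented `CalendarEvent` schema:

```python
import asyncio
from typing import List, Optional

from pydantic import BaseModel

from openai import AsyncOpenAI

client = AsyncOpenAI()

class CalendarEvent(BaseModel):
    # Hypothetical schema, used only to illustrate `text_format`.
    name: str
    date: str
    participants: List[str]

async def main() -> None:
    parsed = await client.responses.parse(
        model="gpt-4o",
        input="Alice and Bob are meeting for lunch on Friday.",
        text_format=CalendarEvent,
    )
    # The post_parser above upgrades the Response into a ParsedResponse,
    # whose `output_parsed` carries the validated model instance.
    event: Optional[CalendarEvent] = parsed.output_parsed
    if event is not None:
        print(event.name, event.date, event.participants)

asyncio.run(main())
```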
+
+    async def retrieve(
+        self,
+        response_id: str,
+        *,
+        include: List[ResponseIncludable] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response:
+        """
+        Retrieves a model response with the given ID.
+
+        Args:
+          include: Additional fields to include in the response. See the `include` parameter for
+              Response creation above for more information.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        return await self._get(
+            f"/responses/{response_id}",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=await async_maybe_transform(
+                    {"include": include}, response_retrieve_params.ResponseRetrieveParams
+                ),
+            ),
+            cast_to=Response,
+        )
+
+    async def delete(
+        self,
+        response_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> None:
+        """
+        Deletes a model response with the given ID.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not response_id:
+            raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}")
+        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+        return await self._delete(
+            f"/responses/{response_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=NoneType,
+        )
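
The retrieval and deletion endpoints round out the resource; a short usage sketch (the response ID is a placeholder, and `include` reuses the same values documented for `create`):

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()

async def main() -> None:
    response_id = "resp_123"  # placeholder; use an ID from a stored response

    # Fetch the stored response, optionally expanding extra fields.
    response = await client.responses.retrieve(
        response_id,
        include=["file_search_call.results"],
    )
    print(response.status)

    # Delete it when no longer needed; the API returns no body (cast_to=NoneType).
    await client.responses.delete(response_id)

asyncio.run(main())
```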
+
+
+class ResponsesWithRawResponse:
+    def __init__(self, responses: Responses) -> None:
+        self._responses = responses
+
+        self.create = _legacy_response.to_raw_response_wrapper(
+            responses.create,
+        )
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
+            responses.retrieve,
+        )
+        self.delete = _legacy_response.to_raw_response_wrapper(
+            responses.delete,
+        )
+
+    @cached_property
+    def input_items(self) -> InputItemsWithRawResponse:
+        return InputItemsWithRawResponse(self._responses.input_items)
+
+
+class AsyncResponsesWithRawResponse:
+    def __init__(self, responses: AsyncResponses) -> None:
+        self._responses = responses
+
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            responses.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            responses.retrieve,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            responses.delete,
+        )
+
+    @cached_property
+    def input_items(self) -> AsyncInputItemsWithRawResponse:
+        return AsyncInputItemsWithRawResponse(self._responses.input_items)
+
+
+class ResponsesWithStreamingResponse:
+    def __init__(self, responses: Responses) -> None:
+        self._responses = responses
+
+        self.create = to_streamed_response_wrapper(
+            responses.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            responses.retrieve,
+        )
+        self.delete = to_streamed_response_wrapper(
+            responses.delete,
+        )
+
+    @cached_property
+    def input_items(self) -> InputItemsWithStreamingResponse:
+        return InputItemsWithStreamingResponse(self._responses.input_items)
+
+
+class AsyncResponsesWithStreamingResponse:
+    def __init__(self, responses: AsyncResponses) -> None:
+        self._responses = responses
+
+        self.create = async_to_streamed_response_wrapper(
+            responses.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            responses.retrieve,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            responses.delete,
+        )
+
+    @cached_property
+    def input_items(self) -> AsyncInputItemsWithStreamingResponse:
+        return AsyncInputItemsWithStreamingResponse(self._responses.input_items)
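
These four wrapper classes are what the client exposes as `.with_raw_response` and `.with_streaming_response`; a sketch of the raw variant (assuming the standard accessor wiring elsewhere in the SDK):

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()

async def main() -> None:
    # `.with_raw_response` yields HTTP-level data alongside the parsed body.
    raw = await client.responses.with_raw_response.create(
        model="gpt-4o",
        input="Hello",
    )
    print(raw.headers.get("content-type"))
    response = raw.parse()  # the regular `Response` object
    print(response.id)

asyncio.run(main())
```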
+
+
+def _make_tools(tools: Iterable[ParseableToolParam] | NotGiven) -> List[ToolParam] | NotGiven:
+    if not is_given(tools):
+        return NOT_GIVEN
+
+    converted_tools: List[ToolParam] = []
+    for tool in tools:
+        if tool["type"] != "function":
+            converted_tools.append(tool)
+            continue
+
+        if "function" not in tool:
+            # standard Responses API case
+            converted_tools.append(tool)
+            continue
+
+        function = cast(Any, tool)["function"]  # pyright: ignore[reportUnnecessaryCast]
+        if not isinstance(function, PydanticFunctionTool):
+            raise Exception(
+                "Expected Chat Completions function tool shape to be created using `openai.pydantic_function_tool()`"
+            )
+
+        assert "parameters" in function
+        new_tool = ResponsesPydanticFunctionTool(
+            {
+                "type": "function",
+                "name": function["name"],
+                "description": function.get("description"),
+                "parameters": function["parameters"],
+                "strict": function.get("strict") or False,
+            },
+            function.model,
+        )
+
+        converted_tools.append(new_tool.cast())
+
+    return converted_tools
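
`_make_tools` exists so tools built for Chat Completions with `openai.pydantic_function_tool()` can be passed straight to `stream()`/`parse()`; a sketch of the shape it rewrites (the `GetWeather` model is invented for illustration):

```python
import openai
from pydantic import BaseModel

class GetWeather(BaseModel):
    # Hypothetical function arguments; the class name becomes the tool name.
    city: str

# Chat Completions shape: {"type": "function", "function": {...}}, where the
# inner dict is a PydanticFunctionTool -- exactly the branch _make_tools
# flattens into the Responses shape {"type": "function", "name": ..., ...}.
chat_tool = openai.pydantic_function_tool(GetWeather)
print(chat_tool["function"]["name"])  # "GetWeather"
```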
src/openai/types/responses/response.py
@@ -11,11 +11,11 @@ from .response_status import ResponseStatus
 from ..shared.metadata import Metadata
 from ..shared.reasoning import Reasoning
 from .tool_choice_types import ToolChoiceTypes
-from ..shared.chat_model import ChatModel
 from .tool_choice_options import ToolChoiceOptions
 from .response_output_item import ResponseOutputItem
 from .response_text_config import ResponseTextConfig
 from .tool_choice_function import ToolChoiceFunction
+from ..shared.responses_model import ResponsesModel
 
 __all__ = ["Response", "IncompleteDetails", "ToolChoice"]
 
@@ -61,7 +61,7 @@ class Response(BaseModel):
     a maximum length of 512 characters.
     """
 
-    model: Union[str, ChatModel]
+    model: ResponsesModel
     """Model ID used to generate the response, like `gpt-4o` or `o1`.
 
     OpenAI offers a wide range of models with different capabilities, performance
src/openai/types/responses/response_create_params.py
@@ -6,7 +6,6 @@ from typing import List, Union, Iterable, Optional
 from typing_extensions import Literal, Required, TypeAlias, TypedDict
 
 from .tool_param import ToolParam
-from ..shared.chat_model import ChatModel
 from .response_includable import ResponseIncludable
 from .tool_choice_options import ToolChoiceOptions
 from .response_input_param import ResponseInputParam
@@ -15,6 +14,7 @@ from .tool_choice_types_param import ToolChoiceTypesParam
 from ..shared_params.reasoning import Reasoning
 from .response_text_config_param import ResponseTextConfigParam
 from .tool_choice_function_param import ToolChoiceFunctionParam
+from ..shared_params.responses_model import ResponsesModel
 
 __all__ = [
     "ResponseCreateParamsBase",
@@ -37,7 +37,7 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     - [Function calling](https://platform.openai.com/docs/guides/function-calling)
     """
 
-    model: Required[Union[str, ChatModel]]
+    model: Required[ResponsesModel]
     """Model ID used to generate the response, like `gpt-4o` or `o1`.
 
     OpenAI offers a wide range of models with different capabilities, performance
src/openai/types/responses/response_function_tool_call_item.py
@@ -8,4 +8,4 @@ __all__ = ["ResponseFunctionToolCallItem"]
 
 class ResponseFunctionToolCallItem(ResponseFunctionToolCall):
     id: str  # type: ignore
-    """The unique ID of the function call tool output."""
+    """The unique ID of the function tool call."""
src/openai/types/shared/__init__.py
@@ -2,9 +2,11 @@
 
 from .metadata import Metadata as Metadata
 from .reasoning import Reasoning as Reasoning
+from .all_models import AllModels as AllModels
 from .chat_model import ChatModel as ChatModel
 from .error_object import ErrorObject as ErrorObject
 from .compound_filter import CompoundFilter as CompoundFilter
+from .responses_model import ResponsesModel as ResponsesModel
 from .reasoning_effort import ReasoningEffort as ReasoningEffort
 from .comparison_filter import ComparisonFilter as ComparisonFilter
 from .function_definition import FunctionDefinition as FunctionDefinition
src/openai/types/shared/all_models.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal, TypeAlias
+
+from .chat_model import ChatModel
+
+__all__ = ["AllModels"]
+
+AllModels: TypeAlias = Union[
+    str,
+    ChatModel,
+    Literal["o1-pro", "o1-pro-2025-03-19", "computer-use-preview", "computer-use-preview-2025-03-11"],
+]
src/openai/types/shared/chat_model.py
@@ -13,11 +13,6 @@ ChatModel: TypeAlias = Literal[
     "o1-preview-2024-09-12",
     "o1-mini",
     "o1-mini-2024-09-12",
-    "computer-use-preview",
-    "computer-use-preview-2025-02-04",
-    "computer-use-preview-2025-03-11",
-    "gpt-4.5-preview",
-    "gpt-4.5-preview-2025-02-27",
     "gpt-4o",
     "gpt-4o-2024-11-20",
     "gpt-4o-2024-08-06",
@@ -27,6 +22,10 @@ ChatModel: TypeAlias = Literal[
     "gpt-4o-audio-preview-2024-12-17",
     "gpt-4o-mini-audio-preview",
     "gpt-4o-mini-audio-preview-2024-12-17",
+    "gpt-4o-search-preview",
+    "gpt-4o-mini-search-preview",
+    "gpt-4o-search-preview-2025-03-11",
+    "gpt-4o-mini-search-preview-2025-03-11",
     "chatgpt-4o-latest",
     "gpt-4o-mini",
     "gpt-4o-mini-2024-07-18",
src/openai/types/shared/responses_model.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal, TypeAlias
+
+from .chat_model import ChatModel
+
+__all__ = ["ResponsesModel"]
+
+ResponsesModel: TypeAlias = Union[
+    str, ChatModel, Literal["o1-pro", "o1-pro-2025-03-19", "computer-use-preview", "computer-use-preview-2025-03-11"]
+]
src/openai/types/shared_params/__init__.py
@@ -4,6 +4,7 @@ from .metadata import Metadata as Metadata
 from .reasoning import Reasoning as Reasoning
 from .chat_model import ChatModel as ChatModel
 from .compound_filter import CompoundFilter as CompoundFilter
+from .responses_model import ResponsesModel as ResponsesModel
 from .reasoning_effort import ReasoningEffort as ReasoningEffort
 from .comparison_filter import ComparisonFilter as ComparisonFilter
 from .function_definition import FunctionDefinition as FunctionDefinition
src/openai/types/shared_params/chat_model.py
@@ -15,11 +15,6 @@ ChatModel: TypeAlias = Literal[
     "o1-preview-2024-09-12",
     "o1-mini",
     "o1-mini-2024-09-12",
-    "computer-use-preview",
-    "computer-use-preview-2025-02-04",
-    "computer-use-preview-2025-03-11",
-    "gpt-4.5-preview",
-    "gpt-4.5-preview-2025-02-27",
     "gpt-4o",
     "gpt-4o-2024-11-20",
     "gpt-4o-2024-08-06",
@@ -29,6 +24,10 @@ ChatModel: TypeAlias = Literal[
     "gpt-4o-audio-preview-2024-12-17",
     "gpt-4o-mini-audio-preview",
     "gpt-4o-mini-audio-preview-2024-12-17",
+    "gpt-4o-search-preview",
+    "gpt-4o-mini-search-preview",
+    "gpt-4o-search-preview-2025-03-11",
+    "gpt-4o-mini-search-preview-2025-03-11",
     "chatgpt-4o-latest",
     "gpt-4o-mini",
     "gpt-4o-mini-2024-07-18",
src/openai/types/shared_params/responses_model.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, TypeAlias
+
+from ..shared.chat_model import ChatModel
+
+__all__ = ["ResponsesModel"]
+
+ResponsesModel: TypeAlias = Union[
+    str, ChatModel, Literal["o1-pro", "o1-pro-2025-03-19", "computer-use-preview", "computer-use-preview-2025-03-11"]
+]
src/openai/types/__init__.py
@@ -7,10 +7,12 @@ from .image import Image as Image
 from .model import Model as Model
 from .shared import (
     Metadata as Metadata,
+    AllModels as AllModels,
     ChatModel as ChatModel,
     Reasoning as Reasoning,
     ErrorObject as ErrorObject,
     CompoundFilter as CompoundFilter,
+    ResponsesModel as ResponsesModel,
     ReasoningEffort as ReasoningEffort,
     ComparisonFilter as ComparisonFilter,
     FunctionDefinition as FunctionDefinition,
.stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 81
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-f3bce04386c4fcfd5037e0477fbaa39010003fd1558eb5185fe4a71dd6a05fdd.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-b26121d5df6eb5d3032a45a267473798b15fcfec76dd44a3256cf1238be05fa4.yml
api.md
@@ -2,6 +2,7 @@
 
 ```python
 from openai.types import (
+    AllModels,
     ChatModel,
     ComparisonFilter,
     CompoundFilter,
@@ -14,6 +15,7 @@ from openai.types import (
     ResponseFormatJSONObject,
     ResponseFormatJSONSchema,
     ResponseFormatText,
+    ResponsesModel,
 )
 ```
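
The net effect of the new alias, matching the commit title: `o1-pro` (and the computer-use previews) now satisfy the `model` parameter's type for Responses even though they were removed from `ChatModel`; a sketch using the export added above:

```python
from openai.types import ResponsesModel

# All of these satisfy the ResponsesModel alias:
models: list[ResponsesModel] = [
    "gpt-4o",                # via ChatModel
    "o1-pro",                # via the new Responses-only Literal
    "computer-use-preview",  # Responses-only after its removal from ChatModel
    "my-custom-model",       # bare strings remain allowed
]
```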