Commit 62a184d7

stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
2025-10-21 00:44:45
feat(api): Add responses.input_tokens.count

Adds a `responses.input_tokens` resource (sync and async) whose `count` method POSTs to `/responses/input_tokens` and returns an `InputTokenCountResponse` with the number of input tokens a request would consume.

Parent: 0a5ad3e
examples/responses_input_tokens.py
@@ -0,0 +1,54 @@
+from typing import List
+
+from openai import OpenAI
+from openai.types.responses.tool_param import ToolParam
+from openai.types.responses.response_input_item_param import ResponseInputItemParam
+
+
+def main() -> None:
+    client = OpenAI()
+    tools: List[ToolParam] = [
+        {
+            "type": "function",
+            "name": "get_current_weather",
+            "description": "Get current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "City and state, e.g. San Francisco, CA",
+                    },
+                    "unit": {
+                        "type": "string",
+                        "enum": ["c", "f"],
+                        "description": "Temperature unit to use",
+                    },
+                },
+                "required": ["location", "unit"],
+                "additionalProperties": False,
+            },
+            "strict": True,
+        }
+    ]
+
+    input_items: List[ResponseInputItemParam] = [
+        {
+            "type": "message",
+            "role": "user",
+            "content": [{"type": "input_text", "text": "What's the weather in San Francisco today?"}],
+        }
+    ]
+
+    response = client.responses.input_tokens.count(
+        model="gpt-5",
+        instructions="You are a concise assistant.",
+        input=input_items,
+        tools=tools,
+        tool_choice={"type": "function", "name": "get_current_weather"},
+    )
+    print(f"input tokens: {response.input_tokens}")
+
+
+if __name__ == "__main__":
+    main()
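
The `AsyncInputTokens` resource added below supports the same call asynchronously. A minimal sketch, not part of the commit, assuming `OPENAI_API_KEY` is set in the environment:

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    # `input` also accepts a plain string, per the `count` signature below.
    response = await client.responses.input_tokens.count(
        model="gpt-5",
        input="What's the weather in San Francisco today?",
    )
    print(f"input tokens: {response.input_tokens}")


asyncio.run(main())
```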
src/openai/resources/responses/__init__.py
@@ -16,6 +16,14 @@ from .input_items import (
     InputItemsWithStreamingResponse,
     AsyncInputItemsWithStreamingResponse,
 )
+from .input_tokens import (
+    InputTokens,
+    AsyncInputTokens,
+    InputTokensWithRawResponse,
+    AsyncInputTokensWithRawResponse,
+    InputTokensWithStreamingResponse,
+    AsyncInputTokensWithStreamingResponse,
+)
 
 __all__ = [
     "InputItems",
@@ -24,6 +32,12 @@ __all__ = [
     "AsyncInputItemsWithRawResponse",
     "InputItemsWithStreamingResponse",
     "AsyncInputItemsWithStreamingResponse",
+    "InputTokens",
+    "AsyncInputTokens",
+    "InputTokensWithRawResponse",
+    "AsyncInputTokensWithRawResponse",
+    "InputTokensWithStreamingResponse",
+    "AsyncInputTokensWithStreamingResponse",
     "Responses",
     "AsyncResponses",
     "ResponsesWithRawResponse",
src/openai/resources/responses/input_tokens.py
@@ -0,0 +1,309 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
+from ..._utils import maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..._base_client import make_request_options
+from ...types.responses import input_token_count_params
+from ...types.responses.tool_param import ToolParam
+from ...types.shared_params.reasoning import Reasoning
+from ...types.responses.response_input_item_param import ResponseInputItemParam
+from ...types.responses.input_token_count_response import InputTokenCountResponse
+
+__all__ = ["InputTokens", "AsyncInputTokens"]
+
+
+class InputTokens(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> InputTokensWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return InputTokensWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> InputTokensWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return InputTokensWithStreamingResponse(self)
+
+    def count(
+        self,
+        *,
+        conversation: Optional[input_token_count_params.Conversation] | Omit = omit,
+        input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
+        instructions: Optional[str] | Omit = omit,
+        model: Optional[str] | Omit = omit,
+        parallel_tool_calls: Optional[bool] | Omit = omit,
+        previous_response_id: Optional[str] | Omit = omit,
+        reasoning: Optional[Reasoning] | Omit = omit,
+        text: Optional[input_token_count_params.Text] | Omit = omit,
+        tool_choice: Optional[input_token_count_params.ToolChoice] | Omit = omit,
+        tools: Optional[Iterable[ToolParam]] | Omit = omit,
+        truncation: Literal["auto", "disabled"] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> InputTokenCountResponse:
+        """
+        Get input token counts
+
+        Args:
+          conversation: The conversation that this response belongs to. Items from this conversation are
+              prepended to `input_items` for this response request. Input items and output
+              items from this response are automatically added to this conversation after this
+              response completes.
+
+          input: Text, image, or file inputs to the model, used to generate a response
+
+          instructions: A system (or developer) message inserted into the model's context. When used
+              along with `previous_response_id`, the instructions from a previous response
+              will not be carried over to the next response. This makes it simple to swap out
+              system (or developer) messages in new responses.
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+              Cannot be used in conjunction with `conversation`.
+
+          reasoning: **gpt-5 and o-series models only** Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter to see how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+          truncation: The truncation strategy to use for the model response. - `auto`: If the input to
+              this Response exceeds the model's context window size, the model will truncate
+              the response to fit the context window by dropping items from the beginning of
+              the conversation. - `disabled` (default): If the input size will exceed the
+              context window size for a model, the request will fail with a 400 error.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/responses/input_tokens",
+            body=maybe_transform(
+                {
+                    "conversation": conversation,
+                    "input": input,
+                    "instructions": instructions,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "truncation": truncation,
+                },
+                input_token_count_params.InputTokenCountParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=InputTokenCountResponse,
+        )
+
+
+class AsyncInputTokens(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncInputTokensWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncInputTokensWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncInputTokensWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+        """
+        return AsyncInputTokensWithStreamingResponse(self)
+
+    async def count(
+        self,
+        *,
+        conversation: Optional[input_token_count_params.Conversation] | Omit = omit,
+        input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
+        instructions: Optional[str] | Omit = omit,
+        model: Optional[str] | Omit = omit,
+        parallel_tool_calls: Optional[bool] | Omit = omit,
+        previous_response_id: Optional[str] | Omit = omit,
+        reasoning: Optional[Reasoning] | Omit = omit,
+        text: Optional[input_token_count_params.Text] | Omit = omit,
+        tool_choice: Optional[input_token_count_params.ToolChoice] | Omit = omit,
+        tools: Optional[Iterable[ToolParam]] | Omit = omit,
+        truncation: Literal["auto", "disabled"] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> InputTokenCountResponse:
+        """
+        Get input token counts
+
+        Args:
+          conversation: The conversation that this response belongs to. Items from this conversation are
+              prepended to `input_items` for this response request. Input items and output
+              items from this response are automatically added to this conversation after this
+              response completes.
+
+          input: Text, image, or file inputs to the model, used to generate a response
+
+          instructions: A system (or developer) message inserted into the model's context. When used
+              along with `previous_response_id`, the instructions from a previous response
+              will not be carried over to the next response. This makes it simple to swap out
+              system (or developer) messages in new responses.
+
+          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
+              wide range of models with different capabilities, performance characteristics,
+              and price points. Refer to the
+              [model guide](https://platform.openai.com/docs/models) to browse and compare
+              available models.
+
+          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.
+
+          previous_response_id: The unique ID of the previous response to the model. Use this to create
+              multi-turn conversations. Learn more about
+              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+              Cannot be used in conjunction with `conversation`.
+
+          reasoning: **gpt-5 and o-series models only** Configuration options for
+              [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+
+          text: Configuration options for a text response from the model. Can be plain text or
+              structured JSON data. Learn more:
+
+              - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+              - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+
+          tool_choice: How the model should select which tool (or tools) to use when generating a
+              response. See the `tools` parameter to see how to specify which tools the model
+              can call.
+
+          tools: An array of tools the model may call while generating a response. You can
+              specify which tool to use by setting the `tool_choice` parameter.
+
+          truncation: The truncation strategy to use for the model response. - `auto`: If the input to
+              this Response exceeds the model's context window size, the model will truncate
+              the response to fit the context window by dropping items from the beginning of
+              the conversation. - `disabled` (default): If the input size will exceed the
+              context window size for a model, the request will fail with a 400 error.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/responses/input_tokens",
+            body=await async_maybe_transform(
+                {
+                    "conversation": conversation,
+                    "input": input,
+                    "instructions": instructions,
+                    "model": model,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "previous_response_id": previous_response_id,
+                    "reasoning": reasoning,
+                    "text": text,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "truncation": truncation,
+                },
+                input_token_count_params.InputTokenCountParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=InputTokenCountResponse,
+        )
+
+
+class InputTokensWithRawResponse:
+    def __init__(self, input_tokens: InputTokens) -> None:
+        self._input_tokens = input_tokens
+
+        self.count = _legacy_response.to_raw_response_wrapper(
+            input_tokens.count,
+        )
+
+
+class AsyncInputTokensWithRawResponse:
+    def __init__(self, input_tokens: AsyncInputTokens) -> None:
+        self._input_tokens = input_tokens
+
+        self.count = _legacy_response.async_to_raw_response_wrapper(
+            input_tokens.count,
+        )
+
+
+class InputTokensWithStreamingResponse:
+    def __init__(self, input_tokens: InputTokens) -> None:
+        self._input_tokens = input_tokens
+
+        self.count = to_streamed_response_wrapper(
+            input_tokens.count,
+        )
+
+
+class AsyncInputTokensWithStreamingResponse:
+    def __init__(self, input_tokens: AsyncInputTokens) -> None:
+        self._input_tokens = input_tokens
+
+        self.count = async_to_streamed_response_wrapper(
+            input_tokens.count,
+        )
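
The four wrapper classes at the end of this file route `count` through the raw and streaming interfaces reached via the `with_raw_response` / `with_streaming_response` properties documented above. A minimal sketch of how a caller gets to them (illustrative values, not part of the commit):

```python
from openai import OpenAI

client = OpenAI()

# `.with_raw_response` returns the HTTP response wrapper; `.parse()` yields the model.
raw = client.responses.input_tokens.with_raw_response.count(model="gpt-5", input="hi")
print(raw.parse().input_tokens)

# `.with_streaming_response` defers reading the body; use it as a context manager.
with client.responses.input_tokens.with_streaming_response.count(
    model="gpt-5", input="hi"
) as response:
    print(response.parse().input_tokens)
```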
src/openai/resources/responses/responses.py
@@ -24,6 +24,14 @@ from .input_items import (
 )
 from ..._streaming import Stream, AsyncStream
 from ...lib._tools import PydanticFunctionTool, ResponsesPydanticFunctionTool
+from .input_tokens import (
+    InputTokens,
+    AsyncInputTokens,
+    InputTokensWithRawResponse,
+    AsyncInputTokensWithRawResponse,
+    InputTokensWithStreamingResponse,
+    AsyncInputTokensWithStreamingResponse,
+)
 from ..._base_client import make_request_options
 from ...types.responses import response_create_params, response_retrieve_params
 from ...lib._parsing._responses import (
@@ -52,6 +60,10 @@ class Responses(SyncAPIResource):
     def input_items(self) -> InputItems:
         return InputItems(self._client)
 
+    @cached_property
+    def input_tokens(self) -> InputTokens:
+        return InputTokens(self._client)
+
     @cached_property
     def with_raw_response(self) -> ResponsesWithRawResponse:
         """
@@ -1483,6 +1495,10 @@ class AsyncResponses(AsyncAPIResource):
     def input_items(self) -> AsyncInputItems:
         return AsyncInputItems(self._client)
 
+    @cached_property
+    def input_tokens(self) -> AsyncInputTokens:
+        return AsyncInputTokens(self._client)
+
     @cached_property
     def with_raw_response(self) -> AsyncResponsesWithRawResponse:
         """
@@ -2938,6 +2954,10 @@ class ResponsesWithRawResponse:
     def input_items(self) -> InputItemsWithRawResponse:
         return InputItemsWithRawResponse(self._responses.input_items)
 
+    @cached_property
+    def input_tokens(self) -> InputTokensWithRawResponse:
+        return InputTokensWithRawResponse(self._responses.input_tokens)
+
 
 class AsyncResponsesWithRawResponse:
     def __init__(self, responses: AsyncResponses) -> None:
@@ -2963,6 +2983,10 @@ class AsyncResponsesWithRawResponse:
     def input_items(self) -> AsyncInputItemsWithRawResponse:
         return AsyncInputItemsWithRawResponse(self._responses.input_items)
 
+    @cached_property
+    def input_tokens(self) -> AsyncInputTokensWithRawResponse:
+        return AsyncInputTokensWithRawResponse(self._responses.input_tokens)
+
 
 class ResponsesWithStreamingResponse:
     def __init__(self, responses: Responses) -> None:
@@ -2985,6 +3009,10 @@ class ResponsesWithStreamingResponse:
     def input_items(self) -> InputItemsWithStreamingResponse:
         return InputItemsWithStreamingResponse(self._responses.input_items)
 
+    @cached_property
+    def input_tokens(self) -> InputTokensWithStreamingResponse:
+        return InputTokensWithStreamingResponse(self._responses.input_tokens)
+
 
 class AsyncResponsesWithStreamingResponse:
     def __init__(self, responses: AsyncResponses) -> None:
@@ -3007,6 +3035,10 @@ class AsyncResponsesWithStreamingResponse:
     def input_items(self) -> AsyncInputItemsWithStreamingResponse:
         return AsyncInputItemsWithStreamingResponse(self._responses.input_items)
 
+    @cached_property
+    def input_tokens(self) -> AsyncInputTokensWithStreamingResponse:
+        return AsyncInputTokensWithStreamingResponse(self._responses.input_tokens)
+
 
 def _make_tools(tools: Iterable[ParseableToolParam] | Omit) -> List[ToolParam] | Omit:
     if not is_given(tools):
src/openai/types/responses/__init__.py
@@ -62,6 +62,7 @@ from .response_reasoning_item import ResponseReasoningItem as ResponseReasoningI
 from .tool_choice_types_param import ToolChoiceTypesParam as ToolChoiceTypesParam
 from .web_search_preview_tool import WebSearchPreviewTool as WebSearchPreviewTool
 from .easy_input_message_param import EasyInputMessageParam as EasyInputMessageParam
+from .input_token_count_params import InputTokenCountParams as InputTokenCountParams
 from .response_completed_event import ResponseCompletedEvent as ResponseCompletedEvent
 from .response_retrieve_params import ResponseRetrieveParams as ResponseRetrieveParams
 from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent
@@ -74,6 +75,7 @@ from .response_input_item_param import ResponseInputItemParam as ResponseInputIt
 from .response_input_text_param import ResponseInputTextParam as ResponseInputTextParam
 from .response_text_delta_event import ResponseTextDeltaEvent as ResponseTextDeltaEvent
 from .tool_choice_allowed_param import ToolChoiceAllowedParam as ToolChoiceAllowedParam
+from .input_token_count_response import InputTokenCountResponse as InputTokenCountResponse
 from .response_audio_delta_event import ResponseAudioDeltaEvent as ResponseAudioDeltaEvent
 from .response_in_progress_event import ResponseInProgressEvent as ResponseInProgressEvent
 from .response_input_audio_param import ResponseInputAudioParam as ResponseInputAudioParam
src/openai/types/responses/input_token_count_params.py
@@ -0,0 +1,138 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal, TypeAlias, TypedDict
+
+from .tool_param import ToolParam
+from .tool_choice_options import ToolChoiceOptions
+from .tool_choice_mcp_param import ToolChoiceMcpParam
+from .tool_choice_types_param import ToolChoiceTypesParam
+from ..shared_params.reasoning import Reasoning
+from .tool_choice_custom_param import ToolChoiceCustomParam
+from .response_input_item_param import ResponseInputItemParam
+from .tool_choice_allowed_param import ToolChoiceAllowedParam
+from .tool_choice_function_param import ToolChoiceFunctionParam
+from .response_conversation_param import ResponseConversationParam
+from .response_format_text_config_param import ResponseFormatTextConfigParam
+
+__all__ = ["InputTokenCountParams", "Conversation", "Text", "ToolChoice"]
+
+
+class InputTokenCountParams(TypedDict, total=False):
+    conversation: Optional[Conversation]
+    """The conversation that this response belongs to.
+
+    Items from this conversation are prepended to `input_items` for this response
+    request. Input items and output items from this response are automatically added
+    to this conversation after this response completes.
+    """
+
+    input: Union[str, Iterable[ResponseInputItemParam], None]
+    """Text, image, or file inputs to the model, used to generate a response"""
+
+    instructions: Optional[str]
+    """
+    A system (or developer) message inserted into the model's context. When used
+    along with `previous_response_id`, the instructions from a previous response
+    will not be carried over to the next response. This makes it simple to swap out
+    system (or developer) messages in new responses.
+    """
+
+    model: Optional[str]
+    """Model ID used to generate the response, like `gpt-4o` or `o3`.
+
+    OpenAI offers a wide range of models with different capabilities, performance
+    characteristics, and price points. Refer to the
+    [model guide](https://platform.openai.com/docs/models) to browse and compare
+    available models.
+    """
+
+    parallel_tool_calls: Optional[bool]
+    """Whether to allow the model to run tool calls in parallel."""
+
+    previous_response_id: Optional[str]
+    """The unique ID of the previous response to the model.
+
+    Use this to create multi-turn conversations. Learn more about
+    [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+    Cannot be used in conjunction with `conversation`.
+    """
+
+    reasoning: Optional[Reasoning]
+    """
+    **gpt-5 and o-series models only** Configuration options for
+    [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+    """
+
+    text: Optional[Text]
+    """Configuration options for a text response from the model.
+
+    Can be plain text or structured JSON data. Learn more:
+
+    - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+    - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+    """
+
+    tool_choice: Optional[ToolChoice]
+    """
+    How the model should select which tool (or tools) to use when generating a
+    response. See the `tools` parameter to see how to specify which tools the model
+    can call.
+    """
+
+    tools: Optional[Iterable[ToolParam]]
+    """An array of tools the model may call while generating a response.
+
+    You can specify which tool to use by setting the `tool_choice` parameter.
+    """
+
+    truncation: Literal["auto", "disabled"]
+    """The truncation strategy to use for the model response.
+
+    - `auto`: If the input to this Response exceeds the model's context window size,
+      the model will truncate the response to fit the context window by dropping
+      items from the beginning of the conversation. - `disabled` (default): If the
+      input size will exceed the context window size for a model, the request will
+      fail with a 400 error.
+    """
+
+
+Conversation: TypeAlias = Union[str, ResponseConversationParam]
+
+
+class Text(TypedDict, total=False):
+    format: ResponseFormatTextConfigParam
+    """An object specifying the format that the model must output.
+
+    Configuring `{ "type": "json_schema" }` enables Structured Outputs, which
+    ensures the model will match your supplied JSON schema. Learn more in the
+    [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+    The default format is `{ "type": "text" }` with no additional options.
+
+    **Not recommended for gpt-4o and newer models:**
+
+    Setting to `{ "type": "json_object" }` enables the older JSON mode, which
+    ensures the message the model generates is valid JSON. Using `json_schema` is
+    preferred for models that support it.
+    """
+
+    verbosity: Optional[Literal["low", "medium", "high"]]
+    """Constrains the verbosity of the model's response.
+
+    Lower values will result in more concise responses, while higher values will
+    result in more verbose responses. Currently supported values are `low`,
+    `medium`, and `high`.
+    """
+
+
+ToolChoice: TypeAlias = Union[
+    ToolChoiceOptions,
+    ToolChoiceAllowedParam,
+    ToolChoiceTypesParam,
+    ToolChoiceFunctionParam,
+    ToolChoiceMcpParam,
+    ToolChoiceCustomParam,
+]
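
The `ToolChoice` alias above means `tool_choice` accepts either a bare option string or one of several typed objects. A short sketch of two accepted forms (not part of the commit):

```python
from openai.types.responses.input_token_count_params import ToolChoice

# ToolChoiceOptions is a plain string: "none", "auto", or "required".
choice_option: ToolChoice = "auto"

# ToolChoiceFunctionParam pins the model to one named function tool.
choice_function: ToolChoice = {"type": "function", "name": "get_current_weather"}
```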
src/openai/types/responses/input_token_count_response.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputTokenCountResponse"]
+
+
+class InputTokenCountResponse(BaseModel):
+    input_tokens: int
+
+    object: Literal["response.input_tokens"]
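
Given the two-field model above, a successful count deserializes to something like the following (illustrative value, not part of the commit):

```python
from openai.types.responses import InputTokenCountResponse

resp = InputTokenCountResponse(object="response.input_tokens", input_tokens=42)
assert resp.input_tokens == 42
```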
tests/api_resources/responses/test_input_tokens.py
@@ -0,0 +1,138 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.types.responses import InputTokenCountResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestInputTokens:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    def test_method_count(self, client: OpenAI) -> None:
+        input_token = client.responses.input_tokens.count()
+        assert_matches_type(InputTokenCountResponse, input_token, path=["response"])
+
+    @parametrize
+    def test_method_count_with_all_params(self, client: OpenAI) -> None:
+        input_token = client.responses.input_tokens.count(
+            conversation="string",
+            input="string",
+            instructions="instructions",
+            model="model",
+            parallel_tool_calls=True,
+            previous_response_id="resp_123",
+            reasoning={
+                "effort": "minimal",
+                "generate_summary": "auto",
+                "summary": "auto",
+            },
+            text={
+                "format": {"type": "text"},
+                "verbosity": "low",
+            },
+            tool_choice="none",
+            tools=[
+                {
+                    "name": "name",
+                    "parameters": {"foo": "bar"},
+                    "strict": True,
+                    "type": "function",
+                    "description": "description",
+                }
+            ],
+            truncation="auto",
+        )
+        assert_matches_type(InputTokenCountResponse, input_token, path=["response"])
+
+    @parametrize
+    def test_raw_response_count(self, client: OpenAI) -> None:
+        response = client.responses.input_tokens.with_raw_response.count()
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        input_token = response.parse()
+        assert_matches_type(InputTokenCountResponse, input_token, path=["response"])
+
+    @parametrize
+    def test_streaming_response_count(self, client: OpenAI) -> None:
+        with client.responses.input_tokens.with_streaming_response.count() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            input_token = response.parse()
+            assert_matches_type(InputTokenCountResponse, input_token, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncInputTokens:
+    parametrize = pytest.mark.parametrize(
+        "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+    )
+
+    @parametrize
+    async def test_method_count(self, async_client: AsyncOpenAI) -> None:
+        input_token = await async_client.responses.input_tokens.count()
+        assert_matches_type(InputTokenCountResponse, input_token, path=["response"])
+
+    @parametrize
+    async def test_method_count_with_all_params(self, async_client: AsyncOpenAI) -> None:
+        input_token = await async_client.responses.input_tokens.count(
+            conversation="string",
+            input="string",
+            instructions="instructions",
+            model="model",
+            parallel_tool_calls=True,
+            previous_response_id="resp_123",
+            reasoning={
+                "effort": "minimal",
+                "generate_summary": "auto",
+                "summary": "auto",
+            },
+            text={
+                "format": {"type": "text"},
+                "verbosity": "low",
+            },
+            tool_choice="none",
+            tools=[
+                {
+                    "name": "name",
+                    "parameters": {"foo": "bar"},
+                    "strict": True,
+                    "type": "function",
+                    "description": "description",
+                }
+            ],
+            truncation="auto",
+        )
+        assert_matches_type(InputTokenCountResponse, input_token, path=["response"])
+
+    @parametrize
+    async def test_raw_response_count(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.responses.input_tokens.with_raw_response.count()
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        input_token = response.parse()
+        assert_matches_type(InputTokenCountResponse, input_token, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_count(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.responses.input_tokens.with_streaming_response.count() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            input_token = await response.parse()
+            assert_matches_type(InputTokenCountResponse, input_token, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
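
The tests above default `base_url` to a local mock at port 4010, per the repo's mock-server setup. Pointing a client at it directly is just (not part of the commit; `api_key` value is a placeholder):

```python
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:4010", api_key="test-key")
```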
.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 135
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-b062c33330de7e3bbf992fd4f0799afd868c30a66c39418dd2c62f4add3b45b6.yml
-openapi_spec_hash: fe067f5b1c0e93799b5ea7fde3c4b1b3
-config_hash: 4b6f471b24d659514b86b736c90a0c0a
+configured_endpoints: 136
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-812a10f8fb54c584efc914422b574cb3f43dc238b5733b13f6a0b2308b7d9910.yml
+openapi_spec_hash: 0222041ba12a5ff6b94924a834fa91a2
+config_hash: 50ee3382a63c021a9f821a935950e926
api.md
@@ -865,6 +865,18 @@ Methods:
 
 - <code title="get /responses/{response_id}/input_items">client.responses.input_items.<a href="./src/openai/resources/responses/input_items.py">list</a>(response_id, \*\*<a href="src/openai/types/responses/input_item_list_params.py">params</a>) -> <a href="./src/openai/types/responses/response_item.py">SyncCursorPage[ResponseItem]</a></code>
 
+## InputTokens
+
+Types:
+
+```python
+from openai.types.responses import InputTokenCountResponse
+```
+
+Methods:
+
+- <code title="post /responses/input_tokens">client.responses.input_tokens.<a href="./src/openai/resources/responses/input_tokens.py">count</a>(\*\*<a href="src/openai/types/responses/input_token_count_params.py">params</a>) -> <a href="./src/openai/types/responses/input_token_count_response.py">InputTokenCountResponse</a></code>
+
 # Realtime
 
 Types: