# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Union, Iterable, Optional
from typing_extensions import Literal

import httpx

from ... import _legacy_response
from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ..._base_client import make_request_options
from ...types.responses import input_token_count_params
from ...types.responses.tool_param import ToolParam
from ...types.shared_params.reasoning import Reasoning
from ...types.responses.response_input_item_param import ResponseInputItemParam
from ...types.responses.input_token_count_response import InputTokenCountResponse

__all__ = ["InputTokens", "AsyncInputTokens"]


class InputTokens(SyncAPIResource):
    """Synchronous resource for the `/responses/input_tokens` endpoint."""

    @cached_property
    def with_raw_response(self) -> InputTokensWithRawResponse:
        """
        Prefix any method call with this property to get back the raw HTTP
        response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return InputTokensWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> InputTokensWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return InputTokensWithStreamingResponse(self)

    def count(
        self,
        *,
        conversation: Optional[input_token_count_params.Conversation] | Omit = omit,
        input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        model: Optional[str] | Omit = omit,
        parallel_tool_calls: Optional[bool] | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        reasoning: Optional[Reasoning] | Omit = omit,
        text: Optional[input_token_count_params.Text] | Omit = omit,
        tool_choice: Optional[input_token_count_params.ToolChoice] | Omit = omit,
        tools: Optional[Iterable[ToolParam]] | Omit = omit,
        truncation: Literal["auto", "disabled"] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> InputTokenCountResponse:
        """Get input token counts for a prospective Responses API request.

        Args:
          conversation: The conversation that this response belongs to. Items from this
              conversation are prepended to `input_items` for this response request.

          input: Text, image, or file inputs to the model, used to generate a response.

          instructions: A system (or developer) message inserted into the model's context.
              When used along with `previous_response_id`, the instructions from a previous
              response will not be carried over to the next response.

          model: Model ID used to generate the response, like `gpt-4o` or `o3`. Refer to
              the [model guide](https://platform.openai.com/docs/models) to browse and
              compare available models.

          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.

          previous_response_id: The unique ID of the previous response to the model, for
              multi-turn conversations. See
              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
              Cannot be used in conjunction with `conversation`.

          reasoning: **gpt-5 and o-series models only** Configuration options for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning).

          text: Configuration options for a text response from the model — plain text or
              structured JSON. See
              [Text inputs and outputs](https://platform.openai.com/docs/guides/text) and
              [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs).

          tool_choice: How the model should select which tool (or tools) to use when
              generating a response. See the `tools` parameter for how to specify which
              tools the model can call.

          tools: An array of tools the model may call while generating a response. You can
              specify which tool to use by setting the `tool_choice` parameter.

          truncation: The truncation strategy for the model response. `auto` drops items
              from the beginning of the conversation to fit the context window;
              `disabled` (default) fails with a 400 error if the input would exceed it.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Collect the user-facing kwargs into the typed request body shape.
        params = {
            "conversation": conversation,
            "input": input,
            "instructions": instructions,
            "model": model,
            "parallel_tool_calls": parallel_tool_calls,
            "previous_response_id": previous_response_id,
            "reasoning": reasoning,
            "text": text,
            "tool_choice": tool_choice,
            "tools": tools,
            "truncation": truncation,
        }
        return self._post(
            "/responses/input_tokens",
            body=maybe_transform(params, input_token_count_params.InputTokenCountParams),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=InputTokenCountResponse,
        )
149
150
class AsyncInputTokens(AsyncAPIResource):
    """Asynchronous resource for the `/responses/input_tokens` endpoint."""

    @cached_property
    def with_raw_response(self) -> AsyncInputTokensWithRawResponse:
        """
        Prefix any method call with this property to get back the raw HTTP
        response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncInputTokensWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncInputTokensWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncInputTokensWithStreamingResponse(self)

    async def count(
        self,
        *,
        conversation: Optional[input_token_count_params.Conversation] | Omit = omit,
        input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        model: Optional[str] | Omit = omit,
        parallel_tool_calls: Optional[bool] | Omit = omit,
        previous_response_id: Optional[str] | Omit = omit,
        reasoning: Optional[Reasoning] | Omit = omit,
        text: Optional[input_token_count_params.Text] | Omit = omit,
        tool_choice: Optional[input_token_count_params.ToolChoice] | Omit = omit,
        tools: Optional[Iterable[ToolParam]] | Omit = omit,
        truncation: Literal["auto", "disabled"] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> InputTokenCountResponse:
        """Get input token counts for a prospective Responses API request.

        Args:
          conversation: The conversation that this response belongs to. Items from this
              conversation are prepended to `input_items` for this response request.

          input: Text, image, or file inputs to the model, used to generate a response.

          instructions: A system (or developer) message inserted into the model's context.
              When used along with `previous_response_id`, the instructions from a previous
              response will not be carried over to the next response.

          model: Model ID used to generate the response, like `gpt-4o` or `o3`. Refer to
              the [model guide](https://platform.openai.com/docs/models) to browse and
              compare available models.

          parallel_tool_calls: Whether to allow the model to run tool calls in parallel.

          previous_response_id: The unique ID of the previous response to the model, for
              multi-turn conversations. See
              [conversation state](https://platform.openai.com/docs/guides/conversation-state).
              Cannot be used in conjunction with `conversation`.

          reasoning: **gpt-5 and o-series models only** Configuration options for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning).

          text: Configuration options for a text response from the model — plain text or
              structured JSON. See
              [Text inputs and outputs](https://platform.openai.com/docs/guides/text) and
              [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs).

          tool_choice: How the model should select which tool (or tools) to use when
              generating a response. See the `tools` parameter for how to specify which
              tools the model can call.

          tools: An array of tools the model may call while generating a response. You can
              specify which tool to use by setting the `tool_choice` parameter.

          truncation: The truncation strategy for the model response. `auto` drops items
              from the beginning of the conversation to fit the context window;
              `disabled` (default) fails with a 400 error if the input would exceed it.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Collect the user-facing kwargs into the typed request body shape.
        params = {
            "conversation": conversation,
            "input": input,
            "instructions": instructions,
            "model": model,
            "parallel_tool_calls": parallel_tool_calls,
            "previous_response_id": previous_response_id,
            "reasoning": reasoning,
            "text": text,
            "tool_choice": tool_choice,
            "tools": tools,
            "truncation": truncation,
        }
        return await self._post(
            "/responses/input_tokens",
            body=await async_maybe_transform(params, input_token_count_params.InputTokenCountParams),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=InputTokenCountResponse,
        )
274
275
class InputTokensWithRawResponse:
    """Namespace exposing `InputTokens` methods wrapped to return raw responses."""

    def __init__(self, input_tokens: InputTokens) -> None:
        self._input_tokens = input_tokens
        # Each method is re-exposed wrapped so calls yield the raw HTTP response.
        self.count = _legacy_response.to_raw_response_wrapper(input_tokens.count)
283
284
class AsyncInputTokensWithRawResponse:
    """Namespace exposing `AsyncInputTokens` methods wrapped to return raw responses."""

    def __init__(self, input_tokens: AsyncInputTokens) -> None:
        self._input_tokens = input_tokens
        # Each method is re-exposed wrapped so calls yield the raw HTTP response.
        self.count = _legacy_response.async_to_raw_response_wrapper(input_tokens.count)
292
293
class InputTokensWithStreamingResponse:
    """Namespace exposing `InputTokens` methods wrapped for streamed-response access."""

    def __init__(self, input_tokens: InputTokens) -> None:
        self._input_tokens = input_tokens
        # Each method is re-exposed wrapped so the response body is not read eagerly.
        self.count = to_streamed_response_wrapper(input_tokens.count)
301
302
class AsyncInputTokensWithStreamingResponse:
    """Namespace exposing `AsyncInputTokens` methods wrapped for streamed-response access."""

    def __init__(self, input_tokens: AsyncInputTokens) -> None:
        self._input_tokens = input_tokens
        # Each method is re-exposed wrapped so the response body is not read eagerly.
        self.count = async_to_streamed_response_wrapper(input_tokens.count)