# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

import inspect
from typing import Dict, List, Type, Union, Iterable, Optional, cast
from functools import partial
from typing_extensions import Literal, overload

import httpx
import pydantic

from .... import _legacy_response
from .messages import (
    Messages,
    AsyncMessages,
    MessagesWithRawResponse,
    AsyncMessagesWithRawResponse,
    MessagesWithStreamingResponse,
    AsyncMessagesWithStreamingResponse,
)
from ...._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
from ...._utils import required_args, maybe_transform, async_maybe_transform
from ...._compat import cached_property
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ...._streaming import Stream, AsyncStream
from ....pagination import SyncCursorPage, AsyncCursorPage
from ....types.chat import (
    ChatCompletionAudioParam,
    completion_list_params,
    completion_create_params,
    completion_update_params,
)
from ...._base_client import AsyncPaginator, make_request_options
from ....lib._parsing import (
    ResponseFormatT,
    validate_input_tools as _validate_input_tools,
    parse_chat_completion as _parse_chat_completion,
    type_to_response_format_param as _type_to_response_format,
)
from ....lib.streaming.chat import ChatCompletionStreamManager, AsyncChatCompletionStreamManager
from ....types.shared.chat_model import ChatModel
from ....types.chat.chat_completion import ChatCompletion
from ....types.shared_params.metadata import Metadata
from ....types.shared.reasoning_effort import ReasoningEffort
from ....types.chat.chat_completion_chunk import ChatCompletionChunk
from ....types.chat.parsed_chat_completion import ParsedChatCompletion
from ....types.chat.chat_completion_deleted import ChatCompletionDeleted
from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam
from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam
from ....types.chat.chat_completion_tool_union_param import ChatCompletionToolUnionParam
from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam
from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam

__all__ = ["Completions", "AsyncCompletions"]


class Completions(SyncAPIResource):
    @cached_property
    def messages(self) -> Messages:
        return Messages(self._client)

    @cached_property
    def with_raw_response(self) -> CompletionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
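
        For example, a sketch (illustrative only; assumes a configured client
        named `client`):

        ```py
        response = client.chat.completions.with_raw_response.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Hi"}],
        )
        print(response.headers.get("x-request-id"))
        completion = response.parse()  # the parsed `ChatCompletion`
        ```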
72 """
73 return CompletionsWithRawResponse(self)
74
75 @cached_property
76 def with_streaming_response(self) -> CompletionsWithStreamingResponse:
77 """
78 An alternative to `.with_raw_response` that doesn't eagerly read the response body.
79
80 For more information, see https://www.github.com/openai/openai-python#with_streaming_response
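
        For example, a sketch (illustrative only; assumes a configured client
        named `client`):

        ```py
        with client.chat.completions.with_streaming_response.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Hi"}],
        ) as response:
            print(response.headers.get("x-request-id"))
            completion = response.parse()
        ```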
81 """
82 return CompletionsWithStreamingResponse(self)
83
84 def parse(
85 self,
86 *,
87 messages: Iterable[ChatCompletionMessageParam],
88 model: Union[str, ChatModel],
89 audio: Optional[ChatCompletionAudioParam] | Omit = omit,
90 response_format: type[ResponseFormatT] | Omit = omit,
91 frequency_penalty: Optional[float] | Omit = omit,
92 function_call: completion_create_params.FunctionCall | Omit = omit,
93 functions: Iterable[completion_create_params.Function] | Omit = omit,
94 logit_bias: Optional[Dict[str, int]] | Omit = omit,
95 logprobs: Optional[bool] | Omit = omit,
96 max_completion_tokens: Optional[int] | Omit = omit,
97 max_tokens: Optional[int] | Omit = omit,
98 metadata: Optional[Metadata] | Omit = omit,
99 modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
100 n: Optional[int] | Omit = omit,
101 parallel_tool_calls: bool | Omit = omit,
102 prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
103 presence_penalty: Optional[float] | Omit = omit,
104 prompt_cache_key: str | Omit = omit,
105 prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
106 reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
107 safety_identifier: str | Omit = omit,
108 seed: Optional[int] | Omit = omit,
109 service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
110 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
111 store: Optional[bool] | Omit = omit,
112 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
113 temperature: Optional[float] | Omit = omit,
114 tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
115 tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
116 top_logprobs: Optional[int] | Omit = omit,
117 top_p: Optional[float] | Omit = omit,
118 user: str | Omit = omit,
119 verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
120 web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
121 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
122 # The extra values given here take precedence over values defined on the client or passed to this method.
123 extra_headers: Headers | None = None,
124 extra_query: Query | None = None,
125 extra_body: Body | None = None,
126 timeout: float | httpx.Timeout | None | NotGiven = not_given,
127 ) -> ParsedChatCompletion[ResponseFormatT]:
128 """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
129 & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
130
131 You can pass a pydantic model to this method and it will automatically convert the model
132 into a JSON schema, send it to the API and parse the response content back into the given model.
133
134 This method will also automatically parse `function` tool calls if:
135 - You use the `openai.pydantic_function_tool()` helper method
136 - You mark your tool schema with `"strict": True`
137
138 Example usage:
139 ```py
140 from pydantic import BaseModel
141 from openai import OpenAI
142
143
144 class Step(BaseModel):
145 explanation: str
146 output: str
147
148
149 class MathResponse(BaseModel):
150 steps: List[Step]
151 final_answer: str
152
153
154 client = OpenAI()
155 completion = client.chat.completions.parse(
156 model="gpt-4o-2024-08-06",
157 messages=[
158 {"role": "system", "content": "You are a helpful math tutor."},
159 {"role": "user", "content": "solve 8x + 31 = 2"},
160 ],
161 response_format=MathResponse,
162 )
163
164 message = completion.choices[0].message
165 if message.parsed:
166 print(message.parsed.steps)
167 print("answer: ", message.parsed.final_answer)
168 ```
169 """
170 chat_completion_tools = _validate_input_tools(tools)
171
172 extra_headers = {
173 "X-Stainless-Helper-Method": "chat.completions.parse",
174 **(extra_headers or {}),
175 }
176
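        # `parser` runs as a post-parser hook on the decoded `ChatCompletion`,
        # converting it into a `ParsedChatCompletion[ResponseFormatT]` (it is
        # passed as `post_parser=parser` in the request options below)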
        def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
            return _parse_chat_completion(
                response_format=response_format,
                chat_completion=raw_completion,
                input_tools=chat_completion_tools,
            )

        return self._post(
            "/chat/completions",
            body=maybe_transform(
                {
                    "messages": messages,
                    "model": model,
                    "audio": audio,
                    "frequency_penalty": frequency_penalty,
                    "function_call": function_call,
                    "functions": functions,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_completion_tokens": max_completion_tokens,
                    "max_tokens": max_tokens,
                    "metadata": metadata,
                    "modalities": modalities,
                    "n": n,
                    "parallel_tool_calls": parallel_tool_calls,
                    "prediction": prediction,
                    "presence_penalty": presence_penalty,
                    "prompt_cache_key": prompt_cache_key,
                    "prompt_cache_retention": prompt_cache_retention,
                    "reasoning_effort": reasoning_effort,
                    "response_format": _type_to_response_format(response_format),
                    "safety_identifier": safety_identifier,
                    "seed": seed,
                    "service_tier": service_tier,
                    "stop": stop,
                    "store": store,
                    "stream": False,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "user": user,
                    "verbosity": verbosity,
                    "web_search_options": web_search_options,
                },
                completion_create_params.CompletionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                post_parser=parser,
            ),
            # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
            # in the `parser` function above
            cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
            stream=False,
        )

    @overload
    def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream: Optional[Literal[False]] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion:
        """
        **Starting a new project?** We recommend trying
        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
        advantage of the latest OpenAI platform features. Compare
        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).

        ---

        Creates a model response for the given chat conversation. Learn more in the
        [text generation](https://platform.openai.com/docs/guides/text-generation),
        [vision](https://platform.openai.com/docs/guides/vision), and
        [audio](https://platform.openai.com/docs/guides/audio) guides.

        Parameter support can differ depending on the model used to generate the
        response, particularly for newer reasoning models. Parameters that are only
        supported for reasoning models are noted below. For the current state of
        unsupported parameters in reasoning models,
        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).

        Args:
          messages: A list of messages comprising the conversation so far. Depending on the
              [model](https://platform.openai.com/docs/models) you use, different message
              types (modalities) are supported, like
              [text](https://platform.openai.com/docs/guides/text-generation),
              [images](https://platform.openai.com/docs/guides/vision), and
              [audio](https://platform.openai.com/docs/guides/audio).

          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
              wide range of models with different capabilities, performance characteristics,
              and price points. Refer to the
              [model guide](https://platform.openai.com/docs/models) to browse and compare
              available models.

          audio: Parameters for audio output. Required when audio output is requested with
              `modalities: ["audio"]`.
              [Learn more](https://platform.openai.com/docs/guides/audio).

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.

          function_call: Deprecated in favor of `tool_choice`.

              Controls which (if any) function is called by the model.

              `none` means the model will not call a function and instead generates a message.

              `auto` means the model can pick between generating a message or calling a
              function.

              Specifying a particular function via `{"name": "my_function"}` forces the model
              to call that function.

              `none` is the default when no functions are present. `auto` is the default if
              functions are present.

          functions: Deprecated in favor of `tools`.

              A list of functions the model may generate JSON inputs for.

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.

              Accepts a JSON object that maps tokens (specified by their token ID in the
              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
              bias is added to the logits generated by the model prior to sampling. The exact
              effect will vary per model, but values between -1 and 1 should decrease or
              increase likelihood of selection; values like -100 or 100 should result in a ban
              or exclusive selection of the relevant token.

          logprobs: Whether to return log probabilities of the output tokens or not. If true,
              returns the log probabilities of each output token returned in the `content` of
              `message`.

          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
              including visible output tokens and
              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
              completion. This value can be used to control
              [costs](https://openai.com/api/pricing/) for text generated via API.

              This value is now deprecated in favor of `max_completion_tokens`, and is not
              compatible with
              [o-series models](https://platform.openai.com/docs/guides/reasoning).

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          modalities: Output types that you would like the model to generate. Most models are capable
              of generating text, which is the default:

              `["text"]`

              The `gpt-4o-audio-preview` model can also be used to
              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
              this model generate both text and audio responses, you can use:

              `["text", "audio"]`

          n: How many chat completion choices to generate for each input message. Note that
              you will be charged based on the number of generated tokens across all of the
              choices. Keep `n` as `1` to minimize costs.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          prediction: Static predicted output content, such as the content of a text file that is
              being regenerated.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.

          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
              hit rates. Replaces the `user` field.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).

          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
              prompt caching, which keeps cached prefixes active for longer, up to a maximum
              of 24 hours.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.

          response_format: An object specifying the format that the model must output.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
              ensures the message the model generates is valid JSON. Using `json_schema` is
              preferred for models that support it.

          safety_identifier: A stable identifier used to help detect users of your application that may be
              violating OpenAI's usage policies. The IDs should be a string that uniquely
              identifies each user. We recommend hashing their username or email address, in
              order to avoid sending us any identifying information.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          seed: This feature is in Beta. If specified, our system will make a best effort to
              sample deterministically, such that repeated requests with the same `seed` and
              parameters should return the same result. Determinism is not guaranteed, and you
              should refer to the `system_fingerprint` response parameter to monitor changes
              in the backend.

          service_tier: Specifies the processing type used for serving the request.

              - If set to 'auto', then the request will be processed with the service tier
                configured in the Project settings. Unless otherwise configured, the Project
                will use 'default'.
              - If set to 'default', then the request will be processed with the standard
                pricing and performance for the selected model.
              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
                '[priority](https://openai.com/api-priority-processing/)', then the request
                will be processed with the corresponding service tier.
              - When not set, the default behavior is 'auto'.

              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.

              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          store: Whether or not to store the output of this chat completion request for use in
              our [model distillation](https://platform.openai.com/docs/guides/distillation)
              or [evals](https://platform.openai.com/docs/guides/evals) products.

              Supports text and image inputs. Note: image inputs over 8MB will be dropped.

          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
              for more information, along with the
              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
              guide for more information on how to handle the streaming events.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tool and instead generates a message. `auto` means the model can
              pick between generating a message or calling one or more tools. `required` means
              the model must call one or more tools. Specifying a particular tool via
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

              `none` is the default when no tools are present. `auto` is the default if tools
              are present.

          tools: A list of tools the model may call. You can provide either
              [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools)
              or [function tools](https://platform.openai.com/docs/guides/function-calling).

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          verbosity: Constrains the verbosity of the model's response. Lower values will result in
              more concise responses, while higher values will result in more verbose
              responses. Currently supported values are `low`, `medium`, and `high`.

          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
              about the
              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
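
        For example, a minimal non-streaming call (illustrative only; assumes a
        configured client named `client`):

        ```py
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Say hello."}],
        )
        print(completion.choices[0].message.content)
        ```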
538 """
539 ...
540
541 @overload
542 def create(
543 self,
544 *,
545 messages: Iterable[ChatCompletionMessageParam],
546 model: Union[str, ChatModel],
547 stream: Literal[True],
548 audio: Optional[ChatCompletionAudioParam] | Omit = omit,
549 frequency_penalty: Optional[float] | Omit = omit,
550 function_call: completion_create_params.FunctionCall | Omit = omit,
551 functions: Iterable[completion_create_params.Function] | Omit = omit,
552 logit_bias: Optional[Dict[str, int]] | Omit = omit,
553 logprobs: Optional[bool] | Omit = omit,
554 max_completion_tokens: Optional[int] | Omit = omit,
555 max_tokens: Optional[int] | Omit = omit,
556 metadata: Optional[Metadata] | Omit = omit,
557 modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
558 n: Optional[int] | Omit = omit,
559 parallel_tool_calls: bool | Omit = omit,
560 prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
561 presence_penalty: Optional[float] | Omit = omit,
562 prompt_cache_key: str | Omit = omit,
563 prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
564 reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
565 response_format: completion_create_params.ResponseFormat | Omit = omit,
566 safety_identifier: str | Omit = omit,
567 seed: Optional[int] | Omit = omit,
568 service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
569 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
570 store: Optional[bool] | Omit = omit,
571 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
572 temperature: Optional[float] | Omit = omit,
573 tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
574 tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
575 top_logprobs: Optional[int] | Omit = omit,
576 top_p: Optional[float] | Omit = omit,
577 user: str | Omit = omit,
578 verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
579 web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
580 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
581 # The extra values given here take precedence over values defined on the client or passed to this method.
582 extra_headers: Headers | None = None,
583 extra_query: Query | None = None,
584 extra_body: Body | None = None,
585 timeout: float | httpx.Timeout | None | NotGiven = not_given,
586 ) -> Stream[ChatCompletionChunk]:
587 """
588 **Starting a new project?** We recommend trying
589 [Responses](https://platform.openai.com/docs/api-reference/responses) to take
590 advantage of the latest OpenAI platform features. Compare
591 [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).
592
593 ---
594
595 Creates a model response for the given chat conversation. Learn more in the
596 [text generation](https://platform.openai.com/docs/guides/text-generation),
597 [vision](https://platform.openai.com/docs/guides/vision), and
598 [audio](https://platform.openai.com/docs/guides/audio) guides.
599
600 Parameter support can differ depending on the model used to generate the
601 response, particularly for newer reasoning models. Parameters that are only
602 supported for reasoning models are noted below. For the current state of
603 unsupported parameters in reasoning models,
604 [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
605
606 Args:
607 messages: A list of messages comprising the conversation so far. Depending on the
608 [model](https://platform.openai.com/docs/models) you use, different message
609 types (modalities) are supported, like
610 [text](https://platform.openai.com/docs/guides/text-generation),
611 [images](https://platform.openai.com/docs/guides/vision), and
612 [audio](https://platform.openai.com/docs/guides/audio).
613
614 model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
615 wide range of models with different capabilities, performance characteristics,
616 and price points. Refer to the
617 [model guide](https://platform.openai.com/docs/models) to browse and compare
618 available models.
619
620 stream: If set to true, the model response data will be streamed to the client as it is
621 generated using
622 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
623 See the
624 [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
625 for more information, along with the
626 [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
627 guide for more information on how to handle the streaming events.
628
629 audio: Parameters for audio output. Required when audio output is requested with
630 `modalities: ["audio"]`.
631 [Learn more](https://platform.openai.com/docs/guides/audio).
632
633 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
634 existing frequency in the text so far, decreasing the model's likelihood to
635 repeat the same line verbatim.
636
637 function_call: Deprecated in favor of `tool_choice`.
638
639 Controls which (if any) function is called by the model.
640
641 `none` means the model will not call a function and instead generates a message.
642
643 `auto` means the model can pick between generating a message or calling a
644 function.
645
646 Specifying a particular function via `{"name": "my_function"}` forces the model
647 to call that function.
648
649 `none` is the default when no functions are present. `auto` is the default if
650 functions are present.
651
652 functions: Deprecated in favor of `tools`.
653
654 A list of functions the model may generate JSON inputs for.
655
656 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
657
658 Accepts a JSON object that maps tokens (specified by their token ID in the
659 tokenizer) to an associated bias value from -100 to 100. Mathematically, the
660 bias is added to the logits generated by the model prior to sampling. The exact
661 effect will vary per model, but values between -1 and 1 should decrease or
662 increase likelihood of selection; values like -100 or 100 should result in a ban
663 or exclusive selection of the relevant token.
664
665 logprobs: Whether to return log probabilities of the output tokens or not. If true,
666 returns the log probabilities of each output token returned in the `content` of
667 `message`.
668
669 max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
670 including visible output tokens and
671 [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
672
673 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
674 completion. This value can be used to control
675 [costs](https://openai.com/api/pricing/) for text generated via API.
676
677 This value is now deprecated in favor of `max_completion_tokens`, and is not
678 compatible with
679 [o-series models](https://platform.openai.com/docs/guides/reasoning).
680
681 metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
682 for storing additional information about the object in a structured format, and
683 querying for objects via API or the dashboard.
684
685 Keys are strings with a maximum length of 64 characters. Values are strings with
686 a maximum length of 512 characters.
687
688 modalities: Output types that you would like the model to generate. Most models are capable
689 of generating text, which is the default:
690
691 `["text"]`
692
693 The `gpt-4o-audio-preview` model can also be used to
694 [generate audio](https://platform.openai.com/docs/guides/audio). To request that
695 this model generate both text and audio responses, you can use:
696
697 `["text", "audio"]`
698
699 n: How many chat completion choices to generate for each input message. Note that
700 you will be charged based on the number of generated tokens across all of the
701 choices. Keep `n` as `1` to minimize costs.
702
703 parallel_tool_calls: Whether to enable
704 [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
705 during tool use.
706
707 prediction: Static predicted output content, such as the content of a text file that is
708 being regenerated.
709
710 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
711 whether they appear in the text so far, increasing the model's likelihood to
712 talk about new topics.
713
714 prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
715 hit rates. Replaces the `user` field.
716 [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
717
718 prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
719 prompt caching, which keeps cached prefixes active for longer, up to a maximum
720 of 24 hours.
721 [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
722
723 reasoning_effort: Constrains effort on reasoning for
724 [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
725 supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
726 Reducing reasoning effort can result in faster responses and fewer tokens used
727 on reasoning in a response.
728
729 - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
730 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
731 calls are supported for all reasoning values in gpt-5.1.
732 - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
733 support `none`.
734 - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
735 - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
736
737 response_format: An object specifying the format that the model must output.
738
739 Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
740 Outputs which ensures the model will match your supplied JSON schema. Learn more
741 in the
742 [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
743
744 Setting to `{ "type": "json_object" }` enables the older JSON mode, which
745 ensures the message the model generates is valid JSON. Using `json_schema` is
746 preferred for models that support it.
747
748 safety_identifier: A stable identifier used to help detect users of your application that may be
749 violating OpenAI's usage policies. The IDs should be a string that uniquely
750 identifies each user. We recommend hashing their username or email address, in
751 order to avoid sending us any identifying information.
752 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
753
754 seed: This feature is in Beta. If specified, our system will make a best effort to
755 sample deterministically, such that repeated requests with the same `seed` and
756 parameters should return the same result. Determinism is not guaranteed, and you
757 should refer to the `system_fingerprint` response parameter to monitor changes
758 in the backend.
759
760 service_tier: Specifies the processing type used for serving the request.
761
762 - If set to 'auto', then the request will be processed with the service tier
763 configured in the Project settings. Unless otherwise configured, the Project
764 will use 'default'.
765 - If set to 'default', then the request will be processed with the standard
766 pricing and performance for the selected model.
767 - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
768 '[priority](https://openai.com/api-priority-processing/)', then the request
769 will be processed with the corresponding service tier.
770 - When not set, the default behavior is 'auto'.
771
772 When the `service_tier` parameter is set, the response body will include the
773 `service_tier` value based on the processing mode actually used to serve the
774 request. This response value may be different from the value set in the
775 parameter.
776
777 stop: Not supported with latest reasoning models `o3` and `o4-mini`.
778
779 Up to 4 sequences where the API will stop generating further tokens. The
780 returned text will not contain the stop sequence.
781
782 store: Whether or not to store the output of this chat completion request for use in
783 our [model distillation](https://platform.openai.com/docs/guides/distillation)
784 or [evals](https://platform.openai.com/docs/guides/evals) products.
785
786 Supports text and image inputs. Note: image inputs over 8MB will be dropped.
787
788 stream_options: Options for streaming response. Only set this when you set `stream: true`.
789
790 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
791 make the output more random, while lower values like 0.2 will make it more
792 focused and deterministic. We generally recommend altering this or `top_p` but
793 not both.
794
795 tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
796 not call any tool and instead generates a message. `auto` means the model can
797 pick between generating a message or calling one or more tools. `required` means
798 the model must call one or more tools. Specifying a particular tool via
799 `{"type": "function", "function": {"name": "my_function"}}` forces the model to
800 call that tool.
801
802 `none` is the default when no tools are present. `auto` is the default if tools
803 are present.
804
805 tools: A list of tools the model may call. You can provide either
806 [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools)
807 or [function tools](https://platform.openai.com/docs/guides/function-calling).
808
809 top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
810 return at each token position, each with an associated log probability.
811 `logprobs` must be set to `true` if this parameter is used.
812
813 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
814 model considers the results of the tokens with top_p probability mass. So 0.1
815 means only the tokens comprising the top 10% probability mass are considered.
816
817 We generally recommend altering this or `temperature` but not both.
818
819 user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
820 `prompt_cache_key` instead to maintain caching optimizations. A stable
821 identifier for your end-users. Used to boost cache hit rates by better bucketing
822 similar requests and to help OpenAI detect and prevent abuse.
823 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
824
825 verbosity: Constrains the verbosity of the model's response. Lower values will result in
826 more concise responses, while higher values will result in more verbose
827 responses. Currently supported values are `low`, `medium`, and `high`.
828
829 web_search_options: This tool searches the web for relevant results to use in a response. Learn more
830 about the
831 [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).
832
833 extra_headers: Send extra headers
834
835 extra_query: Add additional query parameters to the request
836
837 extra_body: Add additional JSON properties to the request
838
839 timeout: Override the client-level default timeout for this request, in seconds
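
        For example, a minimal streaming call (illustrative only; assumes a
        configured client named `client`):

        ```py
        stream = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Say hello."}],
            stream=True,
        )
        for chunk in stream:
            # `choices` can be empty on some chunks (e.g. the final usage chunk)
            delta = chunk.choices[0].delta if chunk.choices else None
            if delta and delta.content:
                print(delta.content, end="")
        ```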
840 """
841 ...
842
843 @overload
844 def create(
845 self,
846 *,
847 messages: Iterable[ChatCompletionMessageParam],
848 model: Union[str, ChatModel],
849 stream: bool,
850 audio: Optional[ChatCompletionAudioParam] | Omit = omit,
851 frequency_penalty: Optional[float] | Omit = omit,
852 function_call: completion_create_params.FunctionCall | Omit = omit,
853 functions: Iterable[completion_create_params.Function] | Omit = omit,
854 logit_bias: Optional[Dict[str, int]] | Omit = omit,
855 logprobs: Optional[bool] | Omit = omit,
856 max_completion_tokens: Optional[int] | Omit = omit,
857 max_tokens: Optional[int] | Omit = omit,
858 metadata: Optional[Metadata] | Omit = omit,
859 modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
860 n: Optional[int] | Omit = omit,
861 parallel_tool_calls: bool | Omit = omit,
862 prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
863 presence_penalty: Optional[float] | Omit = omit,
864 prompt_cache_key: str | Omit = omit,
865 prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
866 reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
867 response_format: completion_create_params.ResponseFormat | Omit = omit,
868 safety_identifier: str | Omit = omit,
869 seed: Optional[int] | Omit = omit,
870 service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
871 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
872 store: Optional[bool] | Omit = omit,
873 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
874 temperature: Optional[float] | Omit = omit,
875 tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
876 tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
877 top_logprobs: Optional[int] | Omit = omit,
878 top_p: Optional[float] | Omit = omit,
879 user: str | Omit = omit,
880 verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
881 web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
882 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
883 # The extra values given here take precedence over values defined on the client or passed to this method.
884 extra_headers: Headers | None = None,
885 extra_query: Query | None = None,
886 extra_body: Body | None = None,
887 timeout: float | httpx.Timeout | None | NotGiven = not_given,
888 ) -> ChatCompletion | Stream[ChatCompletionChunk]:
889 """
890 **Starting a new project?** We recommend trying
891 [Responses](https://platform.openai.com/docs/api-reference/responses) to take
892 advantage of the latest OpenAI platform features. Compare
893 [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).
894
895 ---
896
897 Creates a model response for the given chat conversation. Learn more in the
898 [text generation](https://platform.openai.com/docs/guides/text-generation),
899 [vision](https://platform.openai.com/docs/guides/vision), and
900 [audio](https://platform.openai.com/docs/guides/audio) guides.
901
902 Parameter support can differ depending on the model used to generate the
903 response, particularly for newer reasoning models. Parameters that are only
904 supported for reasoning models are noted below. For the current state of
905 unsupported parameters in reasoning models,
906 [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
907
908 Args:
909 messages: A list of messages comprising the conversation so far. Depending on the
910 [model](https://platform.openai.com/docs/models) you use, different message
911 types (modalities) are supported, like
912 [text](https://platform.openai.com/docs/guides/text-generation),
913 [images](https://platform.openai.com/docs/guides/vision), and
914 [audio](https://platform.openai.com/docs/guides/audio).
915
916 model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
917 wide range of models with different capabilities, performance characteristics,
918 and price points. Refer to the
919 [model guide](https://platform.openai.com/docs/models) to browse and compare
920 available models.
921
922 stream: If set to true, the model response data will be streamed to the client as it is
923 generated using
924 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
925 See the
926 [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
927 for more information, along with the
928 [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
929 guide for more information on how to handle the streaming events.
930
931 audio: Parameters for audio output. Required when audio output is requested with
932 `modalities: ["audio"]`.
933 [Learn more](https://platform.openai.com/docs/guides/audio).
934
935 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
936 existing frequency in the text so far, decreasing the model's likelihood to
937 repeat the same line verbatim.
938
939 function_call: Deprecated in favor of `tool_choice`.
940
941 Controls which (if any) function is called by the model.
942
943 `none` means the model will not call a function and instead generates a message.
944
945 `auto` means the model can pick between generating a message or calling a
946 function.
947
948 Specifying a particular function via `{"name": "my_function"}` forces the model
949 to call that function.
950
951 `none` is the default when no functions are present. `auto` is the default if
952 functions are present.
953
954 functions: Deprecated in favor of `tools`.
955
956 A list of functions the model may generate JSON inputs for.
957
958 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
959
960 Accepts a JSON object that maps tokens (specified by their token ID in the
961 tokenizer) to an associated bias value from -100 to 100. Mathematically, the
962 bias is added to the logits generated by the model prior to sampling. The exact
963 effect will vary per model, but values between -1 and 1 should decrease or
964 increase likelihood of selection; values like -100 or 100 should result in a ban
965 or exclusive selection of the relevant token.
966
967 logprobs: Whether to return log probabilities of the output tokens or not. If true,
968 returns the log probabilities of each output token returned in the `content` of
969 `message`.
970
971 max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
972 including visible output tokens and
973 [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
974
975 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
976 completion. This value can be used to control
977 [costs](https://openai.com/api/pricing/) for text generated via API.
978
979 This value is now deprecated in favor of `max_completion_tokens`, and is not
980 compatible with
981 [o-series models](https://platform.openai.com/docs/guides/reasoning).
982
983 metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
984 for storing additional information about the object in a structured format, and
985 querying for objects via API or the dashboard.
986
987 Keys are strings with a maximum length of 64 characters. Values are strings with
988 a maximum length of 512 characters.
989
990 modalities: Output types that you would like the model to generate. Most models are capable
991 of generating text, which is the default:
992
993 `["text"]`
994
995 The `gpt-4o-audio-preview` model can also be used to
996 [generate audio](https://platform.openai.com/docs/guides/audio). To request that
997 this model generate both text and audio responses, you can use:
998
999 `["text", "audio"]`
1000
1001 n: How many chat completion choices to generate for each input message. Note that
1002 you will be charged based on the number of generated tokens across all of the
1003 choices. Keep `n` as `1` to minimize costs.
1004
1005 parallel_tool_calls: Whether to enable
1006 [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
1007 during tool use.
1008
1009 prediction: Static predicted output content, such as the content of a text file that is
1010 being regenerated.
1011
1012 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
1013 whether they appear in the text so far, increasing the model's likelihood to
1014 talk about new topics.
1015
1016 prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
1017 hit rates. Replaces the `user` field.
1018 [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
1019
1020 prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
1021 prompt caching, which keeps cached prefixes active for longer, up to a maximum
1022 of 24 hours.
1023 [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
1024
1025 reasoning_effort: Constrains effort on reasoning for
1026 [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
1027 supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
1028 Reducing reasoning effort can result in faster responses and fewer tokens used
1029 on reasoning in a response.
1030
1031 - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
1032 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
1033 calls are supported for all reasoning values in gpt-5.1.
1034 - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
1035 support `none`.
1036 - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
1037 - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
1038
1039 response_format: An object specifying the format that the model must output.
1040
1041 Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
1042 Outputs which ensures the model will match your supplied JSON schema. Learn more
1043 in the
1044 [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
1045
1046 Setting to `{ "type": "json_object" }` enables the older JSON mode, which
1047 ensures the message the model generates is valid JSON. Using `json_schema` is
1048 preferred for models that support it.
1049
1050 safety_identifier: A stable identifier used to help detect users of your application that may be
1051 violating OpenAI's usage policies. The IDs should be a string that uniquely
1052 identifies each user. We recommend hashing their username or email address, in
1053 order to avoid sending us any identifying information.
1054 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
1055
1056 seed: This feature is in Beta. If specified, our system will make a best effort to
1057 sample deterministically, such that repeated requests with the same `seed` and
1058 parameters should return the same result. Determinism is not guaranteed, and you
1059 should refer to the `system_fingerprint` response parameter to monitor changes
1060 in the backend.
1061
1062 service_tier: Specifies the processing type used for serving the request.
1063
1064 - If set to 'auto', then the request will be processed with the service tier
1065 configured in the Project settings. Unless otherwise configured, the Project
1066 will use 'default'.
1067 - If set to 'default', then the request will be processed with the standard
1068 pricing and performance for the selected model.
1069 - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
1070 '[priority](https://openai.com/api-priority-processing/)', then the request
1071 will be processed with the corresponding service tier.
1072 - When not set, the default behavior is 'auto'.
1073
              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.

              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          store: Whether or not to store the output of this chat completion request for use in
              our [model distillation](https://platform.openai.com/docs/guides/distillation)
              or [evals](https://platform.openai.com/docs/guides/evals) products.

              Supports text and image inputs. Note: image inputs over 8MB will be dropped.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tool and instead generates a message. `auto` means the model can
              pick between generating a message or calling one or more tools. `required` means
              the model must call one or more tools. Specifying a particular tool via
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool (see the sketch after this parameter list).

              `none` is the default when no tools are present. `auto` is the default if tools
              are present.

          tools: A list of tools the model may call. You can provide either
              [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools)
              or [function tools](https://platform.openai.com/docs/guides/function-calling).

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          verbosity: Constrains the verbosity of the model's response. Lower values will result in
              more concise responses, while higher values will result in more verbose
              responses. Currently supported values are `low`, `medium`, and `high`.

          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
              about the
              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
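
        A minimal sketch of the `tool_choice` shape described above (the
        `get_weather` tool is a hypothetical example, not part of the API):

        ```py
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "What is the weather in Paris?"}],
            tools=[
                {
                    "type": "function",
                    "function": {
                        "name": "get_weather",
                        "description": "Look up the current weather for a city.",
                        "parameters": {
                            "type": "object",
                            "properties": {"city": {"type": "string"}},
                            "required": ["city"],
                        },
                    },
                }
            ],
            # Force a call to `get_weather` instead of a plain text reply.
            tool_choice={"type": "function", "function": {"name": "get_weather"}},
        )
        ```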
1142 """
1143 ...
1144
    @required_args(["messages", "model"], ["messages", "model", "stream"])
    def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
        validate_response_format(response_format)
        return self._post(
            "/chat/completions",
            body=maybe_transform(
                {
                    "messages": messages,
                    "model": model,
                    "audio": audio,
                    "frequency_penalty": frequency_penalty,
                    "function_call": function_call,
                    "functions": functions,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_completion_tokens": max_completion_tokens,
                    "max_tokens": max_tokens,
                    "metadata": metadata,
                    "modalities": modalities,
                    "n": n,
                    "parallel_tool_calls": parallel_tool_calls,
                    "prediction": prediction,
                    "presence_penalty": presence_penalty,
                    "prompt_cache_key": prompt_cache_key,
                    "prompt_cache_retention": prompt_cache_retention,
                    "reasoning_effort": reasoning_effort,
                    "response_format": response_format,
                    "safety_identifier": safety_identifier,
                    "seed": seed,
                    "service_tier": service_tier,
                    "stop": stop,
                    "store": store,
                    "stream": stream,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "user": user,
                    "verbosity": verbosity,
                    "web_search_options": web_search_options,
                },
                completion_create_params.CompletionCreateParamsStreaming
                if stream
                else completion_create_params.CompletionCreateParamsNonStreaming,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ChatCompletion,
            stream=stream or False,
            stream_cls=Stream[ChatCompletionChunk],
        )

    def retrieve(
        self,
        completion_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion:
        """Get a stored chat completion.

        Only Chat Completions that have been created with
        the `store` parameter set to `true` will be returned.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
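
        For example (the completion ID here is hypothetical):

        ```py
        completion = client.chat.completions.retrieve("chatcmpl-abc123")
        print(completion.choices[0].message.content)
        ```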
1268 """
1269 if not completion_id:
1270 raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
1271 return self._get(
1272 f"/chat/completions/{completion_id}",
1273 options=make_request_options(
1274 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1275 ),
1276 cast_to=ChatCompletion,
1277 )
1278
    def update(
        self,
        completion_id: str,
        *,
        metadata: Optional[Metadata],
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion:
        """Modify a stored chat completion.

        Only Chat Completions that have been created
        with the `store` parameter set to `true` can be modified. Currently, the only
        supported modification is to update the `metadata` field.

        Args:
          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
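
        For example (the completion ID and metadata values are hypothetical):

        ```py
        updated = client.chat.completions.update(
            "chatcmpl-abc123",
            metadata={"topic": "math"},
        )
        ```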
1312 """
1313 if not completion_id:
1314 raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
1315 return self._post(
1316 f"/chat/completions/{completion_id}",
1317 body=maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams),
1318 options=make_request_options(
1319 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1320 ),
1321 cast_to=ChatCompletion,
1322 )
1323
    def list(
        self,
        *,
        after: str | Omit = omit,
        limit: int | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: str | Omit = omit,
        order: Literal["asc", "desc"] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> SyncCursorPage[ChatCompletion]:
        """List stored Chat Completions.

        Only Chat Completions that have been stored with
        the `store` parameter set to `true` will be returned.

        Args:
          after: Identifier for the last chat completion from the previous pagination request.

          limit: Number of Chat Completions to retrieve.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: The model used to generate the Chat Completions.

          order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or
              `desc` for descending order. Defaults to `asc`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
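
        The returned page can be iterated directly and will fetch additional
        pages automatically, e.g.:

        ```py
        for completion in client.chat.completions.list(limit=20, order="desc"):
            print(completion.id)
        ```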
1368 """
1369 return self._get_api_list(
1370 "/chat/completions",
1371 page=SyncCursorPage[ChatCompletion],
1372 options=make_request_options(
1373 extra_headers=extra_headers,
1374 extra_query=extra_query,
1375 extra_body=extra_body,
1376 timeout=timeout,
1377 query=maybe_transform(
1378 {
1379 "after": after,
1380 "limit": limit,
1381 "metadata": metadata,
1382 "model": model,
1383 "order": order,
1384 },
1385 completion_list_params.CompletionListParams,
1386 ),
1387 ),
1388 model=ChatCompletion,
1389 )
1390
    def delete(
        self,
        completion_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletionDeleted:
        """Delete a stored chat completion.

        Only Chat Completions that have been created
        with the `store` parameter set to `true` can be deleted.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
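
        For example (the completion ID is hypothetical):

        ```py
        deleted = client.chat.completions.delete("chatcmpl-abc123")
        print(deleted.deleted)
        ```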
1415 """
1416 if not completion_id:
1417 raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
1418 return self._delete(
1419 f"/chat/completions/{completion_id}",
1420 options=make_request_options(
1421 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1422 ),
1423 cast_to=ChatCompletionDeleted,
1424 )
1425
    def stream(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletionStreamManager[ResponseFormatT]:
        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
        and automatic accumulation of each delta.

        This also supports all of the parsing utilities that `.parse()` does.

        Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:

        ```py
        with client.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[...],
        ) as stream:
            for event in stream:
                if event.type == "content.delta":
                    print(event.delta, flush=True, end="")
        ```

        When the context manager is entered, a `ChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an iterator. The full list of events that are yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).

        When the context manager exits, the response will be closed; however, the `stream` instance is still available outside
        the context manager.
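
        For example, a sketch of reading the fully accumulated completion once
        the stream has finished, via the `get_final_completion()` accessor the
        streaming helpers expose:

        ```py
        with client.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "Say hello"}],
        ) as stream:
            for event in stream:
                ...

        # The accumulated state is still usable after the context manager exits.
        completion = stream.get_final_completion()
        print(completion.choices[0].message.content)
        ```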
1491 """
1492 extra_headers = {
1493 "X-Stainless-Helper-Method": "chat.completions.stream",
1494 **(extra_headers or {}),
1495 }
1496
1497 api_request: partial[Stream[ChatCompletionChunk]] = partial(
1498 self.create,
1499 messages=messages,
1500 model=model,
1501 audio=audio,
1502 stream=True,
1503 response_format=_type_to_response_format(response_format),
1504 frequency_penalty=frequency_penalty,
1505 function_call=function_call,
1506 functions=functions,
1507 logit_bias=logit_bias,
1508 logprobs=logprobs,
1509 max_completion_tokens=max_completion_tokens,
1510 max_tokens=max_tokens,
1511 metadata=metadata,
1512 modalities=modalities,
1513 n=n,
1514 parallel_tool_calls=parallel_tool_calls,
1515 prediction=prediction,
1516 presence_penalty=presence_penalty,
1517 prompt_cache_key=prompt_cache_key,
1518 prompt_cache_retention=prompt_cache_retention,
1519 reasoning_effort=reasoning_effort,
1520 safety_identifier=safety_identifier,
1521 seed=seed,
1522 service_tier=service_tier,
1523 store=store,
1524 stop=stop,
1525 stream_options=stream_options,
1526 temperature=temperature,
1527 tool_choice=tool_choice,
1528 tools=tools,
1529 top_logprobs=top_logprobs,
1530 top_p=top_p,
1531 user=user,
1532 verbosity=verbosity,
1533 web_search_options=web_search_options,
1534 extra_headers=extra_headers,
1535 extra_query=extra_query,
1536 extra_body=extra_body,
1537 timeout=timeout,
1538 )
1539 return ChatCompletionStreamManager(
1540 api_request,
1541 response_format=response_format,
1542 input_tools=tools,
1543 )
1544
1545
class AsyncCompletions(AsyncAPIResource):
    @cached_property
    def messages(self) -> AsyncMessages:
        return AsyncMessages(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncCompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncCompletionsWithStreamingResponse(self)

    async def parse(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        response_format: type[ResponseFormatT] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ParsedChatCompletion[ResponseFormatT]:
1614 """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
1615 & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.

        You can pass a pydantic model to this method and it will automatically convert the model
        into a JSON schema, send it to the API and parse the response content back into the given model.

        This method will also automatically parse `function` tool calls if:
        - You use the `openai.pydantic_function_tool()` helper method
        - You mark your tool schema with `"strict": True`

        Example usage:
        ```py
        from typing import List

        from pydantic import BaseModel
        from openai import AsyncOpenAI


        class Step(BaseModel):
            explanation: str
            output: str


        class MathResponse(BaseModel):
            steps: List[Step]
            final_answer: str


        client = AsyncOpenAI()
        completion = await client.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[
                {"role": "system", "content": "You are a helpful math tutor."},
                {"role": "user", "content": "solve 8x + 31 = 2"},
            ],
            response_format=MathResponse,
        )

        message = completion.choices[0].message
        if message.parsed:
            print(message.parsed.steps)
            print("answer: ", message.parsed.final_answer)
        ```
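
        A sketch of automatic `function` tool call parsing with the
        `openai.pydantic_function_tool()` helper mentioned above (the
        `GetWeather` model is a hypothetical example):

        ```py
        import openai


        class GetWeather(BaseModel):
            city: str


        completion = await client.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "What's the weather in Paris?"}],
            tools=[openai.pydantic_function_tool(GetWeather)],
        )

        # Arguments of strict function tool calls are parsed into the model.
        tool_call = completion.choices[0].message.tool_calls[0]
        print(tool_call.function.parsed_arguments)
        ```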
1655 """
1656 _validate_input_tools(tools)
1657
1658 extra_headers = {
1659 "X-Stainless-Helper-Method": "chat.completions.parse",
1660 **(extra_headers or {}),
1661 }
1662
1663 def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
1664 return _parse_chat_completion(
1665 response_format=response_format,
1666 chat_completion=raw_completion,
1667 input_tools=tools,
1668 )
1669
1670 return await self._post(
1671 "/chat/completions",
1672 body=await async_maybe_transform(
1673 {
1674 "messages": messages,
1675 "model": model,
1676 "audio": audio,
1677 "frequency_penalty": frequency_penalty,
1678 "function_call": function_call,
1679 "functions": functions,
1680 "logit_bias": logit_bias,
1681 "logprobs": logprobs,
1682 "max_completion_tokens": max_completion_tokens,
1683 "max_tokens": max_tokens,
1684 "metadata": metadata,
1685 "modalities": modalities,
1686 "n": n,
1687 "parallel_tool_calls": parallel_tool_calls,
1688 "prediction": prediction,
1689 "presence_penalty": presence_penalty,
1690 "prompt_cache_key": prompt_cache_key,
1691 "prompt_cache_retention": prompt_cache_retention,
1692 "reasoning_effort": reasoning_effort,
1693 "response_format": _type_to_response_format(response_format),
1694 "safety_identifier": safety_identifier,
1695 "seed": seed,
1696 "service_tier": service_tier,
1697 "store": store,
1698 "stop": stop,
1699 "stream": False,
1700 "stream_options": stream_options,
1701 "temperature": temperature,
1702 "tool_choice": tool_choice,
1703 "tools": tools,
1704 "top_logprobs": top_logprobs,
1705 "top_p": top_p,
1706 "user": user,
1707 "verbosity": verbosity,
1708 "web_search_options": web_search_options,
1709 },
1710 completion_create_params.CompletionCreateParams,
1711 ),
1712 options=make_request_options(
1713 extra_headers=extra_headers,
1714 extra_query=extra_query,
1715 extra_body=extra_body,
1716 timeout=timeout,
1717 post_parser=parser,
1718 ),
1719 # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
1720 # in the `parser` function above
1721 cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
1722 stream=False,
1723 )
1724
    @overload
    async def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream: Optional[Literal[False]] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion:
        """
        **Starting a new project?** We recommend trying
        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
        advantage of the latest OpenAI platform features. Compare
        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).

        ---

        Creates a model response for the given chat conversation. Learn more in the
        [text generation](https://platform.openai.com/docs/guides/text-generation),
        [vision](https://platform.openai.com/docs/guides/vision), and
        [audio](https://platform.openai.com/docs/guides/audio) guides.

        Parameter support can differ depending on the model used to generate the
        response, particularly for newer reasoning models. Parameters that are only
        supported for reasoning models are noted below. For the current state of
        unsupported parameters in reasoning models,
        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).

        Args:
          messages: A list of messages comprising the conversation so far. Depending on the
              [model](https://platform.openai.com/docs/models) you use, different message
              types (modalities) are supported, like
              [text](https://platform.openai.com/docs/guides/text-generation),
              [images](https://platform.openai.com/docs/guides/vision), and
              [audio](https://platform.openai.com/docs/guides/audio).

          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
              wide range of models with different capabilities, performance characteristics,
              and price points. Refer to the
              [model guide](https://platform.openai.com/docs/models) to browse and compare
              available models.

          audio: Parameters for audio output. Required when audio output is requested with
              `modalities: ["audio"]`.
              [Learn more](https://platform.openai.com/docs/guides/audio).

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.

          function_call: Deprecated in favor of `tool_choice`.

              Controls which (if any) function is called by the model.

              `none` means the model will not call a function and instead generates a message.

              `auto` means the model can pick between generating a message or calling a
              function.

              Specifying a particular function via `{"name": "my_function"}` forces the model
              to call that function.

              `none` is the default when no functions are present. `auto` is the default if
              functions are present.

          functions: Deprecated in favor of `tools`.

              A list of functions the model may generate JSON inputs for.

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.

              Accepts a JSON object that maps tokens (specified by their token ID in the
              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
              bias is added to the logits generated by the model prior to sampling. The exact
              effect will vary per model, but values between -1 and 1 should decrease or
              increase likelihood of selection; values like -100 or 100 should result in a ban
              or exclusive selection of the relevant token.

          logprobs: Whether to return log probabilities of the output tokens or not. If true,
              returns the log probabilities of each output token returned in the `content` of
              `message`.

          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
              including visible output tokens and
              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
              completion. This value can be used to control
              [costs](https://openai.com/api/pricing/) for text generated via API.

              This value is now deprecated in favor of `max_completion_tokens`, and is not
              compatible with
              [o-series models](https://platform.openai.com/docs/guides/reasoning).

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          modalities: Output types that you would like the model to generate. Most models are capable
              of generating text, which is the default:

              `["text"]`

              The `gpt-4o-audio-preview` model can also be used to
              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
              this model generate both text and audio responses, you can use:

              `["text", "audio"]`

          n: How many chat completion choices to generate for each input message. Note that
              you will be charged based on the number of generated tokens across all of the
              choices. Keep `n` as `1` to minimize costs.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          prediction: Static predicted output content, such as the content of a text file that is
              being regenerated.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.

          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
              hit rates. Replaces the `user` field.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).

          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
              prompt caching, which keeps cached prefixes active for longer, up to a maximum
              of 24 hours.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.

          response_format: An object specifying the format that the model must output.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema (see the
              sketch after this parameter list). Learn more in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
              ensures the message the model generates is valid JSON. Using `json_schema` is
              preferred for models that support it.

          safety_identifier: A stable identifier used to help detect users of your application that may be
              violating OpenAI's usage policies. The IDs should be a string that uniquely
              identifies each user. We recommend hashing their username or email address, in
              order to avoid sending us any identifying information.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          seed: This feature is in Beta. If specified, our system will make a best effort to
              sample deterministically, such that repeated requests with the same `seed` and
              parameters should return the same result. Determinism is not guaranteed, and you
              should refer to the `system_fingerprint` response parameter to monitor changes
              in the backend.

          service_tier: Specifies the processing type used for serving the request.

              - If set to 'auto', then the request will be processed with the service tier
                configured in the Project settings. Unless otherwise configured, the Project
                will use 'default'.
              - If set to 'default', then the request will be processed with the standard
                pricing and performance for the selected model.
              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
                '[priority](https://openai.com/api-priority-processing/)', then the request
                will be processed with the corresponding service tier.
              - When not set, the default behavior is 'auto'.

              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.

              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          store: Whether or not to store the output of this chat completion request for use in
              our [model distillation](https://platform.openai.com/docs/guides/distillation)
              or [evals](https://platform.openai.com/docs/guides/evals) products.

              Supports text and image inputs. Note: image inputs over 8MB will be dropped.

          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
              for more information, along with the
              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
              guide for more information on how to handle the streaming events.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tool and instead generates a message. `auto` means the model can
              pick between generating a message or calling one or more tools. `required` means
              the model must call one or more tools. Specifying a particular tool via
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

              `none` is the default when no tools are present. `auto` is the default if tools
              are present.

          tools: A list of tools the model may call. You can provide either
              [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools)
              or [function tools](https://platform.openai.com/docs/guides/function-calling).

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          verbosity: Constrains the verbosity of the model's response. Lower values will result in
              more concise responses, while higher values will result in more verbose
              responses. Currently supported values are `low`, `medium`, and `high`.

          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
              about the
              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
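
        A minimal sketch of the raw `json_schema` response format described
        above (the schema itself is a hypothetical example):

        ```py
        completion = await client.chat.completions.create(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "Alice and Bob meet on 2025-01-01."}],
            response_format={
                "type": "json_schema",
                "json_schema": {
                    "name": "event",
                    "strict": True,
                    "schema": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string"},
                            "date": {"type": "string"},
                        },
                        "required": ["name", "date"],
                        "additionalProperties": False,
                    },
                },
            },
        )
        # The message content is a JSON string matching the schema.
        print(completion.choices[0].message.content)
        ```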
2024 """
2025 ...
2026
    @overload
    async def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        stream: Literal[True],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncStream[ChatCompletionChunk]:
        """
        **Starting a new project?** We recommend trying
        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
        advantage of the latest OpenAI platform features. Compare
        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).

        ---

        Creates a model response for the given chat conversation. Learn more in the
        [text generation](https://platform.openai.com/docs/guides/text-generation),
        [vision](https://platform.openai.com/docs/guides/vision), and
        [audio](https://platform.openai.com/docs/guides/audio) guides.

        Parameter support can differ depending on the model used to generate the
        response, particularly for newer reasoning models. Parameters that are only
        supported for reasoning models are noted below. For the current state of
        unsupported parameters in reasoning models,
        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).

        Args:
          messages: A list of messages comprising the conversation so far. Depending on the
              [model](https://platform.openai.com/docs/models) you use, different message
              types (modalities) are supported, like
              [text](https://platform.openai.com/docs/guides/text-generation),
              [images](https://platform.openai.com/docs/guides/vision), and
              [audio](https://platform.openai.com/docs/guides/audio).

          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
              wide range of models with different capabilities, performance characteristics,
              and price points. Refer to the
              [model guide](https://platform.openai.com/docs/models) to browse and compare
              available models.

          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format);
              a sketch of consuming the stream appears after this parameter list.
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
              for more information, along with the
              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
              guide for more information on how to handle the streaming events.

          audio: Parameters for audio output. Required when audio output is requested with
              `modalities: ["audio"]`.
              [Learn more](https://platform.openai.com/docs/guides/audio).

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.

          function_call: Deprecated in favor of `tool_choice`.

              Controls which (if any) function is called by the model.

              `none` means the model will not call a function and instead generates a message.

              `auto` means the model can pick between generating a message or calling a
              function.

              Specifying a particular function via `{"name": "my_function"}` forces the model
              to call that function.

              `none` is the default when no functions are present. `auto` is the default if
              functions are present.

          functions: Deprecated in favor of `tools`.

              A list of functions the model may generate JSON inputs for.

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.

              Accepts a JSON object that maps tokens (specified by their token ID in the
              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
              bias is added to the logits generated by the model prior to sampling. The exact
              effect will vary per model, but values between -1 and 1 should decrease or
              increase likelihood of selection; values like -100 or 100 should result in a ban
              or exclusive selection of the relevant token.

          logprobs: Whether to return log probabilities of the output tokens or not. If true,
              returns the log probabilities of each output token returned in the `content` of
              `message`.

          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
              including visible output tokens and
              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
              completion. This value can be used to control
              [costs](https://openai.com/api/pricing/) for text generated via API.

              This value is now deprecated in favor of `max_completion_tokens`, and is not
              compatible with
              [o-series models](https://platform.openai.com/docs/guides/reasoning).

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          modalities: Output types that you would like the model to generate. Most models are capable
              of generating text, which is the default:

              `["text"]`

              The `gpt-4o-audio-preview` model can also be used to
              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
              this model generate both text and audio responses, you can use:

              `["text", "audio"]`

          n: How many chat completion choices to generate for each input message. Note that
              you will be charged based on the number of generated tokens across all of the
              choices. Keep `n` as `1` to minimize costs.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          prediction: Static predicted output content, such as the content of a text file that is
              being regenerated.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.

          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
              hit rates. Replaces the `user` field.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).

          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
              prompt caching, which keeps cached prefixes active for longer, up to a maximum
              of 24 hours.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.

          response_format: An object specifying the format that the model must output.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
              ensures the message the model generates is valid JSON. Using `json_schema` is
              preferred for models that support it.

          safety_identifier: A stable identifier used to help detect users of your application that may be
              violating OpenAI's usage policies. The IDs should be a string that uniquely
              identifies each user. We recommend hashing their username or email address, in
              order to avoid sending us any identifying information.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          seed: This feature is in Beta. If specified, our system will make a best effort to
              sample deterministically, such that repeated requests with the same `seed` and
              parameters should return the same result. Determinism is not guaranteed, and you
              should refer to the `system_fingerprint` response parameter to monitor changes
              in the backend.

          service_tier: Specifies the processing type used for serving the request.

              - If set to 'auto', then the request will be processed with the service tier
                configured in the Project settings. Unless otherwise configured, the Project
                will use 'default'.
              - If set to 'default', then the request will be processed with the standard
                pricing and performance for the selected model.
              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
                '[priority](https://openai.com/api-priority-processing/)', then the request
                will be processed with the corresponding service tier.
              - When not set, the default behavior is 'auto'.

              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.

              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          store: Whether or not to store the output of this chat completion request for use in
              our [model distillation](https://platform.openai.com/docs/guides/distillation)
              or [evals](https://platform.openai.com/docs/guides/evals) products.

              Supports text and image inputs. Note: image inputs over 8MB will be dropped.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tool and instead generates a message. `auto` means the model can
              pick between generating a message or calling one or more tools. `required` means
              the model must call one or more tools. Specifying a particular tool via
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

              `none` is the default when no tools are present. `auto` is the default if tools
              are present.

          tools: A list of tools the model may call. You can provide either
              [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools)
              or [function tools](https://platform.openai.com/docs/guides/function-calling).

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          verbosity: Constrains the verbosity of the model's response. Lower values will result in
              more concise responses, while higher values will result in more verbose
              responses. Currently supported values are `low`, `medium`, and `high`.

          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
              about the
              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
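
        For example, a sketch of consuming the resulting stream:

        ```py
        stream = await client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Say hello"}],
            stream=True,
        )
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")
        ```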
2326 """
2327 ...
2328
2329 @overload
2330 async def create(
2331 self,
2332 *,
2333 messages: Iterable[ChatCompletionMessageParam],
2334 model: Union[str, ChatModel],
2335 stream: bool,
2336 audio: Optional[ChatCompletionAudioParam] | Omit = omit,
2337 frequency_penalty: Optional[float] | Omit = omit,
2338 function_call: completion_create_params.FunctionCall | Omit = omit,
2339 functions: Iterable[completion_create_params.Function] | Omit = omit,
2340 logit_bias: Optional[Dict[str, int]] | Omit = omit,
2341 logprobs: Optional[bool] | Omit = omit,
2342 max_completion_tokens: Optional[int] | Omit = omit,
2343 max_tokens: Optional[int] | Omit = omit,
2344 metadata: Optional[Metadata] | Omit = omit,
2345 modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
2346 n: Optional[int] | Omit = omit,
2347 parallel_tool_calls: bool | Omit = omit,
2348 prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
2349 presence_penalty: Optional[float] | Omit = omit,
2350 prompt_cache_key: str | Omit = omit,
2351 prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
2352 reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
2353 response_format: completion_create_params.ResponseFormat | Omit = omit,
2354 safety_identifier: str | Omit = omit,
2355 seed: Optional[int] | Omit = omit,
2356 service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
2357 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
2358 store: Optional[bool] | Omit = omit,
2359 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
2360 temperature: Optional[float] | Omit = omit,
2361 tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
2362 tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
2363 top_logprobs: Optional[int] | Omit = omit,
2364 top_p: Optional[float] | Omit = omit,
2365 user: str | Omit = omit,
2366 verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
2367 web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
2368 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2369 # The extra values given here take precedence over values defined on the client or passed to this method.
2370 extra_headers: Headers | None = None,
2371 extra_query: Query | None = None,
2372 extra_body: Body | None = None,
2373 timeout: float | httpx.Timeout | None | NotGiven = not_given,
2374 ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
        """
        **Starting a new project?** We recommend trying
        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
        advantage of the latest OpenAI platform features. Compare
        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).

        ---

        Creates a model response for the given chat conversation. Learn more in the
        [text generation](https://platform.openai.com/docs/guides/text-generation),
        [vision](https://platform.openai.com/docs/guides/vision), and
        [audio](https://platform.openai.com/docs/guides/audio) guides.

        Parameter support can differ depending on the model used to generate the
        response, particularly for newer reasoning models. Parameters that are only
        supported for reasoning models are noted below. For the current state of
        unsupported parameters in reasoning models,
        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
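
        As an illustrative sketch only (assuming `client` is an `AsyncOpenAI`
        instance), this overload covers calls where the `stream` flag is only
        known at runtime:

        ```py
        response = await client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Hello!"}],
            stream=should_stream,  # hypothetical runtime bool
        )
        ```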

        Args:
          messages: A list of messages comprising the conversation so far. Depending on the
              [model](https://platform.openai.com/docs/models) you use, different message
              types (modalities) are supported, like
              [text](https://platform.openai.com/docs/guides/text-generation),
              [images](https://platform.openai.com/docs/guides/vision), and
              [audio](https://platform.openai.com/docs/guides/audio).

          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
              wide range of models with different capabilities, performance characteristics,
              and price points. Refer to the
              [model guide](https://platform.openai.com/docs/models) to browse and compare
              available models.

          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
              for more information, along with the
              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
              guide on how to handle the streaming events.

          audio: Parameters for audio output. Required when audio output is requested with
              `modalities: ["audio"]`.
              [Learn more](https://platform.openai.com/docs/guides/audio).

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.

          function_call: Deprecated in favor of `tool_choice`.

              Controls which (if any) function is called by the model.

              `none` means the model will not call a function and instead generates a message.

              `auto` means the model can pick between generating a message or calling a
              function.

              Specifying a particular function via `{"name": "my_function"}` forces the model
              to call that function.

              `none` is the default when no functions are present. `auto` is the default if
              functions are present.

          functions: Deprecated in favor of `tools`.

              A list of functions the model may generate JSON inputs for.

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.

              Accepts a JSON object that maps tokens (specified by their token ID in the
              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
              bias is added to the logits generated by the model prior to sampling. The exact
              effect will vary per model, but values between -1 and 1 should decrease or
              increase likelihood of selection; values like -100 or 100 should result in a ban
              or exclusive selection of the relevant token.

          logprobs: Whether to return log probabilities of the output tokens or not. If true,
              returns the log probabilities of each output token returned in the `content` of
              `message`.

          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
              including visible output tokens and
              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
              completion. This value can be used to control
              [costs](https://openai.com/api/pricing/) for text generated via API.

              This value is now deprecated in favor of `max_completion_tokens`, and is not
              compatible with
              [o-series models](https://platform.openai.com/docs/guides/reasoning).

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          modalities: Output types that you would like the model to generate. Most models are capable
              of generating text, which is the default:

              `["text"]`

              The `gpt-4o-audio-preview` model can also be used to
              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
              this model generate both text and audio responses, you can use:

              `["text", "audio"]`

          n: How many chat completion choices to generate for each input message. Note that
              you will be charged based on the number of generated tokens across all of the
              choices. Keep `n` as `1` to minimize costs.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          prediction: Static predicted output content, such as the content of a text file that is
              being regenerated.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.

          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
              hit rates. Replaces the `user` field.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).

          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
              prompt caching, which keeps cached prefixes active for longer, up to a maximum
              of 24 hours.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.

          response_format: An object specifying the format that the model must output.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
              ensures the message the model generates is valid JSON. Using `json_schema` is
              preferred for models that support it.

          safety_identifier: A stable identifier used to help detect users of your application that may be
              violating OpenAI's usage policies. The IDs should be a string that uniquely
              identifies each user. We recommend hashing their username or email address, in
              order to avoid sending us any identifying information.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          seed: This feature is in Beta. If specified, our system will make a best effort to
              sample deterministically, such that repeated requests with the same `seed` and
              parameters should return the same result. Determinism is not guaranteed, and you
              should refer to the `system_fingerprint` response parameter to monitor changes
              in the backend.

          service_tier: Specifies the processing type used for serving the request.

              - If set to 'auto', then the request will be processed with the service tier
                configured in the Project settings. Unless otherwise configured, the Project
                will use 'default'.
              - If set to 'default', then the request will be processed with the standard
                pricing and performance for the selected model.
              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
                '[priority](https://openai.com/api-priority-processing/)', then the request
                will be processed with the corresponding service tier.
              - When not set, the default behavior is 'auto'.

              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.

              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          store: Whether or not to store the output of this chat completion request for use in
              our [model distillation](https://platform.openai.com/docs/guides/distillation)
              or [evals](https://platform.openai.com/docs/guides/evals) products.

              Supports text and image inputs. Note: image inputs over 8MB will be dropped.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tool and instead generates a message. `auto` means the model can
              pick between generating a message or calling one or more tools. `required` means
              the model must call one or more tools. Specifying a particular tool via
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

              `none` is the default when no tools are present. `auto` is the default if tools
              are present.

          tools: A list of tools the model may call. You can provide either
              [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools)
              or [function tools](https://platform.openai.com/docs/guides/function-calling).

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          verbosity: Constrains the verbosity of the model's response. Lower values will result in
              more concise responses, while higher values will result in more verbose
              responses. Currently supported values are `low`, `medium`, and `high`.

          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
              about the
              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @required_args(["messages", "model"], ["messages", "model", "stream"])
    async def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
        validate_response_format(response_format)
        return await self._post(
            "/chat/completions",
            body=await async_maybe_transform(
                {
                    "messages": messages,
                    "model": model,
                    "audio": audio,
                    "frequency_penalty": frequency_penalty,
                    "function_call": function_call,
                    "functions": functions,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_completion_tokens": max_completion_tokens,
                    "max_tokens": max_tokens,
                    "metadata": metadata,
                    "modalities": modalities,
                    "n": n,
                    "parallel_tool_calls": parallel_tool_calls,
                    "prediction": prediction,
                    "presence_penalty": presence_penalty,
                    "prompt_cache_key": prompt_cache_key,
                    "prompt_cache_retention": prompt_cache_retention,
                    "reasoning_effort": reasoning_effort,
                    "response_format": response_format,
                    "safety_identifier": safety_identifier,
                    "seed": seed,
                    "service_tier": service_tier,
                    "stop": stop,
                    "store": store,
                    "stream": stream,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "user": user,
                    "verbosity": verbosity,
                    "web_search_options": web_search_options,
                },
                completion_create_params.CompletionCreateParamsStreaming
                if stream
                else completion_create_params.CompletionCreateParamsNonStreaming,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ChatCompletion,
            stream=stream or False,
            stream_cls=AsyncStream[ChatCompletionChunk],
        )

    async def retrieve(
        self,
        completion_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion:
        """Get a stored chat completion.

        Only Chat Completions that have been created with
        the `store` parameter set to `true` will be returned.
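
        A minimal usage sketch (assuming `client` is an `AsyncOpenAI` instance and
        `"chatcmpl-abc123"` is a hypothetical ID of a completion created with
        `store=True`):

        ```py
        completion = await client.chat.completions.retrieve("chatcmpl-abc123")
        print(completion.id)
        ```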

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not completion_id:
            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
        return await self._get(
            f"/chat/completions/{completion_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ChatCompletion,
        )

    async def update(
        self,
        completion_id: str,
        *,
        metadata: Optional[Metadata],
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion:
        """Modify a stored chat completion.

        Only Chat Completions that have been created
        with the `store` parameter set to `true` can be modified. Currently, the only
        supported modification is to update the `metadata` field.
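
        An illustrative sketch (assuming `client` is an `AsyncOpenAI` instance and a
        hypothetical stored completion ID):

        ```py
        updated = await client.chat.completions.update(
            "chatcmpl-abc123",
            metadata={"topic": "demo"},
        )
        ```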

        Args:
          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not completion_id:
            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
        return await self._post(
            f"/chat/completions/{completion_id}",
            body=await async_maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ChatCompletion,
        )

    def list(
        self,
        *,
        after: str | Omit = omit,
        limit: int | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: str | Omit = omit,
        order: Literal["asc", "desc"] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncPaginator[ChatCompletion, AsyncCursorPage[ChatCompletion]]:
        """List stored Chat Completions.

        Only Chat Completions that have been stored with
        the `store` parameter set to `true` will be returned.
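
        The returned paginator can be iterated with `async for`, fetching further
        pages automatically as needed; a minimal sketch (assuming `client` is an
        `AsyncOpenAI` instance):

        ```py
        async for completion in client.chat.completions.list(limit=20):
            print(completion.id)
        ```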

        Args:
          after: Identifier for the last chat completion from the previous pagination request.

          limit: Number of Chat Completions to retrieve.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: The model used to generate the Chat Completions.

          order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or
              `desc` for descending order. Defaults to `asc`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        return self._get_api_list(
            "/chat/completions",
            page=AsyncCursorPage[ChatCompletion],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "after": after,
                        "limit": limit,
                        "metadata": metadata,
                        "model": model,
                        "order": order,
                    },
                    completion_list_params.CompletionListParams,
                ),
            ),
            model=ChatCompletion,
        )

    async def delete(
        self,
        completion_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletionDeleted:
        """Delete a stored chat completion.

        Only Chat Completions that have been created
        with the `store` parameter set to `true` can be deleted.
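
        A minimal sketch (assuming `client` is an `AsyncOpenAI` instance and a
        hypothetical stored completion ID):

        ```py
        deleted = await client.chat.completions.delete("chatcmpl-abc123")
        assert deleted.deleted
        ```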

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not completion_id:
            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
        return await self._delete(
            f"/chat/completions/{completion_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ChatCompletionDeleted,
        )

    def stream(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncChatCompletionStreamManager[ResponseFormatT]:
        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
        and automatic accumulation of each delta.

        This also supports all of the parsing utilities that `.parse()` does.

        Unlike `.create(stream=True)`, the `.stream()` method must be used within a context manager to prevent accidental leakage of the response:

        ```py
        async with client.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[...],
        ) as stream:
            async for event in stream:
                if event.type == "content.delta":
                    print(event.delta, flush=True, end="")
        ```

        When the context manager is entered, an `AsyncChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an async iterator. The full list of events yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).

        When the context manager exits, the response will be closed; however, the `stream` instance is still available outside
        the context manager.
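
        Since `.stream()` supports the same `response_format` parsing that `.parse()`
        does, structured outputs can be combined with streaming; an illustrative
        sketch (the `Weather` model is a hypothetical example):

        ```py
        from pydantic import BaseModel


        class Weather(BaseModel):
            city: str
            temperature_c: float


        async with client.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "What's the weather in Paris?"}],
            response_format=Weather,
        ) as stream:
            completion = await stream.get_final_completion()
            print(completion.choices[0].message.parsed)
        ```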
        """
        _validate_input_tools(tools)

        extra_headers = {
            "X-Stainless-Helper-Method": "chat.completions.stream",
            **(extra_headers or {}),
        }

        api_request = self.create(
            messages=messages,
            model=model,
            audio=audio,
            stream=True,
            response_format=_type_to_response_format(response_format),
            frequency_penalty=frequency_penalty,
            function_call=function_call,
            functions=functions,
            logit_bias=logit_bias,
            logprobs=logprobs,
            max_completion_tokens=max_completion_tokens,
            max_tokens=max_tokens,
            metadata=metadata,
            modalities=modalities,
            n=n,
            parallel_tool_calls=parallel_tool_calls,
            prediction=prediction,
            presence_penalty=presence_penalty,
            prompt_cache_key=prompt_cache_key,
            prompt_cache_retention=prompt_cache_retention,
            reasoning_effort=reasoning_effort,
            safety_identifier=safety_identifier,
            seed=seed,
            service_tier=service_tier,
            stop=stop,
            store=store,
            stream_options=stream_options,
            temperature=temperature,
            tool_choice=tool_choice,
            tools=tools,
            top_logprobs=top_logprobs,
            top_p=top_p,
            user=user,
            verbosity=verbosity,
            web_search_options=web_search_options,
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return AsyncChatCompletionStreamManager(
            api_request,
            response_format=response_format,
            input_tools=tools,
        )


class CompletionsWithRawResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.parse = _legacy_response.to_raw_response_wrapper(
            completions.parse,
        )
        self.create = _legacy_response.to_raw_response_wrapper(
            completions.create,
        )
        self.retrieve = _legacy_response.to_raw_response_wrapper(
            completions.retrieve,
        )
        self.update = _legacy_response.to_raw_response_wrapper(
            completions.update,
        )
        self.list = _legacy_response.to_raw_response_wrapper(
            completions.list,
        )
        self.delete = _legacy_response.to_raw_response_wrapper(
            completions.delete,
        )

    @cached_property
    def messages(self) -> MessagesWithRawResponse:
        return MessagesWithRawResponse(self._completions.messages)
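
# Note: these wrapper classes are not constructed directly; they are reached
# through the `.with_raw_response` / `.with_streaming_response` properties on the
# resource. An illustrative sketch (assuming a sync `OpenAI` client):
#
#   response = client.chat.completions.with_raw_response.create(
#       model="gpt-4o",
#       messages=[{"role": "user", "content": "Hello!"}],
#   )
#   completion = response.parse()  # parses into the regular `ChatCompletion`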


class AsyncCompletionsWithRawResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.parse = _legacy_response.async_to_raw_response_wrapper(
            completions.parse,
        )
        self.create = _legacy_response.async_to_raw_response_wrapper(
            completions.create,
        )
        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
            completions.retrieve,
        )
        self.update = _legacy_response.async_to_raw_response_wrapper(
            completions.update,
        )
        self.list = _legacy_response.async_to_raw_response_wrapper(
            completions.list,
        )
        self.delete = _legacy_response.async_to_raw_response_wrapper(
            completions.delete,
        )

    @cached_property
    def messages(self) -> AsyncMessagesWithRawResponse:
        return AsyncMessagesWithRawResponse(self._completions.messages)


class CompletionsWithStreamingResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.parse = to_streamed_response_wrapper(
            completions.parse,
        )
        self.create = to_streamed_response_wrapper(
            completions.create,
        )
        self.retrieve = to_streamed_response_wrapper(
            completions.retrieve,
        )
        self.update = to_streamed_response_wrapper(
            completions.update,
        )
        self.list = to_streamed_response_wrapper(
            completions.list,
        )
        self.delete = to_streamed_response_wrapper(
            completions.delete,
        )

    @cached_property
    def messages(self) -> MessagesWithStreamingResponse:
        return MessagesWithStreamingResponse(self._completions.messages)


class AsyncCompletionsWithStreamingResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.parse = async_to_streamed_response_wrapper(
            completions.parse,
        )
        self.create = async_to_streamed_response_wrapper(
            completions.create,
        )
        self.retrieve = async_to_streamed_response_wrapper(
            completions.retrieve,
        )
        self.update = async_to_streamed_response_wrapper(
            completions.update,
        )
        self.list = async_to_streamed_response_wrapper(
            completions.list,
        )
        self.delete = async_to_streamed_response_wrapper(
            completions.delete,
        )

    @cached_property
    def messages(self) -> AsyncMessagesWithStreamingResponse:
        return AsyncMessagesWithStreamingResponse(self._completions.messages)


def validate_response_format(response_format: object) -> None:
    if inspect.isclass(response_format) and issubclass(response_format, pydantic.BaseModel):
        raise TypeError(
            "You tried to pass a `BaseModel` class to `chat.completions.create()`; you must use `chat.completions.parse()` instead"
        )
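
# Illustrative sketch of the guard above (with a hypothetical `Weather` pydantic
# model):
#
#   client.chat.completions.create(..., response_format=Weather)  # raises TypeError
#   client.chat.completions.parse(..., response_format=Weather)   # supported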