# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

import inspect
from typing import Dict, List, Type, Union, Iterable, Optional, cast
from functools import partial
from typing_extensions import Literal, overload

import httpx
import pydantic

from .... import _legacy_response
from .messages import (
    Messages,
    AsyncMessages,
    MessagesWithRawResponse,
    AsyncMessagesWithRawResponse,
    MessagesWithStreamingResponse,
    AsyncMessagesWithStreamingResponse,
)
from ...._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
from ...._utils import required_args, maybe_transform, async_maybe_transform
from ...._compat import cached_property
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ...._streaming import Stream, AsyncStream
from ....pagination import SyncCursorPage, AsyncCursorPage
from ....types.chat import (
    ChatCompletionAudioParam,
    completion_list_params,
    completion_create_params,
    completion_update_params,
)
from ...._base_client import AsyncPaginator, make_request_options
from ....lib._parsing import (
    ResponseFormatT,
    validate_input_tools as _validate_input_tools,
    parse_chat_completion as _parse_chat_completion,
    type_to_response_format_param as _type_to_response_format,
)
from ....lib.streaming.chat import ChatCompletionStreamManager, AsyncChatCompletionStreamManager
from ....types.shared.chat_model import ChatModel
from ....types.chat.chat_completion import ChatCompletion
from ....types.shared_params.metadata import Metadata
from ....types.shared.reasoning_effort import ReasoningEffort
from ....types.chat.chat_completion_chunk import ChatCompletionChunk
from ....types.chat.parsed_chat_completion import ParsedChatCompletion
from ....types.chat.chat_completion_deleted import ChatCompletionDeleted
from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam
from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam
from ....types.chat.chat_completion_tool_union_param import ChatCompletionToolUnionParam
from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam
from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam

__all__ = ["Completions", "AsyncCompletions"]


class Completions(SyncAPIResource):
    @cached_property
    def messages(self) -> Messages:
        return Messages(self._client)

    @cached_property
    def with_raw_response(self) -> CompletionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
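
        For example, a sketch (illustrative only; assumes a configured client
        named `client`):

        ```py
        response = client.chat.completions.with_raw_response.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Hi"}],
        )
        print(response.headers.get("x-request-id"))
        completion = response.parse()  # the parsed `ChatCompletion`
        ```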
72 """
73 return CompletionsWithRawResponse(self)
74
75 @cached_property
76 def with_streaming_response(self) -> CompletionsWithStreamingResponse:
77 """
78 An alternative to `.with_raw_response` that doesn't eagerly read the response body.
79
80 For more information, see https://www.github.com/openai/openai-python#with_streaming_response
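
        For example, a sketch (illustrative only; assumes a configured client
        named `client`):

        ```py
        with client.chat.completions.with_streaming_response.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Hi"}],
        ) as response:
            print(response.headers.get("x-request-id"))
            completion = response.parse()
        ```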
81 """
82 return CompletionsWithStreamingResponse(self)
83
84 def parse(
85 self,
86 *,
87 messages: Iterable[ChatCompletionMessageParam],
88 model: Union[str, ChatModel],
89 audio: Optional[ChatCompletionAudioParam] | Omit = omit,
90 response_format: type[ResponseFormatT] | Omit = omit,
91 frequency_penalty: Optional[float] | Omit = omit,
92 function_call: completion_create_params.FunctionCall | Omit = omit,
93 functions: Iterable[completion_create_params.Function] | Omit = omit,
94 logit_bias: Optional[Dict[str, int]] | Omit = omit,
95 logprobs: Optional[bool] | Omit = omit,
96 max_completion_tokens: Optional[int] | Omit = omit,
97 max_tokens: Optional[int] | Omit = omit,
98 metadata: Optional[Metadata] | Omit = omit,
99 modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
100 n: Optional[int] | Omit = omit,
101 parallel_tool_calls: bool | Omit = omit,
102 prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
103 presence_penalty: Optional[float] | Omit = omit,
104 prompt_cache_key: str | Omit = omit,
105 prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
106 reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
107 safety_identifier: str | Omit = omit,
108 seed: Optional[int] | Omit = omit,
109 service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
110 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
111 store: Optional[bool] | Omit = omit,
112 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
113 temperature: Optional[float] | Omit = omit,
114 tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
115 tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
116 top_logprobs: Optional[int] | Omit = omit,
117 top_p: Optional[float] | Omit = omit,
118 user: str | Omit = omit,
119 verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
120 web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
121 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
122 # The extra values given here take precedence over values defined on the client or passed to this method.
123 extra_headers: Headers | None = None,
124 extra_query: Query | None = None,
125 extra_body: Body | None = None,
126 timeout: float | httpx.Timeout | None | NotGiven = not_given,
127 ) -> ParsedChatCompletion[ResponseFormatT]:
128 """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
129 & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
130
131 You can pass a pydantic model to this method and it will automatically convert the model
132 into a JSON schema, send it to the API and parse the response content back into the given model.
133
134 This method will also automatically parse `function` tool calls if:
135 - You use the `openai.pydantic_function_tool()` helper method
136 - You mark your tool schema with `"strict": True`
137
138 Example usage:
139 ```py
140 from pydantic import BaseModel
141 from openai import OpenAI
142
143
144 class Step(BaseModel):
145 explanation: str
146 output: str
147
148
149 class MathResponse(BaseModel):
150 steps: List[Step]
151 final_answer: str
152
153
154 client = OpenAI()
155 completion = client.chat.completions.parse(
156 model="gpt-4o-2024-08-06",
157 messages=[
158 {"role": "system", "content": "You are a helpful math tutor."},
159 {"role": "user", "content": "solve 8x + 31 = 2"},
160 ],
161 response_format=MathResponse,
162 )
163
164 message = completion.choices[0].message
165 if message.parsed:
166 print(message.parsed.steps)
167 print("answer: ", message.parsed.final_answer)
168 ```
169 """
170 chat_completion_tools = _validate_input_tools(tools)
171
172 extra_headers = {
173 "X-Stainless-Helper-Method": "chat.completions.parse",
174 **(extra_headers or {}),
175 }
176
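        # `parser` runs as a post-parser hook on the decoded `ChatCompletion`,
        # converting it into a `ParsedChatCompletion[ResponseFormatT]` (it is
        # passed as `post_parser=parser` in the request options below)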
        def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
            return _parse_chat_completion(
                response_format=response_format,
                chat_completion=raw_completion,
                input_tools=chat_completion_tools,
            )

        return self._post(
            "/chat/completions",
            body=maybe_transform(
                {
                    "messages": messages,
                    "model": model,
                    "audio": audio,
                    "frequency_penalty": frequency_penalty,
                    "function_call": function_call,
                    "functions": functions,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_completion_tokens": max_completion_tokens,
                    "max_tokens": max_tokens,
                    "metadata": metadata,
                    "modalities": modalities,
                    "n": n,
                    "parallel_tool_calls": parallel_tool_calls,
                    "prediction": prediction,
                    "presence_penalty": presence_penalty,
                    "prompt_cache_key": prompt_cache_key,
                    "prompt_cache_retention": prompt_cache_retention,
                    "reasoning_effort": reasoning_effort,
                    "response_format": _type_to_response_format(response_format),
                    "safety_identifier": safety_identifier,
                    "seed": seed,
                    "service_tier": service_tier,
                    "stop": stop,
                    "store": store,
                    "stream": False,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "user": user,
                    "verbosity": verbosity,
                    "web_search_options": web_search_options,
                },
                completion_create_params.CompletionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                post_parser=parser,
            ),
            # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
            # in the `parser` function above
            cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
            stream=False,
        )

    @overload
    def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream: Optional[Literal[False]] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion:
        """
        **Starting a new project?** We recommend trying
        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
        advantage of the latest OpenAI platform features. Compare
        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).

        ---

        Creates a model response for the given chat conversation. Learn more in the
        [text generation](https://platform.openai.com/docs/guides/text-generation),
        [vision](https://platform.openai.com/docs/guides/vision), and
        [audio](https://platform.openai.com/docs/guides/audio) guides.

        Parameter support can differ depending on the model used to generate the
        response, particularly for newer reasoning models. Parameters that are only
        supported for reasoning models are noted below. For the current state of
        unsupported parameters in reasoning models,
        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).

        Args:
          messages: A list of messages comprising the conversation so far. Depending on the
              [model](https://platform.openai.com/docs/models) you use, different message
              types (modalities) are supported, like
              [text](https://platform.openai.com/docs/guides/text-generation),
              [images](https://platform.openai.com/docs/guides/vision), and
              [audio](https://platform.openai.com/docs/guides/audio).

          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
              wide range of models with different capabilities, performance characteristics,
              and price points. Refer to the
              [model guide](https://platform.openai.com/docs/models) to browse and compare
              available models.

          audio: Parameters for audio output. Required when audio output is requested with
              `modalities: ["audio"]`.
              [Learn more](https://platform.openai.com/docs/guides/audio).

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.

          function_call: Deprecated in favor of `tool_choice`.

              Controls which (if any) function is called by the model.

              `none` means the model will not call a function and instead generates a message.

              `auto` means the model can pick between generating a message or calling a
              function.

              Specifying a particular function via `{"name": "my_function"}` forces the model
              to call that function.

              `none` is the default when no functions are present. `auto` is the default if
              functions are present.

          functions: Deprecated in favor of `tools`.

              A list of functions the model may generate JSON inputs for.

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.

              Accepts a JSON object that maps tokens (specified by their token ID in the
              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
              bias is added to the logits generated by the model prior to sampling. The exact
              effect will vary per model, but values between -1 and 1 should decrease or
              increase likelihood of selection; values like -100 or 100 should result in a ban
              or exclusive selection of the relevant token.

          logprobs: Whether to return log probabilities of the output tokens or not. If true,
              returns the log probabilities of each output token returned in the `content` of
              `message`.

          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
              including visible output tokens and
              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
              completion. This value can be used to control
              [costs](https://openai.com/api/pricing/) for text generated via API.

              This value is now deprecated in favor of `max_completion_tokens`, and is not
              compatible with
              [o-series models](https://platform.openai.com/docs/guides/reasoning).

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          modalities: Output types that you would like the model to generate. Most models are capable
              of generating text, which is the default:

              `["text"]`

              The `gpt-4o-audio-preview` model can also be used to
              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
              this model generate both text and audio responses, you can use:

              `["text", "audio"]`

          n: How many chat completion choices to generate for each input message. Note that
              you will be charged based on the number of generated tokens across all of the
              choices. Keep `n` as `1` to minimize costs.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          prediction: Static predicted output content, such as the content of a text file that is
              being regenerated.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.

          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
              hit rates. Replaces the `user` field.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).

          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
              prompt caching, which keeps cached prefixes active for longer, up to a maximum
              of 24 hours.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.

          response_format: An object specifying the format that the model must output.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
              ensures the message the model generates is valid JSON. Using `json_schema` is
              preferred for models that support it.

          safety_identifier: A stable identifier used to help detect users of your application that may be
              violating OpenAI's usage policies. The IDs should be a string that uniquely
              identifies each user. We recommend hashing their username or email address, in
              order to avoid sending us any identifying information.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          seed: This feature is in Beta. If specified, our system will make a best effort to
              sample deterministically, such that repeated requests with the same `seed` and
              parameters should return the same result. Determinism is not guaranteed, and you
              should refer to the `system_fingerprint` response parameter to monitor changes
              in the backend.

          service_tier: Specifies the processing type used for serving the request.

              - If set to 'auto', then the request will be processed with the service tier
                configured in the Project settings. Unless otherwise configured, the Project
                will use 'default'.
              - If set to 'default', then the request will be processed with the standard
                pricing and performance for the selected model.
              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
                '[priority](https://openai.com/api-priority-processing/)', then the request
                will be processed with the corresponding service tier.
              - When not set, the default behavior is 'auto'.

              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.

              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          store: Whether or not to store the output of this chat completion request for use in
              our [model distillation](https://platform.openai.com/docs/guides/distillation)
              or [evals](https://platform.openai.com/docs/guides/evals) products.

              Supports text and image inputs. Note: image inputs over 8MB will be dropped.

          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
              for more information, along with the
              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
              guide for more information on how to handle the streaming events.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tool and instead generates a message. `auto` means the model can
              pick between generating a message or calling one or more tools. `required` means
              the model must call one or more tools. Specifying a particular tool via
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

              `none` is the default when no tools are present. `auto` is the default if tools
              are present.

          tools: A list of tools the model may call. You can provide either
              [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools)
              or [function tools](https://platform.openai.com/docs/guides/function-calling).

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          verbosity: Constrains the verbosity of the model's response. Lower values will result in
              more concise responses, while higher values will result in more verbose
              responses. Currently supported values are `low`, `medium`, and `high`.

          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
              about the
              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
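
        For example, a minimal non-streaming call (illustrative only; assumes a
        configured client named `client`):

        ```py
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Say hello."}],
        )
        print(completion.choices[0].message.content)
        ```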
538 """
539 ...
540
541 @overload
542 def create(
543 self,
544 *,
545 messages: Iterable[ChatCompletionMessageParam],
546 model: Union[str, ChatModel],
547 stream: Literal[True],
548 audio: Optional[ChatCompletionAudioParam] | Omit = omit,
549 frequency_penalty: Optional[float] | Omit = omit,
550 function_call: completion_create_params.FunctionCall | Omit = omit,
551 functions: Iterable[completion_create_params.Function] | Omit = omit,
552 logit_bias: Optional[Dict[str, int]] | Omit = omit,
553 logprobs: Optional[bool] | Omit = omit,
554 max_completion_tokens: Optional[int] | Omit = omit,
555 max_tokens: Optional[int] | Omit = omit,
556 metadata: Optional[Metadata] | Omit = omit,
557 modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
558 n: Optional[int] | Omit = omit,
559 parallel_tool_calls: bool | Omit = omit,
560 prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
561 presence_penalty: Optional[float] | Omit = omit,
562 prompt_cache_key: str | Omit = omit,
563 prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
564 reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
565 response_format: completion_create_params.ResponseFormat | Omit = omit,
566 safety_identifier: str | Omit = omit,
567 seed: Optional[int] | Omit = omit,
568 service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
569 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
570 store: Optional[bool] | Omit = omit,
571 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
572 temperature: Optional[float] | Omit = omit,
573 tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
574 tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
575 top_logprobs: Optional[int] | Omit = omit,
576 top_p: Optional[float] | Omit = omit,
577 user: str | Omit = omit,
578 verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
579 web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
580 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
581 # The extra values given here take precedence over values defined on the client or passed to this method.
582 extra_headers: Headers | None = None,
583 extra_query: Query | None = None,
584 extra_body: Body | None = None,
585 timeout: float | httpx.Timeout | None | NotGiven = not_given,
586 ) -> Stream[ChatCompletionChunk]:
587 """
588 **Starting a new project?** We recommend trying
589 [Responses](https://platform.openai.com/docs/api-reference/responses) to take
590 advantage of the latest OpenAI platform features. Compare
591 [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).
592
593 ---
594
595 Creates a model response for the given chat conversation. Learn more in the
596 [text generation](https://platform.openai.com/docs/guides/text-generation),
597 [vision](https://platform.openai.com/docs/guides/vision), and
598 [audio](https://platform.openai.com/docs/guides/audio) guides.
599
600 Parameter support can differ depending on the model used to generate the
601 response, particularly for newer reasoning models. Parameters that are only
602 supported for reasoning models are noted below. For the current state of
603 unsupported parameters in reasoning models,
604 [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
605
606 Args:
607 messages: A list of messages comprising the conversation so far. Depending on the
608 [model](https://platform.openai.com/docs/models) you use, different message
609 types (modalities) are supported, like
610 [text](https://platform.openai.com/docs/guides/text-generation),
611 [images](https://platform.openai.com/docs/guides/vision), and
612 [audio](https://platform.openai.com/docs/guides/audio).
613
614 model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
615 wide range of models with different capabilities, performance characteristics,
616 and price points. Refer to the
617 [model guide](https://platform.openai.com/docs/models) to browse and compare
618 available models.
619
620 stream: If set to true, the model response data will be streamed to the client as it is
621 generated using
622 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
623 See the
624 [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
625 for more information, along with the
626 [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
627 guide for more information on how to handle the streaming events.
628
629 audio: Parameters for audio output. Required when audio output is requested with
630 `modalities: ["audio"]`.
631 [Learn more](https://platform.openai.com/docs/guides/audio).
632
633 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
634 existing frequency in the text so far, decreasing the model's likelihood to
635 repeat the same line verbatim.
636
637 function_call: Deprecated in favor of `tool_choice`.
638
639 Controls which (if any) function is called by the model.
640
641 `none` means the model will not call a function and instead generates a message.
642
643 `auto` means the model can pick between generating a message or calling a
644 function.
645
646 Specifying a particular function via `{"name": "my_function"}` forces the model
647 to call that function.
648
649 `none` is the default when no functions are present. `auto` is the default if
650 functions are present.
651
652 functions: Deprecated in favor of `tools`.
653
654 A list of functions the model may generate JSON inputs for.
655
656 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
657
658 Accepts a JSON object that maps tokens (specified by their token ID in the
659 tokenizer) to an associated bias value from -100 to 100. Mathematically, the
660 bias is added to the logits generated by the model prior to sampling. The exact
661 effect will vary per model, but values between -1 and 1 should decrease or
662 increase likelihood of selection; values like -100 or 100 should result in a ban
663 or exclusive selection of the relevant token.
664
665 logprobs: Whether to return log probabilities of the output tokens or not. If true,
666 returns the log probabilities of each output token returned in the `content` of
667 `message`.
668
669 max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
670 including visible output tokens and
671 [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
672
673 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
674 completion. This value can be used to control
675 [costs](https://openai.com/api/pricing/) for text generated via API.
676
677 This value is now deprecated in favor of `max_completion_tokens`, and is not
678 compatible with
679 [o-series models](https://platform.openai.com/docs/guides/reasoning).
680
681 metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
682 for storing additional information about the object in a structured format, and
683 querying for objects via API or the dashboard.
684
685 Keys are strings with a maximum length of 64 characters. Values are strings with
686 a maximum length of 512 characters.
687
688 modalities: Output types that you would like the model to generate. Most models are capable
689 of generating text, which is the default:
690
691 `["text"]`
692
693 The `gpt-4o-audio-preview` model can also be used to
694 [generate audio](https://platform.openai.com/docs/guides/audio). To request that
695 this model generate both text and audio responses, you can use:
696
697 `["text", "audio"]`
698
699 n: How many chat completion choices to generate for each input message. Note that
700 you will be charged based on the number of generated tokens across all of the
701 choices. Keep `n` as `1` to minimize costs.
702
703 parallel_tool_calls: Whether to enable
704 [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
705 during tool use.
706
707 prediction: Static predicted output content, such as the content of a text file that is
708 being regenerated.
709
710 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
711 whether they appear in the text so far, increasing the model's likelihood to
712 talk about new topics.
713
714 prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
715 hit rates. Replaces the `user` field.
716 [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
717
718 prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
719 prompt caching, which keeps cached prefixes active for longer, up to a maximum
720 of 24 hours.
721 [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
722
723 reasoning_effort: Constrains effort on reasoning for
724 [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
725 supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
726 Reducing reasoning effort can result in faster responses and fewer tokens used
727 on reasoning in a response.
728
729 - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
730 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
731 calls are supported for all reasoning values in gpt-5.1.
732 - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
733 support `none`.
734 - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
735 - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
736
737 response_format: An object specifying the format that the model must output.
738
739 Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
740 Outputs which ensures the model will match your supplied JSON schema. Learn more
741 in the
742 [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
743
744 Setting to `{ "type": "json_object" }` enables the older JSON mode, which
745 ensures the message the model generates is valid JSON. Using `json_schema` is
746 preferred for models that support it.
747
748 safety_identifier: A stable identifier used to help detect users of your application that may be
749 violating OpenAI's usage policies. The IDs should be a string that uniquely
750 identifies each user. We recommend hashing their username or email address, in
751 order to avoid sending us any identifying information.
752 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
753
754 seed: This feature is in Beta. If specified, our system will make a best effort to
755 sample deterministically, such that repeated requests with the same `seed` and
756 parameters should return the same result. Determinism is not guaranteed, and you
757 should refer to the `system_fingerprint` response parameter to monitor changes
758 in the backend.
759
760 service_tier: Specifies the processing type used for serving the request.
761
762 - If set to 'auto', then the request will be processed with the service tier
763 configured in the Project settings. Unless otherwise configured, the Project
764 will use 'default'.
765 - If set to 'default', then the request will be processed with the standard
766 pricing and performance for the selected model.
767 - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
768 '[priority](https://openai.com/api-priority-processing/)', then the request
769 will be processed with the corresponding service tier.
770 - When not set, the default behavior is 'auto'.
771
772 When the `service_tier` parameter is set, the response body will include the
773 `service_tier` value based on the processing mode actually used to serve the
774 request. This response value may be different from the value set in the
775 parameter.
776
777 stop: Not supported with latest reasoning models `o3` and `o4-mini`.
778
779 Up to 4 sequences where the API will stop generating further tokens. The
780 returned text will not contain the stop sequence.
781
782 store: Whether or not to store the output of this chat completion request for use in
783 our [model distillation](https://platform.openai.com/docs/guides/distillation)
784 or [evals](https://platform.openai.com/docs/guides/evals) products.
785
786 Supports text and image inputs. Note: image inputs over 8MB will be dropped.
787
788 stream_options: Options for streaming response. Only set this when you set `stream: true`.
789
790 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
791 make the output more random, while lower values like 0.2 will make it more
792 focused and deterministic. We generally recommend altering this or `top_p` but
793 not both.
794
795 tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
796 not call any tool and instead generates a message. `auto` means the model can
797 pick between generating a message or calling one or more tools. `required` means
798 the model must call one or more tools. Specifying a particular tool via
799 `{"type": "function", "function": {"name": "my_function"}}` forces the model to
800 call that tool.
801
802 `none` is the default when no tools are present. `auto` is the default if tools
803 are present.
804
805 tools: A list of tools the model may call. You can provide either
806 [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools)
807 or [function tools](https://platform.openai.com/docs/guides/function-calling).
808
809 top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
810 return at each token position, each with an associated log probability.
811 `logprobs` must be set to `true` if this parameter is used.
812
813 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
814 model considers the results of the tokens with top_p probability mass. So 0.1
815 means only the tokens comprising the top 10% probability mass are considered.
816
817 We generally recommend altering this or `temperature` but not both.
818
819 user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
820 `prompt_cache_key` instead to maintain caching optimizations. A stable
821 identifier for your end-users. Used to boost cache hit rates by better bucketing
822 similar requests and to help OpenAI detect and prevent abuse.
823 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
824
825 verbosity: Constrains the verbosity of the model's response. Lower values will result in
826 more concise responses, while higher values will result in more verbose
827 responses. Currently supported values are `low`, `medium`, and `high`.
828
829 web_search_options: This tool searches the web for relevant results to use in a response. Learn more
830 about the
831 [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).
832
833 extra_headers: Send extra headers
834
835 extra_query: Add additional query parameters to the request
836
837 extra_body: Add additional JSON properties to the request
838
839 timeout: Override the client-level default timeout for this request, in seconds
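
        For example, a minimal streaming call (illustrative only; assumes a
        configured client named `client`):

        ```py
        stream = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Say hello."}],
            stream=True,
        )
        for chunk in stream:
            # `choices` can be empty on some chunks (e.g. the final usage chunk)
            delta = chunk.choices[0].delta if chunk.choices else None
            if delta and delta.content:
                print(delta.content, end="")
        ```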
840 """
841 ...
842
843 @overload
844 def create(
845 self,
846 *,
847 messages: Iterable[ChatCompletionMessageParam],
848 model: Union[str, ChatModel],
849 stream: bool,
850 audio: Optional[ChatCompletionAudioParam] | Omit = omit,
851 frequency_penalty: Optional[float] | Omit = omit,
852 function_call: completion_create_params.FunctionCall | Omit = omit,
853 functions: Iterable[completion_create_params.Function] | Omit = omit,
854 logit_bias: Optional[Dict[str, int]] | Omit = omit,
855 logprobs: Optional[bool] | Omit = omit,
856 max_completion_tokens: Optional[int] | Omit = omit,
857 max_tokens: Optional[int] | Omit = omit,
858 metadata: Optional[Metadata] | Omit = omit,
859 modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
860 n: Optional[int] | Omit = omit,
861 parallel_tool_calls: bool | Omit = omit,
862 prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
863 presence_penalty: Optional[float] | Omit = omit,
864 prompt_cache_key: str | Omit = omit,
865 prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
866 reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
867 response_format: completion_create_params.ResponseFormat | Omit = omit,
868 safety_identifier: str | Omit = omit,
869 seed: Optional[int] | Omit = omit,
870 service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
871 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
872 store: Optional[bool] | Omit = omit,
873 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
874 temperature: Optional[float] | Omit = omit,
875 tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
876 tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
877 top_logprobs: Optional[int] | Omit = omit,
878 top_p: Optional[float] | Omit = omit,
879 user: str | Omit = omit,
880 verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
881 web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
882 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
883 # The extra values given here take precedence over values defined on the client or passed to this method.
884 extra_headers: Headers | None = None,
885 extra_query: Query | None = None,
886 extra_body: Body | None = None,
887 timeout: float | httpx.Timeout | None | NotGiven = not_given,
888 ) -> ChatCompletion | Stream[ChatCompletionChunk]:
889 """
890 **Starting a new project?** We recommend trying
891 [Responses](https://platform.openai.com/docs/api-reference/responses) to take
892 advantage of the latest OpenAI platform features. Compare
893 [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).
894
895 ---
896
897 Creates a model response for the given chat conversation. Learn more in the
898 [text generation](https://platform.openai.com/docs/guides/text-generation),
899 [vision](https://platform.openai.com/docs/guides/vision), and
900 [audio](https://platform.openai.com/docs/guides/audio) guides.
901
902 Parameter support can differ depending on the model used to generate the
903 response, particularly for newer reasoning models. Parameters that are only
904 supported for reasoning models are noted below. For the current state of
905 unsupported parameters in reasoning models,
906 [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
907
908 Args:
909 messages: A list of messages comprising the conversation so far. Depending on the
910 [model](https://platform.openai.com/docs/models) you use, different message
911 types (modalities) are supported, like
912 [text](https://platform.openai.com/docs/guides/text-generation),
913 [images](https://platform.openai.com/docs/guides/vision), and
914 [audio](https://platform.openai.com/docs/guides/audio).
915
916 model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
917 wide range of models with different capabilities, performance characteristics,
918 and price points. Refer to the
919 [model guide](https://platform.openai.com/docs/models) to browse and compare
920 available models.
921
922 stream: If set to true, the model response data will be streamed to the client as it is
923 generated using
924 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
925 See the
926 [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
927 for more information, along with the
928 [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
929 guide for more information on how to handle the streaming events.
930
931 audio: Parameters for audio output. Required when audio output is requested with
932 `modalities: ["audio"]`.
933 [Learn more](https://platform.openai.com/docs/guides/audio).
934
935 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
936 existing frequency in the text so far, decreasing the model's likelihood to
937 repeat the same line verbatim.
938
939 function_call: Deprecated in favor of `tool_choice`.
940
941 Controls which (if any) function is called by the model.
942
943 `none` means the model will not call a function and instead generates a message.
944
945 `auto` means the model can pick between generating a message or calling a
946 function.
947
948 Specifying a particular function via `{"name": "my_function"}` forces the model
949 to call that function.
950
951 `none` is the default when no functions are present. `auto` is the default if
952 functions are present.
953
954 functions: Deprecated in favor of `tools`.
955
956 A list of functions the model may generate JSON inputs for.
957
958 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
959
960 Accepts a JSON object that maps tokens (specified by their token ID in the
961 tokenizer) to an associated bias value from -100 to 100. Mathematically, the
962 bias is added to the logits generated by the model prior to sampling. The exact
963 effect will vary per model, but values between -1 and 1 should decrease or
964 increase likelihood of selection; values like -100 or 100 should result in a ban
965 or exclusive selection of the relevant token.
966
967 logprobs: Whether to return log probabilities of the output tokens or not. If true,
968 returns the log probabilities of each output token returned in the `content` of
969 `message`.
970
971 max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
972 including visible output tokens and
973 [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
974
975 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
976 completion. This value can be used to control
977 [costs](https://openai.com/api/pricing/) for text generated via API.
978
979 This value is now deprecated in favor of `max_completion_tokens`, and is not
980 compatible with
981 [o-series models](https://platform.openai.com/docs/guides/reasoning).
982
983 metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
984 for storing additional information about the object in a structured format, and
985 querying for objects via API or the dashboard.
986
987 Keys are strings with a maximum length of 64 characters. Values are strings with
988 a maximum length of 512 characters.
989
990 modalities: Output types that you would like the model to generate. Most models are capable
991 of generating text, which is the default:
992
993 `["text"]`
994
995 The `gpt-4o-audio-preview` model can also be used to
996 [generate audio](https://platform.openai.com/docs/guides/audio). To request that
997 this model generate both text and audio responses, you can use:
998
999 `["text", "audio"]`
1000
1001 n: How many chat completion choices to generate for each input message. Note that
1002 you will be charged based on the number of generated tokens across all of the
1003 choices. Keep `n` as `1` to minimize costs.
1004
1005 parallel_tool_calls: Whether to enable
1006 [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
1007 during tool use.
1008
1009 prediction: Static predicted output content, such as the content of a text file that is
1010 being regenerated.
1011
1012 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
1013 whether they appear in the text so far, increasing the model's likelihood to
1014 talk about new topics.
1015
1016 prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
1017 hit rates. Replaces the `user` field.
1018 [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
1019
1020 prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
1021 prompt caching, which keeps cached prefixes active for longer, up to a maximum
1022 of 24 hours.
1023 [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
1024
1025 reasoning_effort: Constrains effort on reasoning for
1026 [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
1027 supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
1028 Reducing reasoning effort can result in faster responses and fewer tokens used
1029 on reasoning in a response.
1030
1031 - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
1032 reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
1033 calls are supported for all reasoning values in gpt-5.1.
1034 - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
1035 support `none`.
1036 - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
1037 - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
1038
1039 response_format: An object specifying the format that the model must output.
1040
1041 Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
1042 Outputs which ensures the model will match your supplied JSON schema. Learn more
1043 in the
1044 [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
1045
1046 Setting to `{ "type": "json_object" }` enables the older JSON mode, which
1047 ensures the message the model generates is valid JSON. Using `json_schema` is
1048 preferred for models that support it.
1049
1050 safety_identifier: A stable identifier used to help detect users of your application that may be
1051 violating OpenAI's usage policies. The IDs should be a string that uniquely
1052 identifies each user. We recommend hashing their username or email address, in
1053 order to avoid sending us any identifying information.
1054 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
1055
1056 seed: This feature is in Beta. If specified, our system will make a best effort to
1057 sample deterministically, such that repeated requests with the same `seed` and
1058 parameters should return the same result. Determinism is not guaranteed, and you
1059 should refer to the `system_fingerprint` response parameter to monitor changes
1060 in the backend.
1061
1062 service_tier: Specifies the processing type used for serving the request.
1063
1064 - If set to 'auto', then the request will be processed with the service tier
1065 configured in the Project settings. Unless otherwise configured, the Project
1066 will use 'default'.
1067 - If set to 'default', then the request will be processed with the standard
1068 pricing and performance for the selected model.
1069 - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
1070 '[priority](https://openai.com/api-priority-processing/)', then the request
1071 will be processed with the corresponding service tier.
1072 - When not set, the default behavior is 'auto'.
1073
              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.

              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          store: Whether or not to store the output of this chat completion request for use in
              our [model distillation](https://platform.openai.com/docs/guides/distillation)
              or [evals](https://platform.openai.com/docs/guides/evals) products.

              Supports text and image inputs. Note: image inputs over 8MB will be dropped.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tool and instead generates a message. `auto` means the model can
              pick between generating a message or calling one or more tools. `required` means
              the model must call one or more tools. Specifying a particular tool via
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool (see the sketch after this parameter list).

              `none` is the default when no tools are present. `auto` is the default if tools
              are present.

          tools: A list of tools the model may call. You can provide either
              [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools)
              or [function tools](https://platform.openai.com/docs/guides/function-calling).

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          verbosity: Constrains the verbosity of the model's response. Lower values will result in
              more concise responses, while higher values will result in more verbose
              responses. Currently supported values are `low`, `medium`, and `high`.

          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
              about the
              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
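
        A minimal sketch of the `tool_choice` shape described above (the
        `get_weather` tool is a hypothetical example, not part of the API):

        ```py
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "What is the weather in Paris?"}],
            tools=[
                {
                    "type": "function",
                    "function": {
                        "name": "get_weather",
                        "description": "Look up the current weather for a city.",
                        "parameters": {
                            "type": "object",
                            "properties": {"city": {"type": "string"}},
                            "required": ["city"],
                        },
                    },
                }
            ],
            # Force a call to `get_weather` instead of a plain text reply.
            tool_choice={"type": "function", "function": {"name": "get_weather"}},
        )
        ```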
1142 """
1143 ...
1144
    @required_args(["messages", "model"], ["messages", "model", "stream"])
    def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
        validate_response_format(response_format)
        return self._post(
            "/chat/completions",
            body=maybe_transform(
                {
                    "messages": messages,
                    "model": model,
                    "audio": audio,
                    "frequency_penalty": frequency_penalty,
                    "function_call": function_call,
                    "functions": functions,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_completion_tokens": max_completion_tokens,
                    "max_tokens": max_tokens,
                    "metadata": metadata,
                    "modalities": modalities,
                    "n": n,
                    "parallel_tool_calls": parallel_tool_calls,
                    "prediction": prediction,
                    "presence_penalty": presence_penalty,
                    "prompt_cache_key": prompt_cache_key,
                    "prompt_cache_retention": prompt_cache_retention,
                    "reasoning_effort": reasoning_effort,
                    "response_format": response_format,
                    "safety_identifier": safety_identifier,
                    "seed": seed,
                    "service_tier": service_tier,
                    "stop": stop,
                    "store": store,
                    "stream": stream,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "user": user,
                    "verbosity": verbosity,
                    "web_search_options": web_search_options,
                },
                completion_create_params.CompletionCreateParamsStreaming
                if stream
                else completion_create_params.CompletionCreateParamsNonStreaming,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ChatCompletion,
            stream=stream or False,
            stream_cls=Stream[ChatCompletionChunk],
        )

    def retrieve(
        self,
        completion_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion:
        """Get a stored chat completion.

        Only Chat Completions that have been created with
        the `store` parameter set to `true` will be returned.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
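
        For example (the completion ID here is hypothetical):

        ```py
        completion = client.chat.completions.retrieve("chatcmpl-abc123")
        print(completion.choices[0].message.content)
        ```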
1268 """
1269 if not completion_id:
1270 raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
1271 return self._get(
1272 f"/chat/completions/{completion_id}",
1273 options=make_request_options(
1274 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1275 ),
1276 cast_to=ChatCompletion,
1277 )
1278
    def update(
        self,
        completion_id: str,
        *,
        metadata: Optional[Metadata],
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion:
        """Modify a stored chat completion.

        Only Chat Completions that have been created
        with the `store` parameter set to `true` can be modified. Currently, the only
        supported modification is to update the `metadata` field.

        Args:
          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
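
        For example (the completion ID and metadata values are hypothetical):

        ```py
        updated = client.chat.completions.update(
            "chatcmpl-abc123",
            metadata={"topic": "math"},
        )
        ```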
1312 """
1313 if not completion_id:
1314 raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
1315 return self._post(
1316 f"/chat/completions/{completion_id}",
1317 body=maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams),
1318 options=make_request_options(
1319 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1320 ),
1321 cast_to=ChatCompletion,
1322 )
1323
    def list(
        self,
        *,
        after: str | Omit = omit,
        limit: int | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: str | Omit = omit,
        order: Literal["asc", "desc"] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> SyncCursorPage[ChatCompletion]:
        """List stored Chat Completions.

        Only Chat Completions that have been stored with
        the `store` parameter set to `true` will be returned.

        Args:
          after: Identifier for the last chat completion from the previous pagination request.

          limit: Number of Chat Completions to retrieve.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: The model used to generate the Chat Completions.

          order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or
              `desc` for descending order. Defaults to `asc`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
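
        The returned page can be iterated directly and will fetch additional
        pages automatically, e.g.:

        ```py
        for completion in client.chat.completions.list(limit=20, order="desc"):
            print(completion.id)
        ```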
1368 """
1369 return self._get_api_list(
1370 "/chat/completions",
1371 page=SyncCursorPage[ChatCompletion],
1372 options=make_request_options(
1373 extra_headers=extra_headers,
1374 extra_query=extra_query,
1375 extra_body=extra_body,
1376 timeout=timeout,
1377 query=maybe_transform(
1378 {
1379 "after": after,
1380 "limit": limit,
1381 "metadata": metadata,
1382 "model": model,
1383 "order": order,
1384 },
1385 completion_list_params.CompletionListParams,
1386 ),
1387 ),
1388 model=ChatCompletion,
1389 )
1390
    def delete(
        self,
        completion_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletionDeleted:
        """Delete a stored chat completion.

        Only Chat Completions that have been created
        with the `store` parameter set to `true` can be deleted.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
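
        For example (the completion ID is hypothetical):

        ```py
        deleted = client.chat.completions.delete("chatcmpl-abc123")
        print(deleted.deleted)
        ```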
1415 """
1416 if not completion_id:
1417 raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
1418 return self._delete(
1419 f"/chat/completions/{completion_id}",
1420 options=make_request_options(
1421 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1422 ),
1423 cast_to=ChatCompletionDeleted,
1424 )
1425
    def stream(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletionStreamManager[ResponseFormatT]:
        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
        and automatic accumulation of each delta.

        This also supports all of the parsing utilities that `.parse()` does.

        Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:

        ```py
        with client.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[...],
        ) as stream:
            for event in stream:
                if event.type == "content.delta":
                    print(event.delta, flush=True, end="")
        ```

        When the context manager is entered, a `ChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an iterator. The full list of events that are yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).

        When the context manager exits, the response will be closed; however, the `stream` instance is still available outside
        the context manager.
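
        For example, a sketch of reading the fully accumulated completion once
        the stream has finished, via the `get_final_completion()` accessor the
        streaming helpers expose:

        ```py
        with client.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "Say hello"}],
        ) as stream:
            for event in stream:
                ...

        # The accumulated state is still usable after the context manager exits.
        completion = stream.get_final_completion()
        print(completion.choices[0].message.content)
        ```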
1491 """
1492 extra_headers = {
1493 "X-Stainless-Helper-Method": "chat.completions.stream",
1494 **(extra_headers or {}),
1495 }
1496
1497 api_request: partial[Stream[ChatCompletionChunk]] = partial(
1498 self.create,
1499 messages=messages,
1500 model=model,
1501 audio=audio,
1502 stream=True,
1503 response_format=_type_to_response_format(response_format),
1504 frequency_penalty=frequency_penalty,
1505 function_call=function_call,
1506 functions=functions,
1507 logit_bias=logit_bias,
1508 logprobs=logprobs,
1509 max_completion_tokens=max_completion_tokens,
1510 max_tokens=max_tokens,
1511 metadata=metadata,
1512 modalities=modalities,
1513 n=n,
1514 parallel_tool_calls=parallel_tool_calls,
1515 prediction=prediction,
1516 presence_penalty=presence_penalty,
1517 prompt_cache_key=prompt_cache_key,
1518 prompt_cache_retention=prompt_cache_retention,
1519 reasoning_effort=reasoning_effort,
1520 safety_identifier=safety_identifier,
1521 seed=seed,
1522 service_tier=service_tier,
1523 store=store,
1524 stop=stop,
1525 stream_options=stream_options,
1526 temperature=temperature,
1527 tool_choice=tool_choice,
1528 tools=tools,
1529 top_logprobs=top_logprobs,
1530 top_p=top_p,
1531 user=user,
1532 verbosity=verbosity,
1533 web_search_options=web_search_options,
1534 extra_headers=extra_headers,
1535 extra_query=extra_query,
1536 extra_body=extra_body,
1537 timeout=timeout,
1538 )
1539 return ChatCompletionStreamManager(
1540 api_request,
1541 response_format=response_format,
1542 input_tools=tools,
1543 )
1544
1545
class AsyncCompletions(AsyncAPIResource):
    @cached_property
    def messages(self) -> AsyncMessages:
        return AsyncMessages(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncCompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncCompletionsWithStreamingResponse(self)

    async def parse(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        response_format: type[ResponseFormatT] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ParsedChatCompletion[ResponseFormatT]:
1614 """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
1615 & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.

        You can pass a pydantic model to this method and it will automatically convert the model
        into a JSON schema, send it to the API and parse the response content back into the given model.

        This method will also automatically parse `function` tool calls if:
        - You use the `openai.pydantic_function_tool()` helper method
        - You mark your tool schema with `"strict": True`

        Example usage:
        ```py
        from typing import List

        from pydantic import BaseModel
        from openai import AsyncOpenAI


        class Step(BaseModel):
            explanation: str
            output: str


        class MathResponse(BaseModel):
            steps: List[Step]
            final_answer: str


        client = AsyncOpenAI()
        completion = await client.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[
                {"role": "system", "content": "You are a helpful math tutor."},
                {"role": "user", "content": "solve 8x + 31 = 2"},
            ],
            response_format=MathResponse,
        )

        message = completion.choices[0].message
        if message.parsed:
            print(message.parsed.steps)
            print("answer: ", message.parsed.final_answer)
        ```
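
        A sketch of automatic `function` tool call parsing with the
        `openai.pydantic_function_tool()` helper mentioned above (the
        `GetWeather` model is a hypothetical example):

        ```py
        import openai


        class GetWeather(BaseModel):
            city: str


        completion = await client.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "What's the weather in Paris?"}],
            tools=[openai.pydantic_function_tool(GetWeather)],
        )

        # Arguments of strict function tool calls are parsed into the model.
        tool_call = completion.choices[0].message.tool_calls[0]
        print(tool_call.function.parsed_arguments)
        ```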
1655 """
1656 _validate_input_tools(tools)
1657
1658 extra_headers = {
1659 "X-Stainless-Helper-Method": "chat.completions.parse",
1660 **(extra_headers or {}),
1661 }
1662
1663 def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
1664 return _parse_chat_completion(
1665 response_format=response_format,
1666 chat_completion=raw_completion,
1667 input_tools=tools,
1668 )
1669
1670 return await self._post(
1671 "/chat/completions",
1672 body=await async_maybe_transform(
1673 {
1674 "messages": messages,
1675 "model": model,
1676 "audio": audio,
1677 "frequency_penalty": frequency_penalty,
1678 "function_call": function_call,
1679 "functions": functions,
1680 "logit_bias": logit_bias,
1681 "logprobs": logprobs,
1682 "max_completion_tokens": max_completion_tokens,
1683 "max_tokens": max_tokens,
1684 "metadata": metadata,
1685 "modalities": modalities,
1686 "n": n,
1687 "parallel_tool_calls": parallel_tool_calls,
1688 "prediction": prediction,
1689 "presence_penalty": presence_penalty,
1690 "prompt_cache_key": prompt_cache_key,
1691 "prompt_cache_retention": prompt_cache_retention,
1692 "reasoning_effort": reasoning_effort,
1693 "response_format": _type_to_response_format(response_format),
1694 "safety_identifier": safety_identifier,
1695 "seed": seed,
1696 "service_tier": service_tier,
1697 "store": store,
1698 "stop": stop,
1699 "stream": False,
1700 "stream_options": stream_options,
1701 "temperature": temperature,
1702 "tool_choice": tool_choice,
1703 "tools": tools,
1704 "top_logprobs": top_logprobs,
1705 "top_p": top_p,
1706 "user": user,
1707 "verbosity": verbosity,
1708 "web_search_options": web_search_options,
1709 },
1710 completion_create_params.CompletionCreateParams,
1711 ),
1712 options=make_request_options(
1713 extra_headers=extra_headers,
1714 extra_query=extra_query,
1715 extra_body=extra_body,
1716 timeout=timeout,
1717 post_parser=parser,
1718 ),
1719 # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
1720 # in the `parser` function above
1721 cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
1722 stream=False,
1723 )
1724
    @overload
    async def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream: Optional[Literal[False]] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion:
        """
        **Starting a new project?** We recommend trying
        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
        advantage of the latest OpenAI platform features. Compare
        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).

        ---

        Creates a model response for the given chat conversation. Learn more in the
        [text generation](https://platform.openai.com/docs/guides/text-generation),
        [vision](https://platform.openai.com/docs/guides/vision), and
        [audio](https://platform.openai.com/docs/guides/audio) guides.

        Parameter support can differ depending on the model used to generate the
        response, particularly for newer reasoning models. Parameters that are only
        supported for reasoning models are noted below. For the current state of
        unsupported parameters in reasoning models,
        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).

        Args:
          messages: A list of messages comprising the conversation so far. Depending on the
              [model](https://platform.openai.com/docs/models) you use, different message
              types (modalities) are supported, like
              [text](https://platform.openai.com/docs/guides/text-generation),
              [images](https://platform.openai.com/docs/guides/vision), and
              [audio](https://platform.openai.com/docs/guides/audio).

          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
              wide range of models with different capabilities, performance characteristics,
              and price points. Refer to the
              [model guide](https://platform.openai.com/docs/models) to browse and compare
              available models.

          audio: Parameters for audio output. Required when audio output is requested with
              `modalities: ["audio"]`.
              [Learn more](https://platform.openai.com/docs/guides/audio).

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.

          function_call: Deprecated in favor of `tool_choice`.

              Controls which (if any) function is called by the model.

              `none` means the model will not call a function and instead generates a message.

              `auto` means the model can pick between generating a message or calling a
              function.

              Specifying a particular function via `{"name": "my_function"}` forces the model
              to call that function.

              `none` is the default when no functions are present. `auto` is the default if
              functions are present.

          functions: Deprecated in favor of `tools`.

              A list of functions the model may generate JSON inputs for.

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.

              Accepts a JSON object that maps tokens (specified by their token ID in the
              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
              bias is added to the logits generated by the model prior to sampling. The exact
              effect will vary per model, but values between -1 and 1 should decrease or
              increase likelihood of selection; values like -100 or 100 should result in a ban
              or exclusive selection of the relevant token.

          logprobs: Whether to return log probabilities of the output tokens or not. If true,
              returns the log probabilities of each output token returned in the `content` of
              `message`.

          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
              including visible output tokens and
              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
              completion. This value can be used to control
              [costs](https://openai.com/api/pricing/) for text generated via API.

              This value is now deprecated in favor of `max_completion_tokens`, and is not
              compatible with
              [o-series models](https://platform.openai.com/docs/guides/reasoning).

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          modalities: Output types that you would like the model to generate. Most models are capable
              of generating text, which is the default:

              `["text"]`

              The `gpt-4o-audio-preview` model can also be used to
              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
              this model generate both text and audio responses, you can use:

              `["text", "audio"]`

          n: How many chat completion choices to generate for each input message. Note that
              you will be charged based on the number of generated tokens across all of the
              choices. Keep `n` as `1` to minimize costs.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          prediction: Static predicted output content, such as the content of a text file that is
              being regenerated.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.

          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
              hit rates. Replaces the `user` field.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).

          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
              prompt caching, which keeps cached prefixes active for longer, up to a maximum
              of 24 hours.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.

          response_format: An object specifying the format that the model must output.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema (see the
              sketch after this parameter list). Learn more in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
              ensures the message the model generates is valid JSON. Using `json_schema` is
              preferred for models that support it.

          safety_identifier: A stable identifier used to help detect users of your application that may be
              violating OpenAI's usage policies. The IDs should be a string that uniquely
              identifies each user. We recommend hashing their username or email address, in
              order to avoid sending us any identifying information.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          seed: This feature is in Beta. If specified, our system will make a best effort to
              sample deterministically, such that repeated requests with the same `seed` and
              parameters should return the same result. Determinism is not guaranteed, and you
              should refer to the `system_fingerprint` response parameter to monitor changes
              in the backend.

          service_tier: Specifies the processing type used for serving the request.

              - If set to 'auto', then the request will be processed with the service tier
                configured in the Project settings. Unless otherwise configured, the Project
                will use 'default'.
              - If set to 'default', then the request will be processed with the standard
                pricing and performance for the selected model.
              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
                '[priority](https://openai.com/api-priority-processing/)', then the request
                will be processed with the corresponding service tier.
              - When not set, the default behavior is 'auto'.

              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.

              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          store: Whether or not to store the output of this chat completion request for use in
              our [model distillation](https://platform.openai.com/docs/guides/distillation)
              or [evals](https://platform.openai.com/docs/guides/evals) products.

              Supports text and image inputs. Note: image inputs over 8MB will be dropped.

          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
              for more information, along with the
              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
              guide for more information on how to handle the streaming events.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tool and instead generates a message. `auto` means the model can
              pick between generating a message or calling one or more tools. `required` means
              the model must call one or more tools. Specifying a particular tool via
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

              `none` is the default when no tools are present. `auto` is the default if tools
              are present.

          tools: A list of tools the model may call. You can provide either
              [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools)
              or [function tools](https://platform.openai.com/docs/guides/function-calling).

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          verbosity: Constrains the verbosity of the model's response. Lower values will result in
              more concise responses, while higher values will result in more verbose
              responses. Currently supported values are `low`, `medium`, and `high`.

          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
              about the
              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
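
        A minimal sketch of the raw `json_schema` response format described
        above (the schema itself is a hypothetical example):

        ```py
        completion = await client.chat.completions.create(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "Alice and Bob meet on 2025-01-01."}],
            response_format={
                "type": "json_schema",
                "json_schema": {
                    "name": "event",
                    "strict": True,
                    "schema": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string"},
                            "date": {"type": "string"},
                        },
                        "required": ["name", "date"],
                        "additionalProperties": False,
                    },
                },
            },
        )
        # The message content is a JSON string matching the schema.
        print(completion.choices[0].message.content)
        ```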
2024 """
2025 ...
2026
    @overload
    async def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        stream: Literal[True],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncStream[ChatCompletionChunk]:
        """
        **Starting a new project?** We recommend trying
        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
        advantage of the latest OpenAI platform features. Compare
        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).

        ---

        Creates a model response for the given chat conversation. Learn more in the
        [text generation](https://platform.openai.com/docs/guides/text-generation),
        [vision](https://platform.openai.com/docs/guides/vision), and
        [audio](https://platform.openai.com/docs/guides/audio) guides.

        Parameter support can differ depending on the model used to generate the
        response, particularly for newer reasoning models. Parameters that are only
        supported for reasoning models are noted below. For the current state of
        unsupported parameters in reasoning models,
        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).

        Args:
          messages: A list of messages comprising the conversation so far. Depending on the
              [model](https://platform.openai.com/docs/models) you use, different message
              types (modalities) are supported, like
              [text](https://platform.openai.com/docs/guides/text-generation),
              [images](https://platform.openai.com/docs/guides/vision), and
              [audio](https://platform.openai.com/docs/guides/audio).

          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
              wide range of models with different capabilities, performance characteristics,
              and price points. Refer to the
              [model guide](https://platform.openai.com/docs/models) to browse and compare
              available models.

          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format);
              a sketch of consuming the stream appears after this parameter list.
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
              for more information, along with the
              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
              guide for more information on how to handle the streaming events.

          audio: Parameters for audio output. Required when audio output is requested with
              `modalities: ["audio"]`.
              [Learn more](https://platform.openai.com/docs/guides/audio).

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.

          function_call: Deprecated in favor of `tool_choice`.

              Controls which (if any) function is called by the model.

              `none` means the model will not call a function and instead generates a message.

              `auto` means the model can pick between generating a message or calling a
              function.

              Specifying a particular function via `{"name": "my_function"}` forces the model
              to call that function.

              `none` is the default when no functions are present. `auto` is the default if
              functions are present.

          functions: Deprecated in favor of `tools`.

              A list of functions the model may generate JSON inputs for.

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.

              Accepts a JSON object that maps tokens (specified by their token ID in the
              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
              bias is added to the logits generated by the model prior to sampling. The exact
              effect will vary per model, but values between -1 and 1 should decrease or
              increase likelihood of selection; values like -100 or 100 should result in a ban
              or exclusive selection of the relevant token.

          logprobs: Whether to return log probabilities of the output tokens or not. If true,
              returns the log probabilities of each output token returned in the `content` of
              `message`.

          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
              including visible output tokens and
              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
              completion. This value can be used to control
              [costs](https://openai.com/api/pricing/) for text generated via API.

              This value is now deprecated in favor of `max_completion_tokens`, and is not
              compatible with
              [o-series models](https://platform.openai.com/docs/guides/reasoning).

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          modalities: Output types that you would like the model to generate. Most models are capable
              of generating text, which is the default:

              `["text"]`

              The `gpt-4o-audio-preview` model can also be used to
              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
              this model generate both text and audio responses, you can use:

              `["text", "audio"]`

          n: How many chat completion choices to generate for each input message. Note that
              you will be charged based on the number of generated tokens across all of the
              choices. Keep `n` as `1` to minimize costs.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          prediction: Static predicted output content, such as the content of a text file that is
              being regenerated.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.

          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
              hit rates. Replaces the `user` field.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).

          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
              prompt caching, which keeps cached prefixes active for longer, up to a maximum
              of 24 hours.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.

          response_format: An object specifying the format that the model must output.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
              ensures the message the model generates is valid JSON. Using `json_schema` is
              preferred for models that support it.

          safety_identifier: A stable identifier used to help detect users of your application that may be
              violating OpenAI's usage policies. The IDs should be a string that uniquely
              identifies each user. We recommend hashing their username or email address, in
              order to avoid sending us any identifying information.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          seed: This feature is in Beta. If specified, our system will make a best effort to
              sample deterministically, such that repeated requests with the same `seed` and
              parameters should return the same result. Determinism is not guaranteed, and you
              should refer to the `system_fingerprint` response parameter to monitor changes
              in the backend.

          service_tier: Specifies the processing type used for serving the request.

              - If set to 'auto', then the request will be processed with the service tier
                configured in the Project settings. Unless otherwise configured, the Project
                will use 'default'.
              - If set to 'default', then the request will be processed with the standard
                pricing and performance for the selected model.
              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
                '[priority](https://openai.com/api-priority-processing/)', then the request
                will be processed with the corresponding service tier.
              - When not set, the default behavior is 'auto'.

              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.

              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          store: Whether or not to store the output of this chat completion request for use in
              our [model distillation](https://platform.openai.com/docs/guides/distillation)
              or [evals](https://platform.openai.com/docs/guides/evals) products.

              Supports text and image inputs. Note: image inputs over 8MB will be dropped.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tool and instead generates a message. `auto` means the model can
              pick between generating a message or calling one or more tools. `required` means
              the model must call one or more tools. Specifying a particular tool via
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

              `none` is the default when no tools are present. `auto` is the default if tools
              are present.

          tools: A list of tools the model may call. You can provide either
              [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools)
              or [function tools](https://platform.openai.com/docs/guides/function-calling).

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          verbosity: Constrains the verbosity of the model's response. Lower values will result in
              more concise responses, while higher values will result in more verbose
              responses. Currently supported values are `low`, `medium`, and `high`.

          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
              about the
              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
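
        For example, a sketch of consuming the resulting stream:

        ```py
        stream = await client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Say hello"}],
            stream=True,
        )
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")
        ```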
2326 """
2327 ...
2328
2329 @overload
2330 async def create(
2331 self,
2332 *,
2333 messages: Iterable[ChatCompletionMessageParam],
2334 model: Union[str, ChatModel],
2335 stream: bool,
2336 audio: Optional[ChatCompletionAudioParam] | Omit = omit,
2337 frequency_penalty: Optional[float] | Omit = omit,
2338 function_call: completion_create_params.FunctionCall | Omit = omit,
2339 functions: Iterable[completion_create_params.Function] | Omit = omit,
2340 logit_bias: Optional[Dict[str, int]] | Omit = omit,
2341 logprobs: Optional[bool] | Omit = omit,
2342 max_completion_tokens: Optional[int] | Omit = omit,
2343 max_tokens: Optional[int] | Omit = omit,
2344 metadata: Optional[Metadata] | Omit = omit,
2345 modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
2346 n: Optional[int] | Omit = omit,
2347 parallel_tool_calls: bool | Omit = omit,
2348 prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
2349 presence_penalty: Optional[float] | Omit = omit,
2350 prompt_cache_key: str | Omit = omit,
2351 prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
2352 reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
2353 response_format: completion_create_params.ResponseFormat | Omit = omit,
2354 safety_identifier: str | Omit = omit,
2355 seed: Optional[int] | Omit = omit,
2356 service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
2357 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
2358 store: Optional[bool] | Omit = omit,
2359 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
2360 temperature: Optional[float] | Omit = omit,
2361 tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
2362 tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
2363 top_logprobs: Optional[int] | Omit = omit,
2364 top_p: Optional[float] | Omit = omit,
2365 user: str | Omit = omit,
2366 verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
2367 web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
2368 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
2369 # The extra values given here take precedence over values defined on the client or passed to this method.
2370 extra_headers: Headers | None = None,
2371 extra_query: Query | None = None,
2372 extra_body: Body | None = None,
2373 timeout: float | httpx.Timeout | None | NotGiven = not_given,
2374 ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
        """
        **Starting a new project?** We recommend trying
        [Responses](https://platform.openai.com/docs/api-reference/responses) to take
        advantage of the latest OpenAI platform features. Compare
        [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses).

        ---

        Creates a model response for the given chat conversation. Learn more in the
        [text generation](https://platform.openai.com/docs/guides/text-generation),
        [vision](https://platform.openai.com/docs/guides/vision), and
        [audio](https://platform.openai.com/docs/guides/audio) guides.

        Parameter support can differ depending on the model used to generate the
        response, particularly for newer reasoning models. Parameters that are only
        supported for reasoning models are noted below. For the current state of
        unsupported parameters in reasoning models,
        [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning).
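
        As an illustrative sketch only (assuming `client` is an `AsyncOpenAI`
        instance), this overload covers calls where the `stream` flag is only
        known at runtime:

        ```py
        response = await client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Hello!"}],
            stream=should_stream,  # hypothetical runtime bool
        )
        ```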

        Args:
          messages: A list of messages comprising the conversation so far. Depending on the
              [model](https://platform.openai.com/docs/models) you use, different message
              types (modalities) are supported, like
              [text](https://platform.openai.com/docs/guides/text-generation),
              [images](https://platform.openai.com/docs/guides/vision), and
              [audio](https://platform.openai.com/docs/guides/audio).

          model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
              wide range of models with different capabilities, performance characteristics,
              and price points. Refer to the
              [model guide](https://platform.openai.com/docs/models) to browse and compare
              available models.

          stream: If set to true, the model response data will be streamed to the client as it is
              generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming)
              for more information, along with the
              [streaming responses](https://platform.openai.com/docs/guides/streaming-responses)
              guide on how to handle the streaming events.

          audio: Parameters for audio output. Required when audio output is requested with
              `modalities: ["audio"]`.
              [Learn more](https://platform.openai.com/docs/guides/audio).

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.

          function_call: Deprecated in favor of `tool_choice`.

              Controls which (if any) function is called by the model.

              `none` means the model will not call a function and instead generates a message.

              `auto` means the model can pick between generating a message or calling a
              function.

              Specifying a particular function via `{"name": "my_function"}` forces the model
              to call that function.

              `none` is the default when no functions are present. `auto` is the default if
              functions are present.

          functions: Deprecated in favor of `tools`.

              A list of functions the model may generate JSON inputs for.

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.

              Accepts a JSON object that maps tokens (specified by their token ID in the
              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
              bias is added to the logits generated by the model prior to sampling. The exact
              effect will vary per model, but values between -1 and 1 should decrease or
              increase likelihood of selection; values like -100 or 100 should result in a ban
              or exclusive selection of the relevant token.

          logprobs: Whether to return log probabilities of the output tokens or not. If true,
              returns the log probabilities of each output token returned in the `content` of
              `message`.

          max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion,
              including visible output tokens and
              [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
              completion. This value can be used to control
              [costs](https://openai.com/api/pricing/) for text generated via API.

              This value is now deprecated in favor of `max_completion_tokens`, and is not
              compatible with
              [o-series models](https://platform.openai.com/docs/guides/reasoning).

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          modalities: Output types that you would like the model to generate. Most models are capable
              of generating text, which is the default:

              `["text"]`

              The `gpt-4o-audio-preview` model can also be used to
              [generate audio](https://platform.openai.com/docs/guides/audio). To request that
              this model generate both text and audio responses, you can use:

              `["text", "audio"]`

          n: How many chat completion choices to generate for each input message. Note that
              you will be charged based on the number of generated tokens across all of the
              choices. Keep `n` as `1` to minimize costs.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
              during tool use.

          prediction: Static predicted output content, such as the content of a text file that is
              being regenerated.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.

          prompt_cache_key: Used by OpenAI to cache responses for similar requests to optimize your cache
              hit rates. Replaces the `user` field.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching).

          prompt_cache_retention: The retention policy for the prompt cache. Set to `24h` to enable extended
              prompt caching, which keeps cached prefixes active for longer, up to a maximum
              of 24 hours.
              [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is currently only supported for `gpt-5.1-codex-max`.

          response_format: An object specifying the format that the model must output.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables the older JSON mode, which
              ensures the message the model generates is valid JSON. Using `json_schema` is
              preferred for models that support it.

          safety_identifier: A stable identifier used to help detect users of your application that may be
              violating OpenAI's usage policies. The IDs should be a string that uniquely
              identifies each user. We recommend hashing their username or email address, in
              order to avoid sending us any identifying information.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          seed: This feature is in Beta. If specified, our system will make a best effort to
              sample deterministically, such that repeated requests with the same `seed` and
              parameters should return the same result. Determinism is not guaranteed, and you
              should refer to the `system_fingerprint` response parameter to monitor changes
              in the backend.

          service_tier: Specifies the processing type used for serving the request.

              - If set to 'auto', then the request will be processed with the service tier
                configured in the Project settings. Unless otherwise configured, the Project
                will use 'default'.
              - If set to 'default', then the request will be processed with the standard
                pricing and performance for the selected model.
              - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
                '[priority](https://openai.com/api-priority-processing/)', then the request
                will be processed with the corresponding service tier.
              - When not set, the default behavior is 'auto'.

              When the `service_tier` parameter is set, the response body will include the
              `service_tier` value based on the processing mode actually used to serve the
              request. This response value may be different from the value set in the
              parameter.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.

              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          store: Whether or not to store the output of this chat completion request for use in
              our [model distillation](https://platform.openai.com/docs/guides/distillation)
              or [evals](https://platform.openai.com/docs/guides/evals) products.

              Supports text and image inputs. Note: image inputs over 8MB will be dropped.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic. We generally recommend altering this or `top_p` but
              not both.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tool and instead generates a message. `auto` means the model can
              pick between generating a message or calling one or more tools. `required` means
              the model must call one or more tools. Specifying a particular tool via
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

              `none` is the default when no tools are present. `auto` is the default if tools
              are present.

          tools: A list of tools the model may call. You can provide either
              [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools)
              or [function tools](https://platform.openai.com/docs/guides/function-calling).

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          user: This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use
              `prompt_cache_key` instead to maintain caching optimizations. A stable
              identifier for your end-users. Used to boost cache hit rates by better bucketing
              similar requests and to help OpenAI detect and prevent abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).

          verbosity: Constrains the verbosity of the model's response. Lower values will result in
              more concise responses, while higher values will result in more verbose
              responses. Currently supported values are `low`, `medium`, and `high`.

          web_search_options: This tool searches the web for relevant results to use in a response. Learn more
              about the
              [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @required_args(["messages", "model"], ["messages", "model", "stream"])
    async def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
        validate_response_format(response_format)
        return await self._post(
            "/chat/completions",
            body=await async_maybe_transform(
                {
                    "messages": messages,
                    "model": model,
                    "audio": audio,
                    "frequency_penalty": frequency_penalty,
                    "function_call": function_call,
                    "functions": functions,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_completion_tokens": max_completion_tokens,
                    "max_tokens": max_tokens,
                    "metadata": metadata,
                    "modalities": modalities,
                    "n": n,
                    "parallel_tool_calls": parallel_tool_calls,
                    "prediction": prediction,
                    "presence_penalty": presence_penalty,
                    "prompt_cache_key": prompt_cache_key,
                    "prompt_cache_retention": prompt_cache_retention,
                    "reasoning_effort": reasoning_effort,
                    "response_format": response_format,
                    "safety_identifier": safety_identifier,
                    "seed": seed,
                    "service_tier": service_tier,
                    "stop": stop,
                    "store": store,
                    "stream": stream,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "user": user,
                    "verbosity": verbosity,
                    "web_search_options": web_search_options,
                },
                completion_create_params.CompletionCreateParamsStreaming
                if stream
                else completion_create_params.CompletionCreateParamsNonStreaming,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ChatCompletion,
            stream=stream or False,
            stream_cls=AsyncStream[ChatCompletionChunk],
        )

    async def retrieve(
        self,
        completion_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion:
        """Get a stored chat completion.

        Only Chat Completions that have been created with
        the `store` parameter set to `true` will be returned.
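
        A minimal usage sketch (assuming `client` is an `AsyncOpenAI` instance and
        `"chatcmpl-abc123"` is a hypothetical ID of a completion created with
        `store=True`):

        ```py
        completion = await client.chat.completions.retrieve("chatcmpl-abc123")
        print(completion.id)
        ```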

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not completion_id:
            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
        return await self._get(
            f"/chat/completions/{completion_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ChatCompletion,
        )

    async def update(
        self,
        completion_id: str,
        *,
        metadata: Optional[Metadata],
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletion:
        """Modify a stored chat completion.

        Only Chat Completions that have been created
        with the `store` parameter set to `true` can be modified. Currently, the only
        supported modification is to update the `metadata` field.
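
        An illustrative sketch (assuming `client` is an `AsyncOpenAI` instance and a
        hypothetical stored completion ID):

        ```py
        updated = await client.chat.completions.update(
            "chatcmpl-abc123",
            metadata={"topic": "demo"},
        )
        ```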

        Args:
          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not completion_id:
            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
        return await self._post(
            f"/chat/completions/{completion_id}",
            body=await async_maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ChatCompletion,
        )

    def list(
        self,
        *,
        after: str | Omit = omit,
        limit: int | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: str | Omit = omit,
        order: Literal["asc", "desc"] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncPaginator[ChatCompletion, AsyncCursorPage[ChatCompletion]]:
        """List stored Chat Completions.

        Only Chat Completions that have been stored with
        the `store` parameter set to `true` will be returned.
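
        The returned paginator can be iterated with `async for`, fetching further
        pages automatically as needed; a minimal sketch (assuming `client` is an
        `AsyncOpenAI` instance):

        ```py
        async for completion in client.chat.completions.list(limit=20):
            print(completion.id)
        ```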

        Args:
          after: Identifier for the last chat completion from the previous pagination request.

          limit: Number of Chat Completions to retrieve.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: The model used to generate the Chat Completions.

          order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or
              `desc` for descending order. Defaults to `asc`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        return self._get_api_list(
            "/chat/completions",
            page=AsyncCursorPage[ChatCompletion],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "after": after,
                        "limit": limit,
                        "metadata": metadata,
                        "model": model,
                        "order": order,
                    },
                    completion_list_params.CompletionListParams,
                ),
            ),
            model=ChatCompletion,
        )

    async def delete(
        self,
        completion_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> ChatCompletionDeleted:
        """Delete a stored chat completion.

        Only Chat Completions that have been created
        with the `store` parameter set to `true` can be deleted.
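
        A minimal sketch (assuming `client` is an `AsyncOpenAI` instance and a
        hypothetical stored completion ID):

        ```py
        deleted = await client.chat.completions.delete("chatcmpl-abc123")
        assert deleted.deleted
        ```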

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not completion_id:
            raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}")
        return await self._delete(
            f"/chat/completions/{completion_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ChatCompletionDeleted,
        )

    def stream(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | Omit = omit,
        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        function_call: completion_create_params.FunctionCall | Omit = omit,
        functions: Iterable[completion_create_params.Function] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[bool] | Omit = omit,
        max_completion_tokens: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        modalities: Optional[List[Literal["text", "audio"]]] | Omit = omit,
        n: Optional[int] | Omit = omit,
        parallel_tool_calls: bool | Omit = omit,
        prediction: Optional[ChatCompletionPredictionContentParam] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        prompt_cache_key: str | Omit = omit,
        prompt_cache_retention: Optional[Literal["in-memory", "24h"]] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        safety_identifier: str | Omit = omit,
        seed: Optional[int] | Omit = omit,
        service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        store: Optional[bool] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_choice: ChatCompletionToolChoiceOptionParam | Omit = omit,
        tools: Iterable[ChatCompletionToolUnionParam] | Omit = omit,
        top_logprobs: Optional[int] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        verbosity: Optional[Literal["low", "medium", "high"]] | Omit = omit,
        web_search_options: completion_create_params.WebSearchOptions | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncChatCompletionStreamManager[ResponseFormatT]:
        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
        and automatic accumulation of each delta.

        This also supports all of the parsing utilities that `.parse()` does.

        Unlike `.create(stream=True)`, the `.stream()` method must be used within a context manager to prevent accidental leakage of the response:

        ```py
        async with client.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[...],
        ) as stream:
            async for event in stream:
                if event.type == "content.delta":
                    print(event.delta, flush=True, end="")
        ```

        When the context manager is entered, an `AsyncChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an async iterator. The full list of events yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).

        When the context manager exits, the response will be closed; however, the `stream` instance is still available outside
        the context manager.
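
        Since `.stream()` supports the same `response_format` parsing that `.parse()`
        does, structured outputs can be combined with streaming; an illustrative
        sketch (the `Weather` model is a hypothetical example):

        ```py
        from pydantic import BaseModel


        class Weather(BaseModel):
            city: str
            temperature_c: float


        async with client.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "What's the weather in Paris?"}],
            response_format=Weather,
        ) as stream:
            completion = await stream.get_final_completion()
            print(completion.choices[0].message.parsed)
        ```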
        """
        _validate_input_tools(tools)

        extra_headers = {
            "X-Stainless-Helper-Method": "chat.completions.stream",
            **(extra_headers or {}),
        }

        api_request = self.create(
            messages=messages,
            model=model,
            audio=audio,
            stream=True,
            response_format=_type_to_response_format(response_format),
            frequency_penalty=frequency_penalty,
            function_call=function_call,
            functions=functions,
            logit_bias=logit_bias,
            logprobs=logprobs,
            max_completion_tokens=max_completion_tokens,
            max_tokens=max_tokens,
            metadata=metadata,
            modalities=modalities,
            n=n,
            parallel_tool_calls=parallel_tool_calls,
            prediction=prediction,
            presence_penalty=presence_penalty,
            prompt_cache_key=prompt_cache_key,
            prompt_cache_retention=prompt_cache_retention,
            reasoning_effort=reasoning_effort,
            safety_identifier=safety_identifier,
            seed=seed,
            service_tier=service_tier,
            stop=stop,
            store=store,
            stream_options=stream_options,
            temperature=temperature,
            tool_choice=tool_choice,
            tools=tools,
            top_logprobs=top_logprobs,
            top_p=top_p,
            user=user,
            verbosity=verbosity,
            web_search_options=web_search_options,
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return AsyncChatCompletionStreamManager(
            api_request,
            response_format=response_format,
            input_tools=tools,
        )


class CompletionsWithRawResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.parse = _legacy_response.to_raw_response_wrapper(
            completions.parse,
        )
        self.create = _legacy_response.to_raw_response_wrapper(
            completions.create,
        )
        self.retrieve = _legacy_response.to_raw_response_wrapper(
            completions.retrieve,
        )
        self.update = _legacy_response.to_raw_response_wrapper(
            completions.update,
        )
        self.list = _legacy_response.to_raw_response_wrapper(
            completions.list,
        )
        self.delete = _legacy_response.to_raw_response_wrapper(
            completions.delete,
        )

    @cached_property
    def messages(self) -> MessagesWithRawResponse:
        return MessagesWithRawResponse(self._completions.messages)
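
# Note: these wrapper classes are not constructed directly; they are reached
# through the `.with_raw_response` / `.with_streaming_response` properties on the
# resource. An illustrative sketch (assuming a sync `OpenAI` client):
#
#   response = client.chat.completions.with_raw_response.create(
#       model="gpt-4o",
#       messages=[{"role": "user", "content": "Hello!"}],
#   )
#   completion = response.parse()  # parses into the regular `ChatCompletion`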


class AsyncCompletionsWithRawResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.parse = _legacy_response.async_to_raw_response_wrapper(
            completions.parse,
        )
        self.create = _legacy_response.async_to_raw_response_wrapper(
            completions.create,
        )
        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
            completions.retrieve,
        )
        self.update = _legacy_response.async_to_raw_response_wrapper(
            completions.update,
        )
        self.list = _legacy_response.async_to_raw_response_wrapper(
            completions.list,
        )
        self.delete = _legacy_response.async_to_raw_response_wrapper(
            completions.delete,
        )

    @cached_property
    def messages(self) -> AsyncMessagesWithRawResponse:
        return AsyncMessagesWithRawResponse(self._completions.messages)


class CompletionsWithStreamingResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.parse = to_streamed_response_wrapper(
            completions.parse,
        )
        self.create = to_streamed_response_wrapper(
            completions.create,
        )
        self.retrieve = to_streamed_response_wrapper(
            completions.retrieve,
        )
        self.update = to_streamed_response_wrapper(
            completions.update,
        )
        self.list = to_streamed_response_wrapper(
            completions.list,
        )
        self.delete = to_streamed_response_wrapper(
            completions.delete,
        )

    @cached_property
    def messages(self) -> MessagesWithStreamingResponse:
        return MessagesWithStreamingResponse(self._completions.messages)


class AsyncCompletionsWithStreamingResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.parse = async_to_streamed_response_wrapper(
            completions.parse,
        )
        self.create = async_to_streamed_response_wrapper(
            completions.create,
        )
        self.retrieve = async_to_streamed_response_wrapper(
            completions.retrieve,
        )
        self.update = async_to_streamed_response_wrapper(
            completions.update,
        )
        self.list = async_to_streamed_response_wrapper(
            completions.list,
        )
        self.delete = async_to_streamed_response_wrapper(
            completions.delete,
        )

    @cached_property
    def messages(self) -> AsyncMessagesWithStreamingResponse:
        return AsyncMessagesWithStreamingResponse(self._completions.messages)


def validate_response_format(response_format: object) -> None:
    if inspect.isclass(response_format) and issubclass(response_format, pydantic.BaseModel):
        raise TypeError(
            "You tried to pass a `BaseModel` class to `chat.completions.create()`; you must use `chat.completions.parse()` instead"
        )
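
# Illustrative sketch of the guard above (with a hypothetical `Weather` pydantic
# model):
#
#   client.chat.completions.create(..., response_format=Weather)  # raises TypeError
#   client.chat.completions.parse(..., response_format=Weather)   # supported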