1# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
3from __future__ import annotations
4
5from typing import Dict, Union, Iterable, Optional
6from typing_extensions import Literal, overload
7
8import httpx
9
10from .. import _legacy_response
11from ..types import completion_create_params
12from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
13from .._utils import required_args, maybe_transform, async_maybe_transform
14from .._compat import cached_property
15from .._resource import SyncAPIResource, AsyncAPIResource
16from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
17from .._streaming import Stream, AsyncStream
18from .._base_client import (
19 make_request_options,
20)
21from ..types.completion import Completion
22from ..types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
23
24__all__ = ["Completions", "AsyncCompletions"]
25
26
27class Completions(SyncAPIResource):
28 @cached_property
29 def with_raw_response(self) -> CompletionsWithRawResponse:
30 """
31 This property can be used as a prefix for any HTTP method call to return
32 the raw response object instead of the parsed content.
33
34 For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
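
        For example, a minimal sketch (assumes a configured `client = OpenAI()`; the model
        and prompt below are placeholder values):

            response = client.completions.with_raw_response.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
            )
            # raw HTTP details are available before parsing
            print(response.headers.get("x-request-id"))
            # parse() returns the `Completion` object that `create()` would normally return
            completion = response.parse()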
35 """
36 return CompletionsWithRawResponse(self)
37
38 @cached_property
39 def with_streaming_response(self) -> CompletionsWithStreamingResponse:
40 """
41 An alternative to `.with_raw_response` that doesn't eagerly read the response body.
42
43 For more information, see https://www.github.com/openai/openai-python#with_streaming_response
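
        For example, a minimal sketch (assumes a configured `client = OpenAI()`):

            with client.completions.with_streaming_response.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
            ) as response:
                print(response.headers.get("x-request-id"))
                # the body is read lazily, line by line
                for line in response.iter_lines():
                    print(line)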
44 """
45 return CompletionsWithStreamingResponse(self)
46
47 @overload
48 def create(
49 self,
50 *,
51 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
52 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
53 best_of: Optional[int] | Omit = omit,
54 echo: Optional[bool] | Omit = omit,
55 frequency_penalty: Optional[float] | Omit = omit,
56 logit_bias: Optional[Dict[str, int]] | Omit = omit,
57 logprobs: Optional[int] | Omit = omit,
58 max_tokens: Optional[int] | Omit = omit,
59 n: Optional[int] | Omit = omit,
60 presence_penalty: Optional[float] | Omit = omit,
61 seed: Optional[int] | Omit = omit,
62 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
63 stream: Optional[Literal[False]] | Omit = omit,
64 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
65 suffix: Optional[str] | Omit = omit,
66 temperature: Optional[float] | Omit = omit,
67 top_p: Optional[float] | Omit = omit,
68 user: str | Omit = omit,
69 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
70 # The extra values given here take precedence over values defined on the client or passed to this method.
71 extra_headers: Headers | None = None,
72 extra_query: Query | None = None,
73 extra_body: Body | None = None,
74 timeout: float | httpx.Timeout | None | NotGiven = not_given,
75 ) -> Completion:
76 """
77 Creates a completion for the provided prompt and parameters.
78
79 Args:
80 model: ID of the model to use. You can use the
81 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
82 see all of your available models, or see our
83 [Model overview](https://platform.openai.com/docs/models) for descriptions of
84 them.
85
86 prompt: The prompt(s) to generate completions for, encoded as a string, array of
87 strings, array of tokens, or array of token arrays.
88
89 Note that <|endoftext|> is the document separator that the model sees during
90 training, so if a prompt is not specified the model will generate as if from the
91 beginning of a new document.
92
93 best_of: Generates `best_of` completions server-side and returns the "best" (the one with
94 the highest log probability per token). Results cannot be streamed.
95
96 When used with `n`, `best_of` controls the number of candidate completions and
97 `n` specifies how many to return – `best_of` must be greater than `n`.
98
99 **Note:** Because this parameter generates many completions, it can quickly
100 consume your token quota. Use carefully and ensure that you have reasonable
101 settings for `max_tokens` and `stop`.
102
103          echo: Echo back the prompt in addition to the completion.
104
105 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
106 existing frequency in the text so far, decreasing the model's likelihood to
107 repeat the same line verbatim.
108
109 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
110
111 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
112
113 Accepts a JSON object that maps tokens (specified by their token ID in the GPT
114 tokenizer) to an associated bias value from -100 to 100. You can use this
115 [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
116 Mathematically, the bias is added to the logits generated by the model prior to
117 sampling. The exact effect will vary per model, but values between -1 and 1
118 should decrease or increase likelihood of selection; values like -100 or 100
119 should result in a ban or exclusive selection of the relevant token.
120
121 As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
122 from being generated.
123
124 logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
125              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
126 list of the 5 most likely tokens. The API will always return the `logprob` of
127 the sampled token, so there may be up to `logprobs+1` elements in the response.
128
129 The maximum value for `logprobs` is 5.
130
131 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
132 completion.
133
134 The token count of your prompt plus `max_tokens` cannot exceed the model's
135 context length.
136 [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
137 for counting tokens.
138
139 n: How many completions to generate for each prompt.
140
141 **Note:** Because this parameter generates many completions, it can quickly
142 consume your token quota. Use carefully and ensure that you have reasonable
143 settings for `max_tokens` and `stop`.
144
145 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
146 whether they appear in the text so far, increasing the model's likelihood to
147 talk about new topics.
148
149 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
150
151 seed: If specified, our system will make a best effort to sample deterministically,
152 such that repeated requests with the same `seed` and parameters should return
153 the same result.
154
155 Determinism is not guaranteed, and you should refer to the `system_fingerprint`
156 response parameter to monitor changes in the backend.
157
158          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
159
160 Up to 4 sequences where the API will stop generating further tokens. The
161 returned text will not contain the stop sequence.
162
163 stream: Whether to stream back partial progress. If set, tokens will be sent as
164 data-only
165 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
166 as they become available, with the stream terminated by a `data: [DONE]`
167 message.
168 [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
169
170 stream_options: Options for streaming response. Only set this when you set `stream: true`.
171
172 suffix: The suffix that comes after a completion of inserted text.
173
174 This parameter is only supported for `gpt-3.5-turbo-instruct`.
175
176 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
177 make the output more random, while lower values like 0.2 will make it more
178 focused and deterministic.
179
180 We generally recommend altering this or `top_p` but not both.
181
182 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
183 model considers the results of the tokens with top_p probability mass. So 0.1
184 means only the tokens comprising the top 10% probability mass are considered.
185
186 We generally recommend altering this or `temperature` but not both.
187
188 user: A unique identifier representing your end-user, which can help OpenAI to monitor
189 and detect abuse.
190 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
191
192 extra_headers: Send extra headers
193
194 extra_query: Add additional query parameters to the request
195
196 extra_body: Add additional JSON properties to the request
197
198 timeout: Override the client-level default timeout for this request, in seconds
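
        Example (illustrative sketch; assumes a configured `client = OpenAI()` and uses
        placeholder values):

            completion = client.completions.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
                max_tokens=7,
            )
            # the generated text lives on the first choice
            print(completion.choices[0].text)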
199 """
200 ...
201
202 @overload
203 def create(
204 self,
205 *,
206 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
207 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
208 stream: Literal[True],
209 best_of: Optional[int] | Omit = omit,
210 echo: Optional[bool] | Omit = omit,
211 frequency_penalty: Optional[float] | Omit = omit,
212 logit_bias: Optional[Dict[str, int]] | Omit = omit,
213 logprobs: Optional[int] | Omit = omit,
214 max_tokens: Optional[int] | Omit = omit,
215 n: Optional[int] | Omit = omit,
216 presence_penalty: Optional[float] | Omit = omit,
217 seed: Optional[int] | Omit = omit,
218 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
219 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
220 suffix: Optional[str] | Omit = omit,
221 temperature: Optional[float] | Omit = omit,
222 top_p: Optional[float] | Omit = omit,
223 user: str | Omit = omit,
224 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
225 # The extra values given here take precedence over values defined on the client or passed to this method.
226 extra_headers: Headers | None = None,
227 extra_query: Query | None = None,
228 extra_body: Body | None = None,
229 timeout: float | httpx.Timeout | None | NotGiven = not_given,
230 ) -> Stream[Completion]:
231 """
232 Creates a completion for the provided prompt and parameters.
233
234 Args:
235 model: ID of the model to use. You can use the
236 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
237 see all of your available models, or see our
238 [Model overview](https://platform.openai.com/docs/models) for descriptions of
239 them.
240
241 prompt: The prompt(s) to generate completions for, encoded as a string, array of
242 strings, array of tokens, or array of token arrays.
243
244 Note that <|endoftext|> is the document separator that the model sees during
245 training, so if a prompt is not specified the model will generate as if from the
246 beginning of a new document.
247
248 stream: Whether to stream back partial progress. If set, tokens will be sent as
249 data-only
250 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
251 as they become available, with the stream terminated by a `data: [DONE]`
252 message.
253 [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
254
255 best_of: Generates `best_of` completions server-side and returns the "best" (the one with
256 the highest log probability per token). Results cannot be streamed.
257
258 When used with `n`, `best_of` controls the number of candidate completions and
259 `n` specifies how many to return – `best_of` must be greater than `n`.
260
261 **Note:** Because this parameter generates many completions, it can quickly
262 consume your token quota. Use carefully and ensure that you have reasonable
263 settings for `max_tokens` and `stop`.
264
265          echo: Echo back the prompt in addition to the completion.
266
267 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
268 existing frequency in the text so far, decreasing the model's likelihood to
269 repeat the same line verbatim.
270
271 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
272
273 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
274
275 Accepts a JSON object that maps tokens (specified by their token ID in the GPT
276 tokenizer) to an associated bias value from -100 to 100. You can use this
277 [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
278 Mathematically, the bias is added to the logits generated by the model prior to
279 sampling. The exact effect will vary per model, but values between -1 and 1
280 should decrease or increase likelihood of selection; values like -100 or 100
281 should result in a ban or exclusive selection of the relevant token.
282
283 As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
284 from being generated.
285
286 logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
287              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
288 list of the 5 most likely tokens. The API will always return the `logprob` of
289 the sampled token, so there may be up to `logprobs+1` elements in the response.
290
291 The maximum value for `logprobs` is 5.
292
293 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
294 completion.
295
296 The token count of your prompt plus `max_tokens` cannot exceed the model's
297 context length.
298 [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
299 for counting tokens.
300
301 n: How many completions to generate for each prompt.
302
303 **Note:** Because this parameter generates many completions, it can quickly
304 consume your token quota. Use carefully and ensure that you have reasonable
305 settings for `max_tokens` and `stop`.
306
307 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
308 whether they appear in the text so far, increasing the model's likelihood to
309 talk about new topics.
310
311 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
312
313 seed: If specified, our system will make a best effort to sample deterministically,
314 such that repeated requests with the same `seed` and parameters should return
315 the same result.
316
317 Determinism is not guaranteed, and you should refer to the `system_fingerprint`
318 response parameter to monitor changes in the backend.
319
320          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
321
322 Up to 4 sequences where the API will stop generating further tokens. The
323 returned text will not contain the stop sequence.
324
325 stream_options: Options for streaming response. Only set this when you set `stream: true`.
326
327 suffix: The suffix that comes after a completion of inserted text.
328
329 This parameter is only supported for `gpt-3.5-turbo-instruct`.
330
331 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
332 make the output more random, while lower values like 0.2 will make it more
333 focused and deterministic.
334
335 We generally recommend altering this or `top_p` but not both.
336
337 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
338 model considers the results of the tokens with top_p probability mass. So 0.1
339 means only the tokens comprising the top 10% probability mass are considered.
340
341 We generally recommend altering this or `temperature` but not both.
342
343 user: A unique identifier representing your end-user, which can help OpenAI to monitor
344 and detect abuse.
345 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
346
347 extra_headers: Send extra headers
348
349 extra_query: Add additional query parameters to the request
350
351 extra_body: Add additional JSON properties to the request
352
353 timeout: Override the client-level default timeout for this request, in seconds
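
        Example (illustrative sketch; assumes a configured `client = OpenAI()`):

            stream = client.completions.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
                stream=True,
            )
            # each chunk is a `Completion` carrying an incremental piece of text
            for chunk in stream:
                print(chunk.choices[0].text, end="")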
354 """
355 ...
356
357 @overload
358 def create(
359 self,
360 *,
361 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
362 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
363 stream: bool,
364 best_of: Optional[int] | Omit = omit,
365 echo: Optional[bool] | Omit = omit,
366 frequency_penalty: Optional[float] | Omit = omit,
367 logit_bias: Optional[Dict[str, int]] | Omit = omit,
368 logprobs: Optional[int] | Omit = omit,
369 max_tokens: Optional[int] | Omit = omit,
370 n: Optional[int] | Omit = omit,
371 presence_penalty: Optional[float] | Omit = omit,
372 seed: Optional[int] | Omit = omit,
373 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
374 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
375 suffix: Optional[str] | Omit = omit,
376 temperature: Optional[float] | Omit = omit,
377 top_p: Optional[float] | Omit = omit,
378 user: str | Omit = omit,
379 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
380 # The extra values given here take precedence over values defined on the client or passed to this method.
381 extra_headers: Headers | None = None,
382 extra_query: Query | None = None,
383 extra_body: Body | None = None,
384 timeout: float | httpx.Timeout | None | NotGiven = not_given,
385 ) -> Completion | Stream[Completion]:
386 """
387 Creates a completion for the provided prompt and parameters.
388
389 Args:
390 model: ID of the model to use. You can use the
391 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
392 see all of your available models, or see our
393 [Model overview](https://platform.openai.com/docs/models) for descriptions of
394 them.
395
396 prompt: The prompt(s) to generate completions for, encoded as a string, array of
397 strings, array of tokens, or array of token arrays.
398
399 Note that <|endoftext|> is the document separator that the model sees during
400 training, so if a prompt is not specified the model will generate as if from the
401 beginning of a new document.
402
403 stream: Whether to stream back partial progress. If set, tokens will be sent as
404 data-only
405 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
406 as they become available, with the stream terminated by a `data: [DONE]`
407 message.
408 [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
409
410 best_of: Generates `best_of` completions server-side and returns the "best" (the one with
411 the highest log probability per token). Results cannot be streamed.
412
413 When used with `n`, `best_of` controls the number of candidate completions and
414 `n` specifies how many to return – `best_of` must be greater than `n`.
415
416 **Note:** Because this parameter generates many completions, it can quickly
417 consume your token quota. Use carefully and ensure that you have reasonable
418 settings for `max_tokens` and `stop`.
419
420          echo: Echo back the prompt in addition to the completion.
421
422 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
423 existing frequency in the text so far, decreasing the model's likelihood to
424 repeat the same line verbatim.
425
426 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
427
428 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
429
430 Accepts a JSON object that maps tokens (specified by their token ID in the GPT
431 tokenizer) to an associated bias value from -100 to 100. You can use this
432 [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
433 Mathematically, the bias is added to the logits generated by the model prior to
434 sampling. The exact effect will vary per model, but values between -1 and 1
435 should decrease or increase likelihood of selection; values like -100 or 100
436 should result in a ban or exclusive selection of the relevant token.
437
438 As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
439 from being generated.
440
441 logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
442              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
443 list of the 5 most likely tokens. The API will always return the `logprob` of
444 the sampled token, so there may be up to `logprobs+1` elements in the response.
445
446 The maximum value for `logprobs` is 5.
447
448 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
449 completion.
450
451 The token count of your prompt plus `max_tokens` cannot exceed the model's
452 context length.
453 [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
454 for counting tokens.
455
456 n: How many completions to generate for each prompt.
457
458 **Note:** Because this parameter generates many completions, it can quickly
459 consume your token quota. Use carefully and ensure that you have reasonable
460 settings for `max_tokens` and `stop`.
461
462 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
463 whether they appear in the text so far, increasing the model's likelihood to
464 talk about new topics.
465
466 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
467
468 seed: If specified, our system will make a best effort to sample deterministically,
469 such that repeated requests with the same `seed` and parameters should return
470 the same result.
471
472 Determinism is not guaranteed, and you should refer to the `system_fingerprint`
473 response parameter to monitor changes in the backend.
474
475          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
476
477 Up to 4 sequences where the API will stop generating further tokens. The
478 returned text will not contain the stop sequence.
479
480 stream_options: Options for streaming response. Only set this when you set `stream: true`.
481
482 suffix: The suffix that comes after a completion of inserted text.
483
484 This parameter is only supported for `gpt-3.5-turbo-instruct`.
485
486 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
487 make the output more random, while lower values like 0.2 will make it more
488 focused and deterministic.
489
490 We generally recommend altering this or `top_p` but not both.
491
492 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
493 model considers the results of the tokens with top_p probability mass. So 0.1
494 means only the tokens comprising the top 10% probability mass are considered.
495
496 We generally recommend altering this or `temperature` but not both.
497
498 user: A unique identifier representing your end-user, which can help OpenAI to monitor
499 and detect abuse.
500 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
501
502 extra_headers: Send extra headers
503
504 extra_query: Add additional query parameters to the request
505
506 extra_body: Add additional JSON properties to the request
507
508 timeout: Override the client-level default timeout for this request, in seconds
509 """
510 ...
511
512 @required_args(["model", "prompt"], ["model", "prompt", "stream"])
513 def create(
514 self,
515 *,
516 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
517 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
518 best_of: Optional[int] | Omit = omit,
519 echo: Optional[bool] | Omit = omit,
520 frequency_penalty: Optional[float] | Omit = omit,
521 logit_bias: Optional[Dict[str, int]] | Omit = omit,
522 logprobs: Optional[int] | Omit = omit,
523 max_tokens: Optional[int] | Omit = omit,
524 n: Optional[int] | Omit = omit,
525 presence_penalty: Optional[float] | Omit = omit,
526 seed: Optional[int] | Omit = omit,
527 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
528 stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
529 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
530 suffix: Optional[str] | Omit = omit,
531 temperature: Optional[float] | Omit = omit,
532 top_p: Optional[float] | Omit = omit,
533 user: str | Omit = omit,
534 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
535 # The extra values given here take precedence over values defined on the client or passed to this method.
536 extra_headers: Headers | None = None,
537 extra_query: Query | None = None,
538 extra_body: Body | None = None,
539 timeout: float | httpx.Timeout | None | NotGiven = not_given,
540 ) -> Completion | Stream[Completion]:
541 return self._post(
542 "/completions",
543 body=maybe_transform(
544 {
545 "model": model,
546 "prompt": prompt,
547 "best_of": best_of,
548 "echo": echo,
549 "frequency_penalty": frequency_penalty,
550 "logit_bias": logit_bias,
551 "logprobs": logprobs,
552 "max_tokens": max_tokens,
553 "n": n,
554 "presence_penalty": presence_penalty,
555 "seed": seed,
556 "stop": stop,
557 "stream": stream,
558 "stream_options": stream_options,
559 "suffix": suffix,
560 "temperature": temperature,
561 "top_p": top_p,
562 "user": user,
563 },
564 completion_create_params.CompletionCreateParamsStreaming
565 if stream
566 else completion_create_params.CompletionCreateParamsNonStreaming,
567 ),
568 options=make_request_options(
569 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
570 ),
571 cast_to=Completion,
572 stream=stream or False,
573 stream_cls=Stream[Completion],
574 )
575
576
577class AsyncCompletions(AsyncAPIResource):
578 @cached_property
579 def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
580 """
581 This property can be used as a prefix for any HTTP method call to return
582 the raw response object instead of the parsed content.
583
584 For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
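
        For example, a minimal sketch (assumes a configured `client = AsyncOpenAI()`; the
        model and prompt below are placeholder values):

            response = await client.completions.with_raw_response.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
            )
            # raw HTTP details are available before parsing
            print(response.headers.get("x-request-id"))
            # parse() returns the `Completion` object that `create()` would normally return
            completion = response.parse()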
585 """
586 return AsyncCompletionsWithRawResponse(self)
587
588 @cached_property
589 def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
590 """
591 An alternative to `.with_raw_response` that doesn't eagerly read the response body.
592
593 For more information, see https://www.github.com/openai/openai-python#with_streaming_response
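
        For example, a minimal sketch (assumes a configured `client = AsyncOpenAI()`):

            async with client.completions.with_streaming_response.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
            ) as response:
                print(response.headers.get("x-request-id"))
                # the body is read lazily, line by line
                async for line in response.iter_lines():
                    print(line)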
594 """
595 return AsyncCompletionsWithStreamingResponse(self)
596
597 @overload
598 async def create(
599 self,
600 *,
601 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
602 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
603 best_of: Optional[int] | Omit = omit,
604 echo: Optional[bool] | Omit = omit,
605 frequency_penalty: Optional[float] | Omit = omit,
606 logit_bias: Optional[Dict[str, int]] | Omit = omit,
607 logprobs: Optional[int] | Omit = omit,
608 max_tokens: Optional[int] | Omit = omit,
609 n: Optional[int] | Omit = omit,
610 presence_penalty: Optional[float] | Omit = omit,
611 seed: Optional[int] | Omit = omit,
612 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
613 stream: Optional[Literal[False]] | Omit = omit,
614 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
615 suffix: Optional[str] | Omit = omit,
616 temperature: Optional[float] | Omit = omit,
617 top_p: Optional[float] | Omit = omit,
618 user: str | Omit = omit,
619 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
620 # The extra values given here take precedence over values defined on the client or passed to this method.
621 extra_headers: Headers | None = None,
622 extra_query: Query | None = None,
623 extra_body: Body | None = None,
624 timeout: float | httpx.Timeout | None | NotGiven = not_given,
625 ) -> Completion:
626 """
627 Creates a completion for the provided prompt and parameters.
628
629 Args:
630 model: ID of the model to use. You can use the
631 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
632 see all of your available models, or see our
633 [Model overview](https://platform.openai.com/docs/models) for descriptions of
634 them.
635
636 prompt: The prompt(s) to generate completions for, encoded as a string, array of
637 strings, array of tokens, or array of token arrays.
638
639 Note that <|endoftext|> is the document separator that the model sees during
640 training, so if a prompt is not specified the model will generate as if from the
641 beginning of a new document.
642
643 best_of: Generates `best_of` completions server-side and returns the "best" (the one with
644 the highest log probability per token). Results cannot be streamed.
645
646 When used with `n`, `best_of` controls the number of candidate completions and
647 `n` specifies how many to return – `best_of` must be greater than `n`.
648
649 **Note:** Because this parameter generates many completions, it can quickly
650 consume your token quota. Use carefully and ensure that you have reasonable
651 settings for `max_tokens` and `stop`.
652
653          echo: Echo back the prompt in addition to the completion.
654
655 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
656 existing frequency in the text so far, decreasing the model's likelihood to
657 repeat the same line verbatim.
658
659 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
660
661 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
662
663 Accepts a JSON object that maps tokens (specified by their token ID in the GPT
664 tokenizer) to an associated bias value from -100 to 100. You can use this
665 [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
666 Mathematically, the bias is added to the logits generated by the model prior to
667 sampling. The exact effect will vary per model, but values between -1 and 1
668 should decrease or increase likelihood of selection; values like -100 or 100
669 should result in a ban or exclusive selection of the relevant token.
670
671 As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
672 from being generated.
673
674 logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
675              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
676 list of the 5 most likely tokens. The API will always return the `logprob` of
677 the sampled token, so there may be up to `logprobs+1` elements in the response.
678
679 The maximum value for `logprobs` is 5.
680
681 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
682 completion.
683
684 The token count of your prompt plus `max_tokens` cannot exceed the model's
685 context length.
686 [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
687 for counting tokens.
688
689 n: How many completions to generate for each prompt.
690
691 **Note:** Because this parameter generates many completions, it can quickly
692 consume your token quota. Use carefully and ensure that you have reasonable
693 settings for `max_tokens` and `stop`.
694
695 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
696 whether they appear in the text so far, increasing the model's likelihood to
697 talk about new topics.
698
699 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
700
701 seed: If specified, our system will make a best effort to sample deterministically,
702 such that repeated requests with the same `seed` and parameters should return
703 the same result.
704
705 Determinism is not guaranteed, and you should refer to the `system_fingerprint`
706 response parameter to monitor changes in the backend.
707
708          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
709
710 Up to 4 sequences where the API will stop generating further tokens. The
711 returned text will not contain the stop sequence.
712
713 stream: Whether to stream back partial progress. If set, tokens will be sent as
714 data-only
715 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
716 as they become available, with the stream terminated by a `data: [DONE]`
717 message.
718 [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
719
720 stream_options: Options for streaming response. Only set this when you set `stream: true`.
721
722 suffix: The suffix that comes after a completion of inserted text.
723
724 This parameter is only supported for `gpt-3.5-turbo-instruct`.
725
726 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
727 make the output more random, while lower values like 0.2 will make it more
728 focused and deterministic.
729
730 We generally recommend altering this or `top_p` but not both.
731
732 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
733 model considers the results of the tokens with top_p probability mass. So 0.1
734 means only the tokens comprising the top 10% probability mass are considered.
735
736 We generally recommend altering this or `temperature` but not both.
737
738 user: A unique identifier representing your end-user, which can help OpenAI to monitor
739 and detect abuse.
740 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
741
742 extra_headers: Send extra headers
743
744 extra_query: Add additional query parameters to the request
745
746 extra_body: Add additional JSON properties to the request
747
748 timeout: Override the client-level default timeout for this request, in seconds
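
        Example (illustrative sketch; assumes a configured `client = AsyncOpenAI()` and uses
        placeholder values):

            completion = await client.completions.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
                max_tokens=7,
            )
            # the generated text lives on the first choice
            print(completion.choices[0].text)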
749 """
750 ...
751
752 @overload
753 async def create(
754 self,
755 *,
756 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
757 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
758 stream: Literal[True],
759 best_of: Optional[int] | Omit = omit,
760 echo: Optional[bool] | Omit = omit,
761 frequency_penalty: Optional[float] | Omit = omit,
762 logit_bias: Optional[Dict[str, int]] | Omit = omit,
763 logprobs: Optional[int] | Omit = omit,
764 max_tokens: Optional[int] | Omit = omit,
765 n: Optional[int] | Omit = omit,
766 presence_penalty: Optional[float] | Omit = omit,
767 seed: Optional[int] | Omit = omit,
768 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
769 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
770 suffix: Optional[str] | Omit = omit,
771 temperature: Optional[float] | Omit = omit,
772 top_p: Optional[float] | Omit = omit,
773 user: str | Omit = omit,
774 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
775 # The extra values given here take precedence over values defined on the client or passed to this method.
776 extra_headers: Headers | None = None,
777 extra_query: Query | None = None,
778 extra_body: Body | None = None,
779 timeout: float | httpx.Timeout | None | NotGiven = not_given,
780 ) -> AsyncStream[Completion]:
781 """
782 Creates a completion for the provided prompt and parameters.
783
784 Args:
785 model: ID of the model to use. You can use the
786 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
787 see all of your available models, or see our
788 [Model overview](https://platform.openai.com/docs/models) for descriptions of
789 them.
790
791 prompt: The prompt(s) to generate completions for, encoded as a string, array of
792 strings, array of tokens, or array of token arrays.
793
794 Note that <|endoftext|> is the document separator that the model sees during
795 training, so if a prompt is not specified the model will generate as if from the
796 beginning of a new document.
797
798 stream: Whether to stream back partial progress. If set, tokens will be sent as
799 data-only
800 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
801 as they become available, with the stream terminated by a `data: [DONE]`
802 message.
803 [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
804
805 best_of: Generates `best_of` completions server-side and returns the "best" (the one with
806 the highest log probability per token). Results cannot be streamed.
807
808 When used with `n`, `best_of` controls the number of candidate completions and
809 `n` specifies how many to return – `best_of` must be greater than `n`.
810
811 **Note:** Because this parameter generates many completions, it can quickly
812 consume your token quota. Use carefully and ensure that you have reasonable
813 settings for `max_tokens` and `stop`.
814
815          echo: Echo back the prompt in addition to the completion.
816
817 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
818 existing frequency in the text so far, decreasing the model's likelihood to
819 repeat the same line verbatim.
820
821 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
822
823 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
824
825 Accepts a JSON object that maps tokens (specified by their token ID in the GPT
826 tokenizer) to an associated bias value from -100 to 100. You can use this
827 [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
828 Mathematically, the bias is added to the logits generated by the model prior to
829 sampling. The exact effect will vary per model, but values between -1 and 1
830 should decrease or increase likelihood of selection; values like -100 or 100
831 should result in a ban or exclusive selection of the relevant token.
832
833 As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
834 from being generated.
835
836 logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
837              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
838 list of the 5 most likely tokens. The API will always return the `logprob` of
839 the sampled token, so there may be up to `logprobs+1` elements in the response.
840
841 The maximum value for `logprobs` is 5.
842
843 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
844 completion.
845
846 The token count of your prompt plus `max_tokens` cannot exceed the model's
847 context length.
848 [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
849 for counting tokens.
850
851 n: How many completions to generate for each prompt.
852
853 **Note:** Because this parameter generates many completions, it can quickly
854 consume your token quota. Use carefully and ensure that you have reasonable
855 settings for `max_tokens` and `stop`.
856
857 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
858 whether they appear in the text so far, increasing the model's likelihood to
859 talk about new topics.
860
861 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
862
863 seed: If specified, our system will make a best effort to sample deterministically,
864 such that repeated requests with the same `seed` and parameters should return
865 the same result.
866
867 Determinism is not guaranteed, and you should refer to the `system_fingerprint`
868 response parameter to monitor changes in the backend.
869
870          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
871
872 Up to 4 sequences where the API will stop generating further tokens. The
873 returned text will not contain the stop sequence.
874
875 stream_options: Options for streaming response. Only set this when you set `stream: true`.
876
877 suffix: The suffix that comes after a completion of inserted text.
878
879 This parameter is only supported for `gpt-3.5-turbo-instruct`.
880
881 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
882 make the output more random, while lower values like 0.2 will make it more
883 focused and deterministic.
884
885 We generally recommend altering this or `top_p` but not both.
886
887 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
888 model considers the results of the tokens with top_p probability mass. So 0.1
889 means only the tokens comprising the top 10% probability mass are considered.
890
891 We generally recommend altering this or `temperature` but not both.
892
893 user: A unique identifier representing your end-user, which can help OpenAI to monitor
894 and detect abuse.
895 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
896
897 extra_headers: Send extra headers
898
899 extra_query: Add additional query parameters to the request
900
901 extra_body: Add additional JSON properties to the request
902
903 timeout: Override the client-level default timeout for this request, in seconds
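
        Example (illustrative sketch; assumes a configured `client = AsyncOpenAI()`):

            stream = await client.completions.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
                stream=True,
            )
            # each chunk is a `Completion` carrying an incremental piece of text
            async for chunk in stream:
                print(chunk.choices[0].text, end="")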
904 """
905 ...
906
907 @overload
908 async def create(
909 self,
910 *,
911 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
912 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
913 stream: bool,
914 best_of: Optional[int] | Omit = omit,
915 echo: Optional[bool] | Omit = omit,
916 frequency_penalty: Optional[float] | Omit = omit,
917 logit_bias: Optional[Dict[str, int]] | Omit = omit,
918 logprobs: Optional[int] | Omit = omit,
919 max_tokens: Optional[int] | Omit = omit,
920 n: Optional[int] | Omit = omit,
921 presence_penalty: Optional[float] | Omit = omit,
922 seed: Optional[int] | Omit = omit,
923 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
924 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
925 suffix: Optional[str] | Omit = omit,
926 temperature: Optional[float] | Omit = omit,
927 top_p: Optional[float] | Omit = omit,
928 user: str | Omit = omit,
929 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
930 # The extra values given here take precedence over values defined on the client or passed to this method.
931 extra_headers: Headers | None = None,
932 extra_query: Query | None = None,
933 extra_body: Body | None = None,
934 timeout: float | httpx.Timeout | None | NotGiven = not_given,
935 ) -> Completion | AsyncStream[Completion]:
936 """
937 Creates a completion for the provided prompt and parameters.
938
939 Args:
940 model: ID of the model to use. You can use the
941 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
942 see all of your available models, or see our
943 [Model overview](https://platform.openai.com/docs/models) for descriptions of
944 them.
945
946 prompt: The prompt(s) to generate completions for, encoded as a string, array of
947 strings, array of tokens, or array of token arrays.
948
949 Note that <|endoftext|> is the document separator that the model sees during
950 training, so if a prompt is not specified the model will generate as if from the
951 beginning of a new document.
952
953 stream: Whether to stream back partial progress. If set, tokens will be sent as
954 data-only
955 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
956 as they become available, with the stream terminated by a `data: [DONE]`
957 message.
958 [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
959
960 best_of: Generates `best_of` completions server-side and returns the "best" (the one with
961 the highest log probability per token). Results cannot be streamed.
962
963 When used with `n`, `best_of` controls the number of candidate completions and
964 `n` specifies how many to return – `best_of` must be greater than `n`.
965
966 **Note:** Because this parameter generates many completions, it can quickly
967 consume your token quota. Use carefully and ensure that you have reasonable
968 settings for `max_tokens` and `stop`.
969
970          echo: Echo back the prompt in addition to the completion.
971
972 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
973 existing frequency in the text so far, decreasing the model's likelihood to
974 repeat the same line verbatim.
975
976 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
977
978 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
979
980 Accepts a JSON object that maps tokens (specified by their token ID in the GPT
981 tokenizer) to an associated bias value from -100 to 100. You can use this
982 [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
983 Mathematically, the bias is added to the logits generated by the model prior to
984 sampling. The exact effect will vary per model, but values between -1 and 1
985 should decrease or increase likelihood of selection; values like -100 or 100
986 should result in a ban or exclusive selection of the relevant token.
987
988 As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
989 from being generated.
990
991 logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
992              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
993 list of the 5 most likely tokens. The API will always return the `logprob` of
994 the sampled token, so there may be up to `logprobs+1` elements in the response.
995
996 The maximum value for `logprobs` is 5.
997
998 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
999 completion.
1000
1001 The token count of your prompt plus `max_tokens` cannot exceed the model's
1002 context length.
1003 [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
1004 for counting tokens.
1005
1006 n: How many completions to generate for each prompt.
1007
1008 **Note:** Because this parameter generates many completions, it can quickly
1009 consume your token quota. Use carefully and ensure that you have reasonable
1010 settings for `max_tokens` and `stop`.
1011
1012 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
1013 whether they appear in the text so far, increasing the model's likelihood to
1014 talk about new topics.
1015
1016 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
1017
1018 seed: If specified, our system will make a best effort to sample deterministically,
1019 such that repeated requests with the same `seed` and parameters should return
1020 the same result.
1021
1022 Determinism is not guaranteed, and you should refer to the `system_fingerprint`
1023 response parameter to monitor changes in the backend.
1024
1025          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
1026
1027 Up to 4 sequences where the API will stop generating further tokens. The
1028 returned text will not contain the stop sequence.
1029
1030 stream_options: Options for streaming response. Only set this when you set `stream: true`.
1031
1032 suffix: The suffix that comes after a completion of inserted text.
1033
1034 This parameter is only supported for `gpt-3.5-turbo-instruct`.
1035
1036 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
1037 make the output more random, while lower values like 0.2 will make it more
1038 focused and deterministic.
1039
1040 We generally recommend altering this or `top_p` but not both.
1041
1042 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
1043 model considers the results of the tokens with top_p probability mass. So 0.1
1044 means only the tokens comprising the top 10% probability mass are considered.
1045
1046 We generally recommend altering this or `temperature` but not both.
1047
1048 user: A unique identifier representing your end-user, which can help OpenAI to monitor
1049 and detect abuse.
1050 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
1051
1052 extra_headers: Send extra headers
1053
1054 extra_query: Add additional query parameters to the request
1055
1056 extra_body: Add additional JSON properties to the request
1057
1058 timeout: Override the client-level default timeout for this request, in seconds
1059 """
1060 ...
1061
1062 @required_args(["model", "prompt"], ["model", "prompt", "stream"])
1063 async def create(
1064 self,
1065 *,
1066 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
1067 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
1068 best_of: Optional[int] | Omit = omit,
1069 echo: Optional[bool] | Omit = omit,
1070 frequency_penalty: Optional[float] | Omit = omit,
1071 logit_bias: Optional[Dict[str, int]] | Omit = omit,
1072 logprobs: Optional[int] | Omit = omit,
1073 max_tokens: Optional[int] | Omit = omit,
1074 n: Optional[int] | Omit = omit,
1075 presence_penalty: Optional[float] | Omit = omit,
1076 seed: Optional[int] | Omit = omit,
1077 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
1078 stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
1079 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
1080 suffix: Optional[str] | Omit = omit,
1081 temperature: Optional[float] | Omit = omit,
1082 top_p: Optional[float] | Omit = omit,
1083 user: str | Omit = omit,
1084 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1085 # The extra values given here take precedence over values defined on the client or passed to this method.
1086 extra_headers: Headers | None = None,
1087 extra_query: Query | None = None,
1088 extra_body: Body | None = None,
1089 timeout: float | httpx.Timeout | None | NotGiven = not_given,
1090 ) -> Completion | AsyncStream[Completion]:
1091 return await self._post(
1092 "/completions",
1093 body=await async_maybe_transform(
1094 {
1095 "model": model,
1096 "prompt": prompt,
1097 "best_of": best_of,
1098 "echo": echo,
1099 "frequency_penalty": frequency_penalty,
1100 "logit_bias": logit_bias,
1101 "logprobs": logprobs,
1102 "max_tokens": max_tokens,
1103 "n": n,
1104 "presence_penalty": presence_penalty,
1105 "seed": seed,
1106 "stop": stop,
1107 "stream": stream,
1108 "stream_options": stream_options,
1109 "suffix": suffix,
1110 "temperature": temperature,
1111 "top_p": top_p,
1112 "user": user,
1113 },
1114 completion_create_params.CompletionCreateParamsStreaming
1115 if stream
1116 else completion_create_params.CompletionCreateParamsNonStreaming,
1117 ),
1118 options=make_request_options(
1119 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1120 ),
1121 cast_to=Completion,
1122 stream=stream or False,
1123 stream_cls=AsyncStream[Completion],
1124 )
1125
1126
1127class CompletionsWithRawResponse:
1128 def __init__(self, completions: Completions) -> None:
1129 self._completions = completions
1130
1131 self.create = _legacy_response.to_raw_response_wrapper(
1132 completions.create,
1133 )
1134
1135
1136class AsyncCompletionsWithRawResponse:
1137 def __init__(self, completions: AsyncCompletions) -> None:
1138 self._completions = completions
1139
1140 self.create = _legacy_response.async_to_raw_response_wrapper(
1141 completions.create,
1142 )
1143
1144
1145class CompletionsWithStreamingResponse:
1146 def __init__(self, completions: Completions) -> None:
1147 self._completions = completions
1148
1149 self.create = to_streamed_response_wrapper(
1150 completions.create,
1151 )
1152
1153
1154class AsyncCompletionsWithStreamingResponse:
1155 def __init__(self, completions: AsyncCompletions) -> None:
1156 self._completions = completions
1157
1158 self.create = async_to_streamed_response_wrapper(
1159 completions.create,
1160 )