1# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
3from __future__ import annotations
4
5from typing import Dict, Union, Iterable, Optional
6from typing_extensions import Literal, overload
7
8import httpx
9
10from .. import _legacy_response
11from ..types import completion_create_params
12from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
13from .._utils import required_args, maybe_transform, async_maybe_transform
14from .._compat import cached_property
15from .._resource import SyncAPIResource, AsyncAPIResource
16from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
17from .._streaming import Stream, AsyncStream
18from .._base_client import (
19 make_request_options,
20)
21from ..types.completion import Completion
22from ..types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
23
24__all__ = ["Completions", "AsyncCompletions"]
25
26
27class Completions(SyncAPIResource):
28 @cached_property
29 def with_raw_response(self) -> CompletionsWithRawResponse:
30 """
31 This property can be used as a prefix for any HTTP method call to return
32 the raw response object instead of the parsed content.
33
34 For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
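
        For example, a minimal sketch (assumes a configured `client = OpenAI()`; the model
        and prompt below are placeholder values):

            response = client.completions.with_raw_response.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
            )
            # raw HTTP details are available before parsing
            print(response.headers.get("x-request-id"))
            # parse() returns the `Completion` object that `create()` would normally return
            completion = response.parse()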
35 """
36 return CompletionsWithRawResponse(self)
37
38 @cached_property
39 def with_streaming_response(self) -> CompletionsWithStreamingResponse:
40 """
41 An alternative to `.with_raw_response` that doesn't eagerly read the response body.
42
43 For more information, see https://www.github.com/openai/openai-python#with_streaming_response
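
        For example, a minimal sketch (assumes a configured `client = OpenAI()`):

            with client.completions.with_streaming_response.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
            ) as response:
                print(response.headers.get("x-request-id"))
                # the body is read lazily, line by line
                for line in response.iter_lines():
                    print(line)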
44 """
45 return CompletionsWithStreamingResponse(self)
46
47 @overload
48 def create(
49 self,
50 *,
51 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
52 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
53 best_of: Optional[int] | Omit = omit,
54 echo: Optional[bool] | Omit = omit,
55 frequency_penalty: Optional[float] | Omit = omit,
56 logit_bias: Optional[Dict[str, int]] | Omit = omit,
57 logprobs: Optional[int] | Omit = omit,
58 max_tokens: Optional[int] | Omit = omit,
59 n: Optional[int] | Omit = omit,
60 presence_penalty: Optional[float] | Omit = omit,
61 seed: Optional[int] | Omit = omit,
62 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
63 stream: Optional[Literal[False]] | Omit = omit,
64 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
65 suffix: Optional[str] | Omit = omit,
66 temperature: Optional[float] | Omit = omit,
67 top_p: Optional[float] | Omit = omit,
68 user: str | Omit = omit,
69 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
70 # The extra values given here take precedence over values defined on the client or passed to this method.
71 extra_headers: Headers | None = None,
72 extra_query: Query | None = None,
73 extra_body: Body | None = None,
74 timeout: float | httpx.Timeout | None | NotGiven = not_given,
75 ) -> Completion:
76 """
77 Creates a completion for the provided prompt and parameters.
78
79 Args:
80 model: ID of the model to use. You can use the
81 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
82 see all of your available models, or see our
83 [Model overview](https://platform.openai.com/docs/models) for descriptions of
84 them.
85
86 prompt: The prompt(s) to generate completions for, encoded as a string, array of
87 strings, array of tokens, or array of token arrays.
88
89 Note that <|endoftext|> is the document separator that the model sees during
90 training, so if a prompt is not specified the model will generate as if from the
91 beginning of a new document.
92
93 best_of: Generates `best_of` completions server-side and returns the "best" (the one with
94 the highest log probability per token). Results cannot be streamed.
95
96 When used with `n`, `best_of` controls the number of candidate completions and
97 `n` specifies how many to return – `best_of` must be greater than `n`.
98
99 **Note:** Because this parameter generates many completions, it can quickly
100 consume your token quota. Use carefully and ensure that you have reasonable
101 settings for `max_tokens` and `stop`.
102
103          echo: Echo back the prompt in addition to the completion.
104
105 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
106 existing frequency in the text so far, decreasing the model's likelihood to
107 repeat the same line verbatim.
108
109 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
110
111 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
112
113 Accepts a JSON object that maps tokens (specified by their token ID in the GPT
114 tokenizer) to an associated bias value from -100 to 100. You can use this
115 [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
116 Mathematically, the bias is added to the logits generated by the model prior to
117 sampling. The exact effect will vary per model, but values between -1 and 1
118 should decrease or increase likelihood of selection; values like -100 or 100
119 should result in a ban or exclusive selection of the relevant token.
120
121 As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
122 from being generated.
123
124 logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
125              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
126 list of the 5 most likely tokens. The API will always return the `logprob` of
127 the sampled token, so there may be up to `logprobs+1` elements in the response.
128
129 The maximum value for `logprobs` is 5.
130
131 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
132 completion.
133
134 The token count of your prompt plus `max_tokens` cannot exceed the model's
135 context length.
136 [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
137 for counting tokens.
138
139 n: How many completions to generate for each prompt.
140
141 **Note:** Because this parameter generates many completions, it can quickly
142 consume your token quota. Use carefully and ensure that you have reasonable
143 settings for `max_tokens` and `stop`.
144
145 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
146 whether they appear in the text so far, increasing the model's likelihood to
147 talk about new topics.
148
149 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
150
151 seed: If specified, our system will make a best effort to sample deterministically,
152 such that repeated requests with the same `seed` and parameters should return
153 the same result.
154
155 Determinism is not guaranteed, and you should refer to the `system_fingerprint`
156 response parameter to monitor changes in the backend.
157
158          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
159
160 Up to 4 sequences where the API will stop generating further tokens. The
161 returned text will not contain the stop sequence.
162
163 stream: Whether to stream back partial progress. If set, tokens will be sent as
164 data-only
165 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
166 as they become available, with the stream terminated by a `data: [DONE]`
167 message.
168 [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
169
170 stream_options: Options for streaming response. Only set this when you set `stream: true`.
171
172 suffix: The suffix that comes after a completion of inserted text.
173
174 This parameter is only supported for `gpt-3.5-turbo-instruct`.
175
176 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
177 make the output more random, while lower values like 0.2 will make it more
178 focused and deterministic.
179
180 We generally recommend altering this or `top_p` but not both.
181
182 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
183 model considers the results of the tokens with top_p probability mass. So 0.1
184 means only the tokens comprising the top 10% probability mass are considered.
185
186 We generally recommend altering this or `temperature` but not both.
187
188 user: A unique identifier representing your end-user, which can help OpenAI to monitor
189 and detect abuse.
190 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
191
192 extra_headers: Send extra headers
193
194 extra_query: Add additional query parameters to the request
195
196 extra_body: Add additional JSON properties to the request
197
198 timeout: Override the client-level default timeout for this request, in seconds
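
        Example (illustrative sketch; assumes a configured `client = OpenAI()` and uses
        placeholder values):

            completion = client.completions.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
                max_tokens=7,
            )
            # the generated text lives on the first choice
            print(completion.choices[0].text)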
199 """
200 ...
201
202 @overload
203 def create(
204 self,
205 *,
206 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
207 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
208 stream: Literal[True],
209 best_of: Optional[int] | Omit = omit,
210 echo: Optional[bool] | Omit = omit,
211 frequency_penalty: Optional[float] | Omit = omit,
212 logit_bias: Optional[Dict[str, int]] | Omit = omit,
213 logprobs: Optional[int] | Omit = omit,
214 max_tokens: Optional[int] | Omit = omit,
215 n: Optional[int] | Omit = omit,
216 presence_penalty: Optional[float] | Omit = omit,
217 seed: Optional[int] | Omit = omit,
218 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
219 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
220 suffix: Optional[str] | Omit = omit,
221 temperature: Optional[float] | Omit = omit,
222 top_p: Optional[float] | Omit = omit,
223 user: str | Omit = omit,
224 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
225 # The extra values given here take precedence over values defined on the client or passed to this method.
226 extra_headers: Headers | None = None,
227 extra_query: Query | None = None,
228 extra_body: Body | None = None,
229 timeout: float | httpx.Timeout | None | NotGiven = not_given,
230 ) -> Stream[Completion]:
231 """
232 Creates a completion for the provided prompt and parameters.
233
234 Args:
235 model: ID of the model to use. You can use the
236 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
237 see all of your available models, or see our
238 [Model overview](https://platform.openai.com/docs/models) for descriptions of
239 them.
240
241 prompt: The prompt(s) to generate completions for, encoded as a string, array of
242 strings, array of tokens, or array of token arrays.
243
244 Note that <|endoftext|> is the document separator that the model sees during
245 training, so if a prompt is not specified the model will generate as if from the
246 beginning of a new document.
247
248 stream: Whether to stream back partial progress. If set, tokens will be sent as
249 data-only
250 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
251 as they become available, with the stream terminated by a `data: [DONE]`
252 message.
253 [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
254
255 best_of: Generates `best_of` completions server-side and returns the "best" (the one with
256 the highest log probability per token). Results cannot be streamed.
257
258 When used with `n`, `best_of` controls the number of candidate completions and
259 `n` specifies how many to return – `best_of` must be greater than `n`.
260
261 **Note:** Because this parameter generates many completions, it can quickly
262 consume your token quota. Use carefully and ensure that you have reasonable
263 settings for `max_tokens` and `stop`.
264
265          echo: Echo back the prompt in addition to the completion.
266
267 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
268 existing frequency in the text so far, decreasing the model's likelihood to
269 repeat the same line verbatim.
270
271 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
272
273 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
274
275 Accepts a JSON object that maps tokens (specified by their token ID in the GPT
276 tokenizer) to an associated bias value from -100 to 100. You can use this
277 [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
278 Mathematically, the bias is added to the logits generated by the model prior to
279 sampling. The exact effect will vary per model, but values between -1 and 1
280 should decrease or increase likelihood of selection; values like -100 or 100
281 should result in a ban or exclusive selection of the relevant token.
282
283 As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
284 from being generated.
285
286 logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
287              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
288 list of the 5 most likely tokens. The API will always return the `logprob` of
289 the sampled token, so there may be up to `logprobs+1` elements in the response.
290
291 The maximum value for `logprobs` is 5.
292
293 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
294 completion.
295
296 The token count of your prompt plus `max_tokens` cannot exceed the model's
297 context length.
298 [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
299 for counting tokens.
300
301 n: How many completions to generate for each prompt.
302
303 **Note:** Because this parameter generates many completions, it can quickly
304 consume your token quota. Use carefully and ensure that you have reasonable
305 settings for `max_tokens` and `stop`.
306
307 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
308 whether they appear in the text so far, increasing the model's likelihood to
309 talk about new topics.
310
311 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
312
313 seed: If specified, our system will make a best effort to sample deterministically,
314 such that repeated requests with the same `seed` and parameters should return
315 the same result.
316
317 Determinism is not guaranteed, and you should refer to the `system_fingerprint`
318 response parameter to monitor changes in the backend.
319
320          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
321
322 Up to 4 sequences where the API will stop generating further tokens. The
323 returned text will not contain the stop sequence.
324
325 stream_options: Options for streaming response. Only set this when you set `stream: true`.
326
327 suffix: The suffix that comes after a completion of inserted text.
328
329 This parameter is only supported for `gpt-3.5-turbo-instruct`.
330
331 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
332 make the output more random, while lower values like 0.2 will make it more
333 focused and deterministic.
334
335 We generally recommend altering this or `top_p` but not both.
336
337 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
338 model considers the results of the tokens with top_p probability mass. So 0.1
339 means only the tokens comprising the top 10% probability mass are considered.
340
341 We generally recommend altering this or `temperature` but not both.
342
343 user: A unique identifier representing your end-user, which can help OpenAI to monitor
344 and detect abuse.
345 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
346
347 extra_headers: Send extra headers
348
349 extra_query: Add additional query parameters to the request
350
351 extra_body: Add additional JSON properties to the request
352
353 timeout: Override the client-level default timeout for this request, in seconds
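
        Example (illustrative sketch; assumes a configured `client = OpenAI()`):

            stream = client.completions.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
                stream=True,
            )
            # each chunk is a `Completion` carrying an incremental piece of text
            for chunk in stream:
                print(chunk.choices[0].text, end="")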
354 """
355 ...
356
357 @overload
358 def create(
359 self,
360 *,
361 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
362 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
363 stream: bool,
364 best_of: Optional[int] | Omit = omit,
365 echo: Optional[bool] | Omit = omit,
366 frequency_penalty: Optional[float] | Omit = omit,
367 logit_bias: Optional[Dict[str, int]] | Omit = omit,
368 logprobs: Optional[int] | Omit = omit,
369 max_tokens: Optional[int] | Omit = omit,
370 n: Optional[int] | Omit = omit,
371 presence_penalty: Optional[float] | Omit = omit,
372 seed: Optional[int] | Omit = omit,
373 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
374 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
375 suffix: Optional[str] | Omit = omit,
376 temperature: Optional[float] | Omit = omit,
377 top_p: Optional[float] | Omit = omit,
378 user: str | Omit = omit,
379 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
380 # The extra values given here take precedence over values defined on the client or passed to this method.
381 extra_headers: Headers | None = None,
382 extra_query: Query | None = None,
383 extra_body: Body | None = None,
384 timeout: float | httpx.Timeout | None | NotGiven = not_given,
385 ) -> Completion | Stream[Completion]:
386 """
387 Creates a completion for the provided prompt and parameters.
388
389 Args:
390 model: ID of the model to use. You can use the
391 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
392 see all of your available models, or see our
393 [Model overview](https://platform.openai.com/docs/models) for descriptions of
394 them.
395
396 prompt: The prompt(s) to generate completions for, encoded as a string, array of
397 strings, array of tokens, or array of token arrays.
398
399 Note that <|endoftext|> is the document separator that the model sees during
400 training, so if a prompt is not specified the model will generate as if from the
401 beginning of a new document.
402
403 stream: Whether to stream back partial progress. If set, tokens will be sent as
404 data-only
405 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
406 as they become available, with the stream terminated by a `data: [DONE]`
407 message.
408 [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
409
410 best_of: Generates `best_of` completions server-side and returns the "best" (the one with
411 the highest log probability per token). Results cannot be streamed.
412
413 When used with `n`, `best_of` controls the number of candidate completions and
414 `n` specifies how many to return – `best_of` must be greater than `n`.
415
416 **Note:** Because this parameter generates many completions, it can quickly
417 consume your token quota. Use carefully and ensure that you have reasonable
418 settings for `max_tokens` and `stop`.
419
420          echo: Echo back the prompt in addition to the completion.
421
422 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
423 existing frequency in the text so far, decreasing the model's likelihood to
424 repeat the same line verbatim.
425
426 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
427
428 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
429
430 Accepts a JSON object that maps tokens (specified by their token ID in the GPT
431 tokenizer) to an associated bias value from -100 to 100. You can use this
432 [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
433 Mathematically, the bias is added to the logits generated by the model prior to
434 sampling. The exact effect will vary per model, but values between -1 and 1
435 should decrease or increase likelihood of selection; values like -100 or 100
436 should result in a ban or exclusive selection of the relevant token.
437
438 As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
439 from being generated.
440
441 logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
442              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
443 list of the 5 most likely tokens. The API will always return the `logprob` of
444 the sampled token, so there may be up to `logprobs+1` elements in the response.
445
446 The maximum value for `logprobs` is 5.
447
448 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
449 completion.
450
451 The token count of your prompt plus `max_tokens` cannot exceed the model's
452 context length.
453 [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
454 for counting tokens.
455
456 n: How many completions to generate for each prompt.
457
458 **Note:** Because this parameter generates many completions, it can quickly
459 consume your token quota. Use carefully and ensure that you have reasonable
460 settings for `max_tokens` and `stop`.
461
462 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
463 whether they appear in the text so far, increasing the model's likelihood to
464 talk about new topics.
465
466 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
467
468 seed: If specified, our system will make a best effort to sample deterministically,
469 such that repeated requests with the same `seed` and parameters should return
470 the same result.
471
472 Determinism is not guaranteed, and you should refer to the `system_fingerprint`
473 response parameter to monitor changes in the backend.
474
475          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
476
477 Up to 4 sequences where the API will stop generating further tokens. The
478 returned text will not contain the stop sequence.
479
480 stream_options: Options for streaming response. Only set this when you set `stream: true`.
481
482 suffix: The suffix that comes after a completion of inserted text.
483
484 This parameter is only supported for `gpt-3.5-turbo-instruct`.
485
486 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
487 make the output more random, while lower values like 0.2 will make it more
488 focused and deterministic.
489
490 We generally recommend altering this or `top_p` but not both.
491
492 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
493 model considers the results of the tokens with top_p probability mass. So 0.1
494 means only the tokens comprising the top 10% probability mass are considered.
495
496 We generally recommend altering this or `temperature` but not both.
497
498 user: A unique identifier representing your end-user, which can help OpenAI to monitor
499 and detect abuse.
500 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
501
502 extra_headers: Send extra headers
503
504 extra_query: Add additional query parameters to the request
505
506 extra_body: Add additional JSON properties to the request
507
508 timeout: Override the client-level default timeout for this request, in seconds
509 """
510 ...
511
512 @required_args(["model", "prompt"], ["model", "prompt", "stream"])
513 def create(
514 self,
515 *,
516 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
517 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
518 best_of: Optional[int] | Omit = omit,
519 echo: Optional[bool] | Omit = omit,
520 frequency_penalty: Optional[float] | Omit = omit,
521 logit_bias: Optional[Dict[str, int]] | Omit = omit,
522 logprobs: Optional[int] | Omit = omit,
523 max_tokens: Optional[int] | Omit = omit,
524 n: Optional[int] | Omit = omit,
525 presence_penalty: Optional[float] | Omit = omit,
526 seed: Optional[int] | Omit = omit,
527 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
528 stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
529 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
530 suffix: Optional[str] | Omit = omit,
531 temperature: Optional[float] | Omit = omit,
532 top_p: Optional[float] | Omit = omit,
533 user: str | Omit = omit,
534 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
535 # The extra values given here take precedence over values defined on the client or passed to this method.
536 extra_headers: Headers | None = None,
537 extra_query: Query | None = None,
538 extra_body: Body | None = None,
539 timeout: float | httpx.Timeout | None | NotGiven = not_given,
540 ) -> Completion | Stream[Completion]:
541 return self._post(
542 "/completions",
543 body=maybe_transform(
544 {
545 "model": model,
546 "prompt": prompt,
547 "best_of": best_of,
548 "echo": echo,
549 "frequency_penalty": frequency_penalty,
550 "logit_bias": logit_bias,
551 "logprobs": logprobs,
552 "max_tokens": max_tokens,
553 "n": n,
554 "presence_penalty": presence_penalty,
555 "seed": seed,
556 "stop": stop,
557 "stream": stream,
558 "stream_options": stream_options,
559 "suffix": suffix,
560 "temperature": temperature,
561 "top_p": top_p,
562 "user": user,
563 },
564 completion_create_params.CompletionCreateParamsStreaming
565 if stream
566 else completion_create_params.CompletionCreateParamsNonStreaming,
567 ),
568 options=make_request_options(
569 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
570 ),
571 cast_to=Completion,
572 stream=stream or False,
573 stream_cls=Stream[Completion],
574 )
575
576
577class AsyncCompletions(AsyncAPIResource):
578 @cached_property
579 def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
580 """
581 This property can be used as a prefix for any HTTP method call to return
582 the raw response object instead of the parsed content.
583
584 For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
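
        For example, a minimal sketch (assumes a configured `client = AsyncOpenAI()`; the
        model and prompt below are placeholder values):

            response = await client.completions.with_raw_response.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
            )
            # raw HTTP details are available before parsing
            print(response.headers.get("x-request-id"))
            # parse() returns the `Completion` object that `create()` would normally return
            completion = response.parse()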
585 """
586 return AsyncCompletionsWithRawResponse(self)
587
588 @cached_property
589 def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
590 """
591 An alternative to `.with_raw_response` that doesn't eagerly read the response body.
592
593 For more information, see https://www.github.com/openai/openai-python#with_streaming_response
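
        For example, a minimal sketch (assumes a configured `client = AsyncOpenAI()`):

            async with client.completions.with_streaming_response.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
            ) as response:
                print(response.headers.get("x-request-id"))
                # the body is read lazily, line by line
                async for line in response.iter_lines():
                    print(line)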
594 """
595 return AsyncCompletionsWithStreamingResponse(self)
596
597 @overload
598 async def create(
599 self,
600 *,
601 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
602 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
603 best_of: Optional[int] | Omit = omit,
604 echo: Optional[bool] | Omit = omit,
605 frequency_penalty: Optional[float] | Omit = omit,
606 logit_bias: Optional[Dict[str, int]] | Omit = omit,
607 logprobs: Optional[int] | Omit = omit,
608 max_tokens: Optional[int] | Omit = omit,
609 n: Optional[int] | Omit = omit,
610 presence_penalty: Optional[float] | Omit = omit,
611 seed: Optional[int] | Omit = omit,
612 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
613 stream: Optional[Literal[False]] | Omit = omit,
614 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
615 suffix: Optional[str] | Omit = omit,
616 temperature: Optional[float] | Omit = omit,
617 top_p: Optional[float] | Omit = omit,
618 user: str | Omit = omit,
619 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
620 # The extra values given here take precedence over values defined on the client or passed to this method.
621 extra_headers: Headers | None = None,
622 extra_query: Query | None = None,
623 extra_body: Body | None = None,
624 timeout: float | httpx.Timeout | None | NotGiven = not_given,
625 ) -> Completion:
626 """
627 Creates a completion for the provided prompt and parameters.
628
629 Args:
630 model: ID of the model to use. You can use the
631 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
632 see all of your available models, or see our
633 [Model overview](https://platform.openai.com/docs/models) for descriptions of
634 them.
635
636 prompt: The prompt(s) to generate completions for, encoded as a string, array of
637 strings, array of tokens, or array of token arrays.
638
639 Note that <|endoftext|> is the document separator that the model sees during
640 training, so if a prompt is not specified the model will generate as if from the
641 beginning of a new document.
642
643 best_of: Generates `best_of` completions server-side and returns the "best" (the one with
644 the highest log probability per token). Results cannot be streamed.
645
646 When used with `n`, `best_of` controls the number of candidate completions and
647 `n` specifies how many to return – `best_of` must be greater than `n`.
648
649 **Note:** Because this parameter generates many completions, it can quickly
650 consume your token quota. Use carefully and ensure that you have reasonable
651 settings for `max_tokens` and `stop`.
652
653          echo: Echo back the prompt in addition to the completion.
654
655 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
656 existing frequency in the text so far, decreasing the model's likelihood to
657 repeat the same line verbatim.
658
659 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
660
661 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
662
663 Accepts a JSON object that maps tokens (specified by their token ID in the GPT
664 tokenizer) to an associated bias value from -100 to 100. You can use this
665 [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
666 Mathematically, the bias is added to the logits generated by the model prior to
667 sampling. The exact effect will vary per model, but values between -1 and 1
668 should decrease or increase likelihood of selection; values like -100 or 100
669 should result in a ban or exclusive selection of the relevant token.
670
671 As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
672 from being generated.
673
674 logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
675              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
676 list of the 5 most likely tokens. The API will always return the `logprob` of
677 the sampled token, so there may be up to `logprobs+1` elements in the response.
678
679 The maximum value for `logprobs` is 5.
680
681 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
682 completion.
683
684 The token count of your prompt plus `max_tokens` cannot exceed the model's
685 context length.
686 [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
687 for counting tokens.
688
689 n: How many completions to generate for each prompt.
690
691 **Note:** Because this parameter generates many completions, it can quickly
692 consume your token quota. Use carefully and ensure that you have reasonable
693 settings for `max_tokens` and `stop`.
694
695 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
696 whether they appear in the text so far, increasing the model's likelihood to
697 talk about new topics.
698
699 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
700
701 seed: If specified, our system will make a best effort to sample deterministically,
702 such that repeated requests with the same `seed` and parameters should return
703 the same result.
704
705 Determinism is not guaranteed, and you should refer to the `system_fingerprint`
706 response parameter to monitor changes in the backend.
707
708          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
709
710 Up to 4 sequences where the API will stop generating further tokens. The
711 returned text will not contain the stop sequence.
712
713 stream: Whether to stream back partial progress. If set, tokens will be sent as
714 data-only
715 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
716 as they become available, with the stream terminated by a `data: [DONE]`
717 message.
718 [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
719
720 stream_options: Options for streaming response. Only set this when you set `stream: true`.
721
722 suffix: The suffix that comes after a completion of inserted text.
723
724 This parameter is only supported for `gpt-3.5-turbo-instruct`.
725
726 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
727 make the output more random, while lower values like 0.2 will make it more
728 focused and deterministic.
729
730 We generally recommend altering this or `top_p` but not both.
731
732 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
733 model considers the results of the tokens with top_p probability mass. So 0.1
734 means only the tokens comprising the top 10% probability mass are considered.
735
736 We generally recommend altering this or `temperature` but not both.
737
738 user: A unique identifier representing your end-user, which can help OpenAI to monitor
739 and detect abuse.
740 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
741
742 extra_headers: Send extra headers
743
744 extra_query: Add additional query parameters to the request
745
746 extra_body: Add additional JSON properties to the request
747
748 timeout: Override the client-level default timeout for this request, in seconds
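
        Example (illustrative sketch; assumes a configured `client = AsyncOpenAI()` and uses
        placeholder values):

            completion = await client.completions.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
                max_tokens=7,
            )
            # the generated text lives on the first choice
            print(completion.choices[0].text)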
749 """
750 ...
751
752 @overload
753 async def create(
754 self,
755 *,
756 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
757 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
758 stream: Literal[True],
759 best_of: Optional[int] | Omit = omit,
760 echo: Optional[bool] | Omit = omit,
761 frequency_penalty: Optional[float] | Omit = omit,
762 logit_bias: Optional[Dict[str, int]] | Omit = omit,
763 logprobs: Optional[int] | Omit = omit,
764 max_tokens: Optional[int] | Omit = omit,
765 n: Optional[int] | Omit = omit,
766 presence_penalty: Optional[float] | Omit = omit,
767 seed: Optional[int] | Omit = omit,
768 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
769 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
770 suffix: Optional[str] | Omit = omit,
771 temperature: Optional[float] | Omit = omit,
772 top_p: Optional[float] | Omit = omit,
773 user: str | Omit = omit,
774 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
775 # The extra values given here take precedence over values defined on the client or passed to this method.
776 extra_headers: Headers | None = None,
777 extra_query: Query | None = None,
778 extra_body: Body | None = None,
779 timeout: float | httpx.Timeout | None | NotGiven = not_given,
780 ) -> AsyncStream[Completion]:
781 """
782 Creates a completion for the provided prompt and parameters.
783
784 Args:
785 model: ID of the model to use. You can use the
786 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
787 see all of your available models, or see our
788 [Model overview](https://platform.openai.com/docs/models) for descriptions of
789 them.
790
791 prompt: The prompt(s) to generate completions for, encoded as a string, array of
792 strings, array of tokens, or array of token arrays.
793
794 Note that <|endoftext|> is the document separator that the model sees during
795 training, so if a prompt is not specified the model will generate as if from the
796 beginning of a new document.
797
798 stream: Whether to stream back partial progress. If set, tokens will be sent as
799 data-only
800 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
801 as they become available, with the stream terminated by a `data: [DONE]`
802 message.
803 [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
804
805 best_of: Generates `best_of` completions server-side and returns the "best" (the one with
806 the highest log probability per token). Results cannot be streamed.
807
808 When used with `n`, `best_of` controls the number of candidate completions and
809 `n` specifies how many to return – `best_of` must be greater than `n`.
810
811 **Note:** Because this parameter generates many completions, it can quickly
812 consume your token quota. Use carefully and ensure that you have reasonable
813 settings for `max_tokens` and `stop`.
814
815          echo: Echo back the prompt in addition to the completion.
816
817 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
818 existing frequency in the text so far, decreasing the model's likelihood to
819 repeat the same line verbatim.
820
821 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
822
823 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
824
825 Accepts a JSON object that maps tokens (specified by their token ID in the GPT
826 tokenizer) to an associated bias value from -100 to 100. You can use this
827 [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
828 Mathematically, the bias is added to the logits generated by the model prior to
829 sampling. The exact effect will vary per model, but values between -1 and 1
830 should decrease or increase likelihood of selection; values like -100 or 100
831 should result in a ban or exclusive selection of the relevant token.
832
833 As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
834 from being generated.
835
836 logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
837              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
838 list of the 5 most likely tokens. The API will always return the `logprob` of
839 the sampled token, so there may be up to `logprobs+1` elements in the response.
840
841 The maximum value for `logprobs` is 5.
842
843 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
844 completion.
845
846 The token count of your prompt plus `max_tokens` cannot exceed the model's
847 context length.
848 [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
849 for counting tokens.
850
851 n: How many completions to generate for each prompt.
852
853 **Note:** Because this parameter generates many completions, it can quickly
854 consume your token quota. Use carefully and ensure that you have reasonable
855 settings for `max_tokens` and `stop`.
856
857 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
858 whether they appear in the text so far, increasing the model's likelihood to
859 talk about new topics.
860
861 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
862
863 seed: If specified, our system will make a best effort to sample deterministically,
864 such that repeated requests with the same `seed` and parameters should return
865 the same result.
866
867 Determinism is not guaranteed, and you should refer to the `system_fingerprint`
868 response parameter to monitor changes in the backend.
869
870          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
871
872 Up to 4 sequences where the API will stop generating further tokens. The
873 returned text will not contain the stop sequence.
874
875 stream_options: Options for streaming response. Only set this when you set `stream: true`.
876
877 suffix: The suffix that comes after a completion of inserted text.
878
879 This parameter is only supported for `gpt-3.5-turbo-instruct`.
880
881 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
882 make the output more random, while lower values like 0.2 will make it more
883 focused and deterministic.
884
885 We generally recommend altering this or `top_p` but not both.
886
887 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
888 model considers the results of the tokens with top_p probability mass. So 0.1
889 means only the tokens comprising the top 10% probability mass are considered.
890
891 We generally recommend altering this or `temperature` but not both.
892
893 user: A unique identifier representing your end-user, which can help OpenAI to monitor
894 and detect abuse.
895 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
896
897 extra_headers: Send extra headers
898
899 extra_query: Add additional query parameters to the request
900
901 extra_body: Add additional JSON properties to the request
902
903 timeout: Override the client-level default timeout for this request, in seconds
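
        Example (illustrative sketch; assumes a configured `client = AsyncOpenAI()`):

            stream = await client.completions.create(
                model="gpt-3.5-turbo-instruct",
                prompt="Say this is a test",
                stream=True,
            )
            # each chunk is a `Completion` carrying an incremental piece of text
            async for chunk in stream:
                print(chunk.choices[0].text, end="")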
904 """
905 ...
906
907 @overload
908 async def create(
909 self,
910 *,
911 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
912 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
913 stream: bool,
914 best_of: Optional[int] | Omit = omit,
915 echo: Optional[bool] | Omit = omit,
916 frequency_penalty: Optional[float] | Omit = omit,
917 logit_bias: Optional[Dict[str, int]] | Omit = omit,
918 logprobs: Optional[int] | Omit = omit,
919 max_tokens: Optional[int] | Omit = omit,
920 n: Optional[int] | Omit = omit,
921 presence_penalty: Optional[float] | Omit = omit,
922 seed: Optional[int] | Omit = omit,
923 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
924 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
925 suffix: Optional[str] | Omit = omit,
926 temperature: Optional[float] | Omit = omit,
927 top_p: Optional[float] | Omit = omit,
928 user: str | Omit = omit,
929 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
930 # The extra values given here take precedence over values defined on the client or passed to this method.
931 extra_headers: Headers | None = None,
932 extra_query: Query | None = None,
933 extra_body: Body | None = None,
934 timeout: float | httpx.Timeout | None | NotGiven = not_given,
935 ) -> Completion | AsyncStream[Completion]:
936 """
937 Creates a completion for the provided prompt and parameters.
938
939 Args:
940 model: ID of the model to use. You can use the
941 [List models](https://platform.openai.com/docs/api-reference/models/list) API to
942 see all of your available models, or see our
943 [Model overview](https://platform.openai.com/docs/models) for descriptions of
944 them.
945
946 prompt: The prompt(s) to generate completions for, encoded as a string, array of
947 strings, array of tokens, or array of token arrays.
948
949 Note that <|endoftext|> is the document separator that the model sees during
950 training, so if a prompt is not specified the model will generate as if from the
951 beginning of a new document.
952
953 stream: Whether to stream back partial progress. If set, tokens will be sent as
954 data-only
955 [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
956 as they become available, with the stream terminated by a `data: [DONE]`
957 message.
958 [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
959
960 best_of: Generates `best_of` completions server-side and returns the "best" (the one with
961 the highest log probability per token). Results cannot be streamed.
962
963 When used with `n`, `best_of` controls the number of candidate completions and
964 `n` specifies how many to return – `best_of` must be greater than `n`.
965
966 **Note:** Because this parameter generates many completions, it can quickly
967 consume your token quota. Use carefully and ensure that you have reasonable
968 settings for `max_tokens` and `stop`.
969
970          echo: Echo back the prompt in addition to the completion.
971
972 frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
973 existing frequency in the text so far, decreasing the model's likelihood to
974 repeat the same line verbatim.
975
976 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
977
978 logit_bias: Modify the likelihood of specified tokens appearing in the completion.
979
980 Accepts a JSON object that maps tokens (specified by their token ID in the GPT
981 tokenizer) to an associated bias value from -100 to 100. You can use this
982 [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
983 Mathematically, the bias is added to the logits generated by the model prior to
984 sampling. The exact effect will vary per model, but values between -1 and 1
985 should decrease or increase likelihood of selection; values like -100 or 100
986 should result in a ban or exclusive selection of the relevant token.
987
988 As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
989 from being generated.
990
991 logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
992              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
993 list of the 5 most likely tokens. The API will always return the `logprob` of
994 the sampled token, so there may be up to `logprobs+1` elements in the response.
995
996 The maximum value for `logprobs` is 5.
997
998 max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
999 completion.
1000
1001 The token count of your prompt plus `max_tokens` cannot exceed the model's
1002 context length.
1003 [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
1004 for counting tokens.
1005
1006 n: How many completions to generate for each prompt.
1007
1008 **Note:** Because this parameter generates many completions, it can quickly
1009 consume your token quota. Use carefully and ensure that you have reasonable
1010 settings for `max_tokens` and `stop`.
1011
1012 presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
1013 whether they appear in the text so far, increasing the model's likelihood to
1014 talk about new topics.
1015
1016 [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)
1017
1018 seed: If specified, our system will make a best effort to sample deterministically,
1019 such that repeated requests with the same `seed` and parameters should return
1020 the same result.
1021
1022 Determinism is not guaranteed, and you should refer to the `system_fingerprint`
1023 response parameter to monitor changes in the backend.
1024
1025          stop: Not supported with the latest reasoning models `o3` and `o4-mini`.
1026
1027 Up to 4 sequences where the API will stop generating further tokens. The
1028 returned text will not contain the stop sequence.
1029
1030 stream_options: Options for streaming response. Only set this when you set `stream: true`.
1031
1032 suffix: The suffix that comes after a completion of inserted text.
1033
1034 This parameter is only supported for `gpt-3.5-turbo-instruct`.
1035
1036 temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
1037 make the output more random, while lower values like 0.2 will make it more
1038 focused and deterministic.
1039
1040 We generally recommend altering this or `top_p` but not both.
1041
1042 top_p: An alternative to sampling with temperature, called nucleus sampling, where the
1043 model considers the results of the tokens with top_p probability mass. So 0.1
1044 means only the tokens comprising the top 10% probability mass are considered.
1045
1046 We generally recommend altering this or `temperature` but not both.
1047
1048 user: A unique identifier representing your end-user, which can help OpenAI to monitor
1049 and detect abuse.
1050 [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
1051
1052 extra_headers: Send extra headers
1053
1054 extra_query: Add additional query parameters to the request
1055
1056 extra_body: Add additional JSON properties to the request
1057
1058 timeout: Override the client-level default timeout for this request, in seconds
1059 """
1060 ...
1061
1062 @required_args(["model", "prompt"], ["model", "prompt", "stream"])
1063 async def create(
1064 self,
1065 *,
1066 model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
1067 prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
1068 best_of: Optional[int] | Omit = omit,
1069 echo: Optional[bool] | Omit = omit,
1070 frequency_penalty: Optional[float] | Omit = omit,
1071 logit_bias: Optional[Dict[str, int]] | Omit = omit,
1072 logprobs: Optional[int] | Omit = omit,
1073 max_tokens: Optional[int] | Omit = omit,
1074 n: Optional[int] | Omit = omit,
1075 presence_penalty: Optional[float] | Omit = omit,
1076 seed: Optional[int] | Omit = omit,
1077 stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
1078 stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
1079 stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
1080 suffix: Optional[str] | Omit = omit,
1081 temperature: Optional[float] | Omit = omit,
1082 top_p: Optional[float] | Omit = omit,
1083 user: str | Omit = omit,
1084 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
1085 # The extra values given here take precedence over values defined on the client or passed to this method.
1086 extra_headers: Headers | None = None,
1087 extra_query: Query | None = None,
1088 extra_body: Body | None = None,
1089 timeout: float | httpx.Timeout | None | NotGiven = not_given,
1090 ) -> Completion | AsyncStream[Completion]:
1091 return await self._post(
1092 "/completions",
1093 body=await async_maybe_transform(
1094 {
1095 "model": model,
1096 "prompt": prompt,
1097 "best_of": best_of,
1098 "echo": echo,
1099 "frequency_penalty": frequency_penalty,
1100 "logit_bias": logit_bias,
1101 "logprobs": logprobs,
1102 "max_tokens": max_tokens,
1103 "n": n,
1104 "presence_penalty": presence_penalty,
1105 "seed": seed,
1106 "stop": stop,
1107 "stream": stream,
1108 "stream_options": stream_options,
1109 "suffix": suffix,
1110 "temperature": temperature,
1111 "top_p": top_p,
1112 "user": user,
1113 },
1114 completion_create_params.CompletionCreateParamsStreaming
1115 if stream
1116 else completion_create_params.CompletionCreateParamsNonStreaming,
1117 ),
1118 options=make_request_options(
1119 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1120 ),
1121 cast_to=Completion,
1122 stream=stream or False,
1123 stream_cls=AsyncStream[Completion],
1124 )
1125
1126
1127class CompletionsWithRawResponse:
1128 def __init__(self, completions: Completions) -> None:
1129 self._completions = completions
1130
1131 self.create = _legacy_response.to_raw_response_wrapper(
1132 completions.create,
1133 )
1134
1135
1136class AsyncCompletionsWithRawResponse:
1137 def __init__(self, completions: AsyncCompletions) -> None:
1138 self._completions = completions
1139
1140 self.create = _legacy_response.async_to_raw_response_wrapper(
1141 completions.create,
1142 )
1143
1144
1145class CompletionsWithStreamingResponse:
1146 def __init__(self, completions: Completions) -> None:
1147 self._completions = completions
1148
1149 self.create = to_streamed_response_wrapper(
1150 completions.create,
1151 )
1152
1153
1154class AsyncCompletionsWithStreamingResponse:
1155 def __init__(self, completions: AsyncCompletions) -> None:
1156 self._completions = completions
1157
1158 self.create = async_to_streamed_response_wrapper(
1159 completions.create,
1160 )