Commit d1616886

Stainless Bot <107565488+stainless-bot@users.noreply.github.com>
2023-12-15 11:04:39
feat(api): add optional `name` argument + improve docs (#972)
1 parent 9e6e1a2
src/openai/resources/audio/speech.py
@@ -53,7 +53,9 @@ class Speech(SyncAPIResource):
               `tts-1` or `tts-1-hd`
 
           voice: The voice to use when generating the audio. Supported voices are `alloy`,
-              `echo`, `fable`, `onyx`, `nova`, and `shimmer`.
+              `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are
+              available in the
+              [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options).
 
           response_format: The format to return audio in. Supported formats are `mp3`, `opus`, `aac`, and `flac`.
 
@@ -120,7 +122,9 @@ class AsyncSpeech(AsyncAPIResource):
               `tts-1` or `tts-1-hd`
 
           voice: The voice to use when generating the audio. Supported voices are `alloy`,
-              `echo`, `fable`, `onyx`, `nova`, and `shimmer`.
+              `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are
+              available in the
+              [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options).
 
           response_format: The format to return audio in. Supported formats are `mp3`, `opus`, `aac`, and `flac`.
 
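The two hunks above add voice previews to the `speech` docstrings. For orientation, a minimal sketch of how the documented options pass through this resource (assuming an `OPENAI_API_KEY` in the environment; the output path is hypothetical):

```python
from pathlib import Path

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Generate audio with one of the documented voices; previews of each
# voice are linked in the Text to speech guide.
response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",  # or echo, fable, onyx, nova, shimmer
    input="Hello from the speech endpoint.",
    response_format="mp3",  # also: opus, aac, flac
)
response.stream_to_file(Path("speech.mp3"))  # hypothetical output path
```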
src/openai/resources/chat/completions.py
@@ -51,11 +51,11 @@ class Completions(SyncAPIResource):
                 "gpt-4-32k",
                 "gpt-4-32k-0314",
                 "gpt-4-32k-0613",
-                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo",
                 "gpt-3.5-turbo-16k",
                 "gpt-3.5-turbo-0301",
                 "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo-16k-0613",
             ],
         ],
@@ -97,7 +97,7 @@ class Completions(SyncAPIResource):
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           function_call: Deprecated in favor of `tool_choice`.
 
@@ -130,13 +130,15 @@ class Completions(SyncAPIResource):
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
               for counting tokens.
 
-          n: How many chat completion choices to generate for each input message.
+          n: How many chat completion choices to generate for each input message. Note that
+              you will be charged based on the number of generated tokens across all of the
+              choices. Keep `n` as `1` to minimize costs.
 
           presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
               whether they appear in the text so far, increasing the model's likelihood to
               talk about new topics.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           response_format: An object specifying the format that the model must output.
 
@@ -146,10 +148,10 @@ class Completions(SyncAPIResource):
               **Important:** when using JSON mode, you **must** also instruct the model to
               produce JSON yourself via a system or user message. Without this, the model may
               generate an unending stream of whitespace until the generation reaches the token
-              limit, resulting in increased latency and appearance of a "stuck" request. Also
-              note that the message content may be partially cut off if
-              `finish_reason="length"`, which indicates the generation exceeded `max_tokens`
-              or the conversation exceeded the max context length.
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
 
           seed: This feature is in Beta. If specified, our system will make a best effort to
               sample deterministically, such that repeated requests with the same `seed` and
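The docstring changes above (the `n` cost note, the penalty-guide links, JSON mode, and the Beta `seed`) all describe parameters of the same call. A hedged sketch exercising them together (model and prompt are illustrative):

```python
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-3.5-turbo-1106",
    messages=[
        # In JSON mode you must instruct the model to produce JSON yourself.
        {"role": "system", "content": "Reply with a JSON object."},
        {"role": "user", "content": "List three primary colors."},
    ],
    n=1,  # keep n at 1 to minimize token costs
    frequency_penalty=0.5,  # discourage verbatim repetition
    presence_penalty=0.0,
    response_format={"type": "json_object"},  # JSON mode
    seed=1234,  # Beta: best-effort determinism
)
print(completion.choices[0].message.content)
print(completion.system_fingerprint)  # monitor backend changes when using seed
```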
@@ -222,11 +224,11 @@ class Completions(SyncAPIResource):
                 "gpt-4-32k",
                 "gpt-4-32k-0314",
                 "gpt-4-32k-0613",
-                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo",
                 "gpt-3.5-turbo-16k",
                 "gpt-3.5-turbo-0301",
                 "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo-16k-0613",
             ],
         ],
@@ -275,7 +277,7 @@ class Completions(SyncAPIResource):
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           function_call: Deprecated in favor of `tool_choice`.
 
@@ -308,13 +310,15 @@ class Completions(SyncAPIResource):
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
               for counting tokens.
 
-          n: How many chat completion choices to generate for each input message.
+          n: How many chat completion choices to generate for each input message. Note that
+              you will be charged based on the number of generated tokens across all of the
+              choices. Keep `n` as `1` to minimize costs.
 
           presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
               whether they appear in the text so far, increasing the model's likelihood to
               talk about new topics.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           response_format: An object specifying the format that the model must output.
 
@@ -324,10 +328,10 @@ class Completions(SyncAPIResource):
               **Important:** when using JSON mode, you **must** also instruct the model to
               produce JSON yourself via a system or user message. Without this, the model may
               generate an unending stream of whitespace until the generation reaches the token
-              limit, resulting in increased latency and appearance of a "stuck" request. Also
-              note that the message content may be partially cut off if
-              `finish_reason="length"`, which indicates the generation exceeded `max_tokens`
-              or the conversation exceeded the max context length.
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
 
           seed: This feature is in Beta. If specified, our system will make a best effort to
               sample deterministically, such that repeated requests with the same `seed` and
@@ -393,11 +397,11 @@ class Completions(SyncAPIResource):
                 "gpt-4-32k",
                 "gpt-4-32k-0314",
                 "gpt-4-32k-0613",
-                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo",
                 "gpt-3.5-turbo-16k",
                 "gpt-3.5-turbo-0301",
                 "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo-16k-0613",
             ],
         ],
@@ -446,7 +450,7 @@ class Completions(SyncAPIResource):
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           function_call: Deprecated in favor of `tool_choice`.
 
@@ -479,13 +483,15 @@ class Completions(SyncAPIResource):
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
               for counting tokens.
 
-          n: How many chat completion choices to generate for each input message.
+          n: How many chat completion choices to generate for each input message. Note that
+              you will be charged based on the number of generated tokens across all of the
+              choices. Keep `n` as `1` to minimize costs.
 
           presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
               whether they appear in the text so far, increasing the model's likelihood to
               talk about new topics.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           response_format: An object specifying the format that the model must output.
 
@@ -495,10 +501,10 @@ class Completions(SyncAPIResource):
               **Important:** when using JSON mode, you **must** also instruct the model to
               produce JSON yourself via a system or user message. Without this, the model may
               generate an unending stream of whitespace until the generation reaches the token
-              limit, resulting in increased latency and appearance of a "stuck" request. Also
-              note that the message content may be partially cut off if
-              `finish_reason="length"`, which indicates the generation exceeded `max_tokens`
-              or the conversation exceeded the max context length.
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
 
           seed: This feature is in Beta. If specified, our system will make a best effort to
               sample deterministically, such that repeated requests with the same `seed` and
@@ -564,11 +570,11 @@ class Completions(SyncAPIResource):
                 "gpt-4-32k",
                 "gpt-4-32k-0314",
                 "gpt-4-32k-0613",
-                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo",
                 "gpt-3.5-turbo-16k",
                 "gpt-3.5-turbo-0301",
                 "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo-16k-0613",
             ],
         ],
@@ -652,11 +658,11 @@ class AsyncCompletions(AsyncAPIResource):
                 "gpt-4-32k",
                 "gpt-4-32k-0314",
                 "gpt-4-32k-0613",
-                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo",
                 "gpt-3.5-turbo-16k",
                 "gpt-3.5-turbo-0301",
                 "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo-16k-0613",
             ],
         ],
@@ -698,7 +704,7 @@ class AsyncCompletions(AsyncAPIResource):
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           function_call: Deprecated in favor of `tool_choice`.
 
@@ -731,13 +737,15 @@ class AsyncCompletions(AsyncAPIResource):
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
               for counting tokens.
 
-          n: How many chat completion choices to generate for each input message.
+          n: How many chat completion choices to generate for each input message. Note that
+              you will be charged based on the number of generated tokens across all of the
+              choices. Keep `n` as `1` to minimize costs.
 
           presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
               whether they appear in the text so far, increasing the model's likelihood to
               talk about new topics.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           response_format: An object specifying the format that the model must output.
 
@@ -747,10 +755,10 @@ class AsyncCompletions(AsyncAPIResource):
               **Important:** when using JSON mode, you **must** also instruct the model to
               produce JSON yourself via a system or user message. Without this, the model may
               generate an unending stream of whitespace until the generation reaches the token
-              limit, resulting in increased latency and appearance of a "stuck" request. Also
-              note that the message content may be partially cut off if
-              `finish_reason="length"`, which indicates the generation exceeded `max_tokens`
-              or the conversation exceeded the max context length.
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
 
           seed: This feature is in Beta. If specified, our system will make a best effort to
               sample deterministically, such that repeated requests with the same `seed` and
@@ -823,11 +831,11 @@ class AsyncCompletions(AsyncAPIResource):
                 "gpt-4-32k",
                 "gpt-4-32k-0314",
                 "gpt-4-32k-0613",
-                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo",
                 "gpt-3.5-turbo-16k",
                 "gpt-3.5-turbo-0301",
                 "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo-16k-0613",
             ],
         ],
@@ -876,7 +884,7 @@ class AsyncCompletions(AsyncAPIResource):
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           function_call: Deprecated in favor of `tool_choice`.
 
@@ -909,13 +917,15 @@ class AsyncCompletions(AsyncAPIResource):
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
               for counting tokens.
 
-          n: How many chat completion choices to generate for each input message.
+          n: How many chat completion choices to generate for each input message. Note that
+              you will be charged based on the number of generated tokens across all of the
+              choices. Keep `n` as `1` to minimize costs.
 
           presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
               whether they appear in the text so far, increasing the model's likelihood to
               talk about new topics.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           response_format: An object specifying the format that the model must output.
 
@@ -925,10 +935,10 @@ class AsyncCompletions(AsyncAPIResource):
               **Important:** when using JSON mode, you **must** also instruct the model to
               produce JSON yourself via a system or user message. Without this, the model may
               generate an unending stream of whitespace until the generation reaches the token
-              limit, resulting in increased latency and appearance of a "stuck" request. Also
-              note that the message content may be partially cut off if
-              `finish_reason="length"`, which indicates the generation exceeded `max_tokens`
-              or the conversation exceeded the max context length.
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
 
           seed: This feature is in Beta. If specified, our system will make a best effort to
               sample deterministically, such that repeated requests with the same `seed` and
@@ -994,11 +1004,11 @@ class AsyncCompletions(AsyncAPIResource):
                 "gpt-4-32k",
                 "gpt-4-32k-0314",
                 "gpt-4-32k-0613",
-                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo",
                 "gpt-3.5-turbo-16k",
                 "gpt-3.5-turbo-0301",
                 "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo-16k-0613",
             ],
         ],
@@ -1047,7 +1057,7 @@ class AsyncCompletions(AsyncAPIResource):
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           function_call: Deprecated in favor of `tool_choice`.
 
@@ -1080,13 +1090,15 @@ class AsyncCompletions(AsyncAPIResource):
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
               for counting tokens.
 
-          n: How many chat completion choices to generate for each input message.
+          n: How many chat completion choices to generate for each input message. Note that
+              you will be charged based on the number of generated tokens across all of the
+              choices. Keep `n` as `1` to minimize costs.
 
           presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
               whether they appear in the text so far, increasing the model's likelihood to
               talk about new topics.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           response_format: An object specifying the format that the model must output.
 
@@ -1096,10 +1108,10 @@ class AsyncCompletions(AsyncAPIResource):
               **Important:** when using JSON mode, you **must** also instruct the model to
               produce JSON yourself via a system or user message. Without this, the model may
               generate an unending stream of whitespace until the generation reaches the token
-              limit, resulting in increased latency and appearance of a "stuck" request. Also
-              note that the message content may be partially cut off if
-              `finish_reason="length"`, which indicates the generation exceeded `max_tokens`
-              or the conversation exceeded the max context length.
+              limit, resulting in a long-running and seemingly "stuck" request. Also note that
+              the message content may be partially cut off if `finish_reason="length"`, which
+              indicates the generation exceeded `max_tokens` or the conversation exceeded the
+              max context length.
 
           seed: This feature is in Beta. If specified, our system will make a best effort to
               sample deterministically, such that repeated requests with the same `seed` and
@@ -1165,11 +1177,11 @@ class AsyncCompletions(AsyncAPIResource):
                 "gpt-4-32k",
                 "gpt-4-32k-0314",
                 "gpt-4-32k-0613",
-                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo",
                 "gpt-3.5-turbo-16k",
                 "gpt-3.5-turbo-0301",
                 "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo-16k-0613",
             ],
         ],
src/openai/resources/completions.py
@@ -103,7 +103,7 @@ class Completions(SyncAPIResource):
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           logit_bias: Modify the likelihood of specified tokens appearing in the completion.
 
@@ -143,7 +143,7 @@ class Completions(SyncAPIResource):
               whether they appear in the text so far, increasing the model's likelihood to
               talk about new topics.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           seed: If specified, our system will make a best effort to sample deterministically,
               such that repeated requests with the same `seed` and parameters should return
@@ -272,7 +272,7 @@ class Completions(SyncAPIResource):
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           logit_bias: Modify the likelihood of specified tokens appearing in the completion.
 
@@ -312,7 +312,7 @@ class Completions(SyncAPIResource):
               whether they appear in the text so far, increasing the model's likelihood to
               talk about new topics.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           seed: If specified, our system will make a best effort to sample deterministically,
               such that repeated requests with the same `seed` and parameters should return
@@ -434,7 +434,7 @@ class Completions(SyncAPIResource):
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           logit_bias: Modify the likelihood of specified tokens appearing in the completion.
 
@@ -474,7 +474,7 @@ class Completions(SyncAPIResource):
               whether they appear in the text so far, increasing the model's likelihood to
               talk about new topics.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           seed: If specified, our system will make a best effort to sample deterministically,
               such that repeated requests with the same `seed` and parameters should return
@@ -671,7 +671,7 @@ class AsyncCompletions(AsyncAPIResource):
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           logit_bias: Modify the likelihood of specified tokens appearing in the completion.
 
@@ -711,7 +711,7 @@ class AsyncCompletions(AsyncAPIResource):
               whether they appear in the text so far, increasing the model's likelihood to
               talk about new topics.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           seed: If specified, our system will make a best effort to sample deterministically,
               such that repeated requests with the same `seed` and parameters should return
@@ -840,7 +840,7 @@ class AsyncCompletions(AsyncAPIResource):
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           logit_bias: Modify the likelihood of specified tokens appearing in the completion.
 
@@ -880,7 +880,7 @@ class AsyncCompletions(AsyncAPIResource):
               whether they appear in the text so far, increasing the model's likelihood to
               talk about new topics.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           seed: If specified, our system will make a best effort to sample deterministically,
               such that repeated requests with the same `seed` and parameters should return
@@ -1002,7 +1002,7 @@ class AsyncCompletions(AsyncAPIResource):
               existing frequency in the text so far, decreasing the model's likelihood to
               repeat the same line verbatim.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           logit_bias: Modify the likelihood of specified tokens appearing in the completion.
 
@@ -1042,7 +1042,7 @@ class AsyncCompletions(AsyncAPIResource):
               whether they appear in the text so far, increasing the model's likelihood to
               talk about new topics.
 
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
           seed: If specified, our system will make a best effort to sample deterministically,
               such that repeated requests with the same `seed` and parameters should return
src/openai/resources/embeddings.py
@@ -51,7 +51,8 @@ class Embeddings(SyncAPIResource):
           input: Input text to embed, encoded as a string or array of tokens. To embed multiple
               inputs in a single request, pass an array of strings or array of token arrays.
               The input must not exceed the max input tokens for the model (8192 tokens for
-              `text-embedding-ada-002`) and cannot be an empty string.
+              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
+              dimensions or fewer.
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
               for counting tokens.
 
@@ -144,7 +145,8 @@ class AsyncEmbeddings(AsyncAPIResource):
           input: Input text to embed, encoded as a string or array of tokens. To embed multiple
               inputs in a single request, pass an array of strings or array of token arrays.
               The input must not exceed the max input tokens for the model (8192 tokens for
-              `text-embedding-ada-002`) and cannot be an empty string.
+              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
+              dimensions or fewer.
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
               for counting tokens.
 
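A sketch of a batched request that respects the documented limits (the input strings are placeholders):

```python
from openai import OpenAI

client = OpenAI()

# The docs cap any input array at 2048 entries ("dimensions"); each entry
# must be a non-empty string within the model's 8192-token limit.
texts = ["first document", "second document"]
response = client.embeddings.create(
    model="text-embedding-ada-002",
    input=texts,
)
print(len(response.data), len(response.data[0].embedding))
```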
src/openai/resources/files.py
@@ -46,12 +46,12 @@ class Files(SyncAPIResource):
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> FileObject:
-        """Upload a file that can be used across various endpoints/features.
+        """Upload a file that can be used across various endpoints.
 
-        The size of
-        all the files uploaded by one organization can be up to 100 GB.
+        The size of all the
+        files uploaded by one organization can be up to 100 GB.
 
-        The size of individual files for can be a maximum of 512MB. See the
+        The size of individual files can be a maximum of 512 MB. See the
         [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to
         learn more about the types of files supported. The Fine-tuning API only supports
         `.jsonl` files.
@@ -309,12 +309,12 @@ class AsyncFiles(AsyncAPIResource):
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> FileObject:
-        """Upload a file that can be used across various endpoints/features.
+        """Upload a file that can be used across various endpoints.
 
-        The size of
-        all the files uploaded by one organization can be up to 100 GB.
+        The size of all the
+        files uploaded by one organization can be up to 100 GB.
 
-        The size of individual files for can be a maximum of 512MB. See the
+        The size of individual files can be a maximum of 512 MB. See the
         [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to
         learn more about the types of files supported. The Fine-tuning API only supports
         `.jsonl` files.
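A minimal sketch of the corrected upload wording in practice (the local path is hypothetical; individual files are capped at 512 MB, with up to 100 GB per organization):

```python
from openai import OpenAI

client = OpenAI()

# The Fine-tuning API only supports .jsonl files.
uploaded = client.files.create(
    file=open("training_data.jsonl", "rb"),  # hypothetical local file
    purpose="fine-tune",
)
print(uploaded.id)
```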
src/openai/types/audio/speech_create_params.py
@@ -22,6 +22,8 @@ class SpeechCreateParams(TypedDict, total=False):
     """The voice to use when generating the audio.
 
     Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`.
+    Previews of the voices are available in the
+    [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options).
     """
 
     response_format: Literal["mp3", "opus", "aac", "flac"]
src/openai/types/chat/chat_completion_assistant_message_param.py
@@ -24,12 +24,15 @@ class FunctionCall(TypedDict, total=False):
 
 
 class ChatCompletionAssistantMessageParam(TypedDict, total=False):
-    content: Required[Optional[str]]
-    """The contents of the assistant message."""
-
     role: Required[Literal["assistant"]]
     """The role of the messages author, in this case `assistant`."""
 
+    content: Optional[str]
+    """The contents of the assistant message.
+
+    Required unless `tool_calls` or `function_call` is specified.
+    """
+
     function_call: FunctionCall
     """Deprecated and replaced by `tool_calls`.
 
@@ -37,5 +40,12 @@ class ChatCompletionAssistantMessageParam(TypedDict, total=False):
     model.
     """
 
+    name: str
+    """An optional name for the participant.
+
+    Provides the model information to differentiate between participants of the same
+    role.
+    """
+
     tool_calls: List[ChatCompletionMessageToolCallParam]
     """The tool calls generated by the model, such as function calls."""
src/openai/types/chat/chat_completion_content_part_image_param.py
@@ -12,7 +12,11 @@ class ImageURL(TypedDict, total=False):
     """Either a URL of the image or the base64 encoded image data."""
 
     detail: Literal["auto", "low", "high"]
-    """Specifies the detail level of the image."""
+    """Specifies the detail level of the image.
+
+    Learn more in the
+    [Vision guide](https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding).
+    """
 
 
 class ChatCompletionContentPartImageParam(TypedDict, total=False):
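A hedged sketch of the documented `detail` level in use (the model name and image URL are assumptions; `gpt-4-vision-preview` was the vision-capable model at the time):

```python
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4-vision-preview",  # assumption: a vision-capable model
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://example.com/photo.png",  # hypothetical URL
                        "detail": "low",  # "auto", "low", or "high"
                    },
                },
            ],
        }
    ],
    max_tokens=100,
)
print(completion.choices[0].message.content)
```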
src/openai/types/chat/chat_completion_function_message_param.py
@@ -2,15 +2,14 @@
 
 from __future__ import annotations
 
-from typing import Optional
 from typing_extensions import Literal, Required, TypedDict
 
 __all__ = ["ChatCompletionFunctionMessageParam"]
 
 
 class ChatCompletionFunctionMessageParam(TypedDict, total=False):
-    content: Required[Optional[str]]
-    """The return value from the function call, to return to the model."""
+    content: Required[str]
+    """The contents of the function message."""
 
     name: Required[str]
     """The name of the function to call."""
src/openai/types/chat/chat_completion_named_tool_choice_param.py
@@ -13,7 +13,7 @@ class Function(TypedDict, total=False):
 
 
 class ChatCompletionNamedToolChoiceParam(TypedDict, total=False):
-    function: Function
+    function: Required[Function]
 
-    type: Literal["function"]
+    type: Required[Literal["function"]]
     """The type of the tool. Currently, only `function` is supported."""
src/openai/types/chat/chat_completion_system_message_param.py
@@ -2,15 +2,21 @@
 
 from __future__ import annotations
 
-from typing import Optional
 from typing_extensions import Literal, Required, TypedDict
 
 __all__ = ["ChatCompletionSystemMessageParam"]
 
 
 class ChatCompletionSystemMessageParam(TypedDict, total=False):
-    content: Required[Optional[str]]
+    content: Required[str]
     """The contents of the system message."""
 
     role: Required[Literal["system"]]
     """The role of the messages author, in this case `system`."""
+
+    name: str
+    """An optional name for the participant.
+
+    Provides the model information to differentiate between participants of the same
+    role.
+    """
src/openai/types/chat/chat_completion_tool_message_param.py
@@ -2,14 +2,13 @@
 
 from __future__ import annotations
 
-from typing import Optional
 from typing_extensions import Literal, Required, TypedDict
 
 __all__ = ["ChatCompletionToolMessageParam"]
 
 
 class ChatCompletionToolMessageParam(TypedDict, total=False):
-    content: Required[Optional[str]]
+    content: Required[str]
     """The contents of the tool message."""
 
     role: Required[Literal["tool"]]
src/openai/types/chat/chat_completion_user_message_param.py
@@ -11,8 +11,15 @@ __all__ = ["ChatCompletionUserMessageParam"]
 
 
 class ChatCompletionUserMessageParam(TypedDict, total=False):
-    content: Required[Union[str, List[ChatCompletionContentPartParam], None]]
+    content: Required[Union[str, List[ChatCompletionContentPartParam]]]
     """The contents of the user message."""
 
     role: Required[Literal["user"]]
     """The role of the messages author, in this case `user`."""
+
+    name: str
+    """An optional name for the participant.
+
+    Provides the model information to differentiate between participants of the same
+    role.
+    """
src/openai/types/chat/completion_create_params.py
@@ -44,11 +44,11 @@ class CompletionCreateParamsBase(TypedDict, total=False):
                 "gpt-4-32k",
                 "gpt-4-32k-0314",
                 "gpt-4-32k-0613",
-                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo",
                 "gpt-3.5-turbo-16k",
                 "gpt-3.5-turbo-0301",
                 "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-1106",
                 "gpt-3.5-turbo-16k-0613",
             ],
         ]
@@ -66,7 +66,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     Positive values penalize new tokens based on their existing frequency in the
     text so far, decreasing the model's likelihood to repeat the same line verbatim.
 
-    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
     """
 
     function_call: FunctionCall
@@ -109,7 +109,11 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     """
 
     n: Optional[int]
-    """How many chat completion choices to generate for each input message."""
+    """How many chat completion choices to generate for each input message.
+
+    Note that you will be charged based on the number of generated tokens across all
+    of the choices. Keep `n` as `1` to minimize costs.
+    """
 
     presence_penalty: Optional[float]
     """Number between -2.0 and 2.0.
@@ -117,7 +121,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     Positive values penalize new tokens based on whether they appear in the text so
     far, increasing the model's likelihood to talk about new topics.
 
-    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
     """
 
     response_format: ResponseFormat
@@ -129,19 +133,19 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     **Important:** when using JSON mode, you **must** also instruct the model to
     produce JSON yourself via a system or user message. Without this, the model may
     generate an unending stream of whitespace until the generation reaches the token
-    limit, resulting in increased latency and appearance of a "stuck" request. Also
-    note that the message content may be partially cut off if
-    `finish_reason="length"`, which indicates the generation exceeded `max_tokens`
-    or the conversation exceeded the max context length.
+    limit, resulting in a long-running and seemingly "stuck" request. Also note that
+    the message content may be partially cut off if `finish_reason="length"`, which
+    indicates the generation exceeded `max_tokens` or the conversation exceeded the
+    max context length.
     """
 
     seed: Optional[int]
-    """This feature is in Beta.
-
-    If specified, our system will make a best effort to sample deterministically,
-    such that repeated requests with the same `seed` and parameters should return
-    the same result. Determinism is not guaranteed, and you should refer to the
-    `system_fingerprint` response parameter to monitor changes in the backend.
+    """
+    This feature is in Beta. If specified, our system will make a best effort to
+    sample deterministically, such that repeated requests with the same `seed` and
+    parameters should return the same result. Determinism is not guaranteed, and you
+    should refer to the `system_fingerprint` response parameter to monitor changes
+    in the backend.
     """
 
     stop: Union[Optional[str], List[str]]
@@ -204,22 +208,22 @@ class Function(TypedDict, total=False):
     of 64.
     """
 
-    parameters: Required[shared_params.FunctionParameters]
+    description: str
+    """
+    A description of what the function does, used by the model to choose when and
+    how to call the function.
+    """
+
+    parameters: shared_params.FunctionParameters
     """The parameters the functions accepts, described as a JSON Schema object.
 
-    See the [guide](https://platform.openai.com/docs/guides/gpt/function-calling)
+    See the
+    [guide](https://platform.openai.com/docs/guides/text-generation/function-calling)
     for examples, and the
     [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for
     documentation about the format.
 
-    To describe a function that accepts no parameters, provide the value
-    `{"type": "object", "properties": {}}`.
-    """
-
-    description: str
-    """
-    A description of what the function does, used by the model to choose when and
-    how to call the function.
+    Omitting `parameters` defines a function with an empty parameter list.
     """
 
 
src/openai/types/shared/function_definition.py
@@ -16,20 +16,20 @@ class FunctionDefinition(BaseModel):
     of 64.
     """
 
-    parameters: FunctionParameters
+    description: Optional[str] = None
+    """
+    A description of what the function does, used by the model to choose when and
+    how to call the function.
+    """
+
+    parameters: Optional[FunctionParameters] = None
     """The parameters the functions accepts, described as a JSON Schema object.
 
-    See the [guide](https://platform.openai.com/docs/guides/gpt/function-calling)
+    See the
+    [guide](https://platform.openai.com/docs/guides/text-generation/function-calling)
     for examples, and the
     [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for
     documentation about the format.
 
-    To describe a function that accepts no parameters, provide the value
-    `{"type": "object", "properties": {}}`.
-    """
-
-    description: Optional[str] = None
-    """
-    A description of what the function does, used by the model to choose when and
-    how to call the function.
+    Omitting `parameters` defines a function with an empty parameter list.
     """
src/openai/types/shared_params/function_definition.py
@@ -17,20 +17,20 @@ class FunctionDefinition(TypedDict, total=False):
     of 64.
     """
 
-    parameters: Required[shared_params.FunctionParameters]
+    description: str
+    """
+    A description of what the function does, used by the model to choose when and
+    how to call the function.
+    """
+
+    parameters: shared_params.FunctionParameters
     """The parameters the functions accepts, described as a JSON Schema object.
 
-    See the [guide](https://platform.openai.com/docs/guides/gpt/function-calling)
+    See the
+    [guide](https://platform.openai.com/docs/guides/text-generation/function-calling)
     for examples, and the
     [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for
     documentation about the format.
 
-    To describe a function that accepts no parameters, provide the value
-    `{"type": "object", "properties": {}}`.
-    """
-
-    description: str
-    """
-    A description of what the function does, used by the model to choose when and
-    how to call the function.
+    Omitting `parameters` defines a function with an empty parameter list.
     """
src/openai/types/completion_create_params.py
@@ -67,7 +67,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     Positive values penalize new tokens based on their existing frequency in the
     text so far, decreasing the model's likelihood to repeat the same line verbatim.
 
-    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
     """
 
     logit_bias: Optional[Dict[str, int]]
@@ -119,7 +119,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     Positive values penalize new tokens based on whether they appear in the text so
     far, increasing the model's likelihood to talk about new topics.
 
-    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
     """
 
     seed: Optional[int]
src/openai/types/embedding_create_params.py
@@ -14,7 +14,8 @@ class EmbeddingCreateParams(TypedDict, total=False):
 
     To embed multiple inputs in a single request, pass an array of strings or array
     of token arrays. The input must not exceed the max input tokens for the model
-    (8192 tokens for `text-embedding-ada-002`) and cannot be an empty string.
+    (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any
+    array must be 2048 dimensions or fewer.
     [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
     for counting tokens.
     """
tests/api_resources/beta/assistants/test_files.py
@@ -24,7 +24,7 @@ class TestFiles:
     @parametrize
     def test_method_create(self, client: OpenAI) -> None:
         file = client.beta.assistants.files.create(
-            "file-AF1WoRqd3aJAHsqc9NY7iL8F",
+            "file-abc123",
             file_id="string",
         )
         assert_matches_type(AssistantFile, file, path=["response"])
@@ -32,7 +32,7 @@ class TestFiles:
     @parametrize
     def test_raw_response_create(self, client: OpenAI) -> None:
         response = client.beta.assistants.files.with_raw_response.create(
-            "file-AF1WoRqd3aJAHsqc9NY7iL8F",
+            "file-abc123",
             file_id="string",
         )
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -111,7 +111,7 @@ class TestAsyncFiles:
     @parametrize
     async def test_method_create(self, client: AsyncOpenAI) -> None:
         file = await client.beta.assistants.files.create(
-            "file-AF1WoRqd3aJAHsqc9NY7iL8F",
+            "file-abc123",
             file_id="string",
         )
         assert_matches_type(AssistantFile, file, path=["response"])
@@ -119,7 +119,7 @@ class TestAsyncFiles:
     @parametrize
     async def test_raw_response_create(self, client: AsyncOpenAI) -> None:
         response = await client.beta.assistants.files.with_raw_response.create(
-            "file-AF1WoRqd3aJAHsqc9NY7iL8F",
+            "file-abc123",
             file_id="string",
         )
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
tests/api_resources/beta/threads/messages/test_files.py
@@ -24,18 +24,18 @@ class TestFiles:
     @parametrize
     def test_method_retrieve(self, client: OpenAI) -> None:
         file = client.beta.threads.messages.files.retrieve(
-            "file-AF1WoRqd3aJAHsqc9NY7iL8F",
-            thread_id="thread_AF1WoRqd3aJAHsqc9NY7iL8F",
-            message_id="msg_AF1WoRqd3aJAHsqc9NY7iL8F",
+            "file-abc123",
+            thread_id="thread_abc123",
+            message_id="msg_abc123",
         )
         assert_matches_type(MessageFile, file, path=["response"])
 
     @parametrize
     def test_raw_response_retrieve(self, client: OpenAI) -> None:
         response = client.beta.threads.messages.files.with_raw_response.retrieve(
-            "file-AF1WoRqd3aJAHsqc9NY7iL8F",
-            thread_id="thread_AF1WoRqd3aJAHsqc9NY7iL8F",
-            message_id="msg_AF1WoRqd3aJAHsqc9NY7iL8F",
+            "file-abc123",
+            thread_id="thread_abc123",
+            message_id="msg_abc123",
         )
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
@@ -80,18 +80,18 @@ class TestAsyncFiles:
     @parametrize
     async def test_method_retrieve(self, client: AsyncOpenAI) -> None:
         file = await client.beta.threads.messages.files.retrieve(
-            "file-AF1WoRqd3aJAHsqc9NY7iL8F",
-            thread_id="thread_AF1WoRqd3aJAHsqc9NY7iL8F",
-            message_id="msg_AF1WoRqd3aJAHsqc9NY7iL8F",
+            "file-abc123",
+            thread_id="thread_abc123",
+            message_id="msg_abc123",
         )
         assert_matches_type(MessageFile, file, path=["response"])
 
     @parametrize
     async def test_raw_response_retrieve(self, client: AsyncOpenAI) -> None:
         response = await client.beta.threads.messages.files.with_raw_response.retrieve(
-            "file-AF1WoRqd3aJAHsqc9NY7iL8F",
-            thread_id="thread_AF1WoRqd3aJAHsqc9NY7iL8F",
-            message_id="msg_AF1WoRqd3aJAHsqc9NY7iL8F",
+            "file-abc123",
+            thread_id="thread_abc123",
+            message_id="msg_abc123",
         )
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
tests/api_resources/chat/test_completions.py
@@ -40,6 +40,7 @@ class TestCompletions:
                 {
                     "content": "string",
                     "role": "system",
+                    "name": "string",
                 }
             ],
             model="gpt-3.5-turbo",
@@ -128,6 +129,7 @@ class TestCompletions:
                 {
                     "content": "string",
                     "role": "system",
+                    "name": "string",
                 }
             ],
             model="gpt-3.5-turbo",
@@ -221,6 +223,7 @@ class TestAsyncCompletions:
                 {
                     "content": "string",
                     "role": "system",
+                    "name": "string",
                 }
             ],
             model="gpt-3.5-turbo",
@@ -309,6 +312,7 @@ class TestAsyncCompletions:
                 {
                     "content": "string",
                     "role": "system",
+                    "name": "string",
                 }
             ],
             model="gpt-3.5-turbo",