Commit ccae821b
Author: Stainless Bot <107565488+stainless-bot@users.noreply.github.com>
Date:   2023-12-17 08:47:42
Parent: eafe8f9

feat(api): add token logprobs to chat completions (#980)
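In user-facing terms, this commit adds a boolean `logprobs` flag and an integer `top_logprobs` option to chat completion requests, plus a `logprobs` field on each returned choice. A minimal usage sketch (not part of the diff; the model name, prompt, and environment-provided API key are illustrative assumptions):

```python
# Minimal sketch of the new chat-completions logprobs options added in this
# commit. Model name and prompt are placeholders.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

completion = client.chat.completions.create(
    model="gpt-3.5-turbo-1106",
    messages=[{"role": "user", "content": "Say hello in one word."}],
    logprobs=True,     # return log probabilities for each output token
    top_logprobs=2,    # also return the 2 most likely alternatives per position
)

choice = completion.choices[0]
if choice.logprobs and choice.logprobs.content:
    for entry in choice.logprobs.content:  # ChatCompletionTokenLogprob objects
        print(entry.token, entry.logprob)
```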
src/openai/resources/chat/completions.py
@@ -63,6 +63,7 @@ class Completions(SyncAPIResource):
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -73,6 +74,7 @@ class Completions(SyncAPIResource):
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -107,7 +109,7 @@ class Completions(SyncAPIResource):
               particular function via `{"name": "my_function"}` forces the model to call that
               function.
 
-              `none` is the default when no functions are present. `auto`` is the default if
+              `none` is the default when no functions are present. `auto` is the default if
               functions are present.
 
           functions: Deprecated in favor of `tools`.
@@ -123,7 +125,13 @@ class Completions(SyncAPIResource):
               increase likelihood of selection; values like -100 or 100 should result in a ban
               or exclusive selection of the relevant token.
 
-          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion.
+          logprobs: Whether to return log probabilities of the output tokens or not. If true,
+              returns the log probabilities of each output token returned in the `content` of
+              `message`. This option is currently not available on the `gpt-4-vision-preview`
+              model.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
+              completion.
 
               The total length of input tokens and generated tokens is limited by the model's
               context length.
@@ -140,7 +148,8 @@ class Completions(SyncAPIResource):
 
               [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
-          response_format: An object specifying the format that the model must output.
+          response_format: An object specifying the format that the model must output. Compatible with
+              `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -188,6 +197,10 @@ class Completions(SyncAPIResource):
               tool. Use this to provide a list of functions the model may generate JSON inputs
               for.
 
+          top_logprobs: An integer between 0 and 5 specifying the number of most likely tokens to return
+              at each token position, each with an associated log probability. `logprobs` must
+              be set to `true` if this parameter is used.
+
           top_p: An alternative to sampling with temperature, called nucleus sampling, where the
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
@@ -237,6 +250,7 @@ class Completions(SyncAPIResource):
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -246,6 +260,7 @@ class Completions(SyncAPIResource):
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -287,7 +302,7 @@ class Completions(SyncAPIResource):
               particular function via `{"name": "my_function"}` forces the model to call that
               function.
 
-              `none` is the default when no functions are present. `auto`` is the default if
+              `none` is the default when no functions are present. `auto` is the default if
               functions are present.
 
           functions: Deprecated in favor of `tools`.
@@ -303,7 +318,13 @@ class Completions(SyncAPIResource):
               increase likelihood of selection; values like -100 or 100 should result in a ban
               or exclusive selection of the relevant token.
 
-          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion.
+          logprobs: Whether to return log probabilities of the output tokens or not. If true,
+              returns the log probabilities of each output token returned in the `content` of
+              `message`. This option is currently not available on the `gpt-4-vision-preview`
+              model.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
+              completion.
 
               The total length of input tokens and generated tokens is limited by the model's
               context length.
@@ -320,7 +341,8 @@ class Completions(SyncAPIResource):
 
               [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
-          response_format: An object specifying the format that the model must output.
+          response_format: An object specifying the format that the model must output. Compatible with
+              `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -361,6 +383,10 @@ class Completions(SyncAPIResource):
               tool. Use this to provide a list of functions the model may generate JSON inputs
               for.
 
+          top_logprobs: An integer between 0 and 5 specifying the number of most likely tokens to return
+              at each token position, each with an associated log probability. `logprobs` must
+              be set to `true` if this parameter is used.
+
           top_p: An alternative to sampling with temperature, called nucleus sampling, where the
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
@@ -410,6 +436,7 @@ class Completions(SyncAPIResource):
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -419,6 +446,7 @@ class Completions(SyncAPIResource):
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -460,7 +488,7 @@ class Completions(SyncAPIResource):
               particular function via `{"name": "my_function"}` forces the model to call that
               function.
 
-              `none` is the default when no functions are present. `auto`` is the default if
+              `none` is the default when no functions are present. `auto` is the default if
               functions are present.
 
           functions: Deprecated in favor of `tools`.
@@ -476,7 +504,13 @@ class Completions(SyncAPIResource):
               increase likelihood of selection; values like -100 or 100 should result in a ban
               or exclusive selection of the relevant token.
 
-          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion.
+          logprobs: Whether to return log probabilities of the output tokens or not. If true,
+              returns the log probabilities of each output token returned in the `content` of
+              `message`. This option is currently not available on the `gpt-4-vision-preview`
+              model.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
+              completion.
 
               The total length of input tokens and generated tokens is limited by the model's
               context length.
@@ -493,7 +527,8 @@ class Completions(SyncAPIResource):
 
               [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
-          response_format: An object specifying the format that the model must output.
+          response_format: An object specifying the format that the model must output. Compatible with
+              `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -534,6 +569,10 @@ class Completions(SyncAPIResource):
               tool. Use this to provide a list of functions the model may generate JSON inputs
               for.
 
+          top_logprobs: An integer between 0 and 5 specifying the number of most likely tokens to return
+              at each token position, each with an associated log probability. `logprobs` must
+              be set to `true` if this parameter is used.
+
           top_p: An alternative to sampling with temperature, called nucleus sampling, where the
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
@@ -582,6 +621,7 @@ class Completions(SyncAPIResource):
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -592,6 +632,7 @@ class Completions(SyncAPIResource):
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -611,6 +652,7 @@ class Completions(SyncAPIResource):
                     "function_call": function_call,
                     "functions": functions,
                     "logit_bias": logit_bias,
+                    "logprobs": logprobs,
                     "max_tokens": max_tokens,
                     "n": n,
                     "presence_penalty": presence_penalty,
@@ -621,6 +663,7 @@ class Completions(SyncAPIResource):
                     "temperature": temperature,
                     "tool_choice": tool_choice,
                     "tools": tools,
+                    "top_logprobs": top_logprobs,
                     "top_p": top_p,
                     "user": user,
                 },
@@ -670,6 +713,7 @@ class AsyncCompletions(AsyncAPIResource):
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -680,6 +724,7 @@ class AsyncCompletions(AsyncAPIResource):
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -714,7 +759,7 @@ class AsyncCompletions(AsyncAPIResource):
               particular function via `{"name": "my_function"}` forces the model to call that
               function.
 
-              `none` is the default when no functions are present. `auto`` is the default if
+              `none` is the default when no functions are present. `auto` is the default if
               functions are present.
 
           functions: Deprecated in favor of `tools`.
@@ -730,7 +775,13 @@ class AsyncCompletions(AsyncAPIResource):
               increase likelihood of selection; values like -100 or 100 should result in a ban
               or exclusive selection of the relevant token.
 
-          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion.
+          logprobs: Whether to return log probabilities of the output tokens or not. If true,
+              returns the log probabilities of each output token returned in the `content` of
+              `message`. This option is currently not available on the `gpt-4-vision-preview`
+              model.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
+              completion.
 
               The total length of input tokens and generated tokens is limited by the model's
               context length.
@@ -747,7 +798,8 @@ class AsyncCompletions(AsyncAPIResource):
 
               [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
-          response_format: An object specifying the format that the model must output.
+          response_format: An object specifying the format that the model must output. Compatible with
+              `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -795,6 +847,10 @@ class AsyncCompletions(AsyncAPIResource):
               tool. Use this to provide a list of functions the model may generate JSON inputs
               for.
 
+          top_logprobs: An integer between 0 and 5 specifying the number of most likely tokens to return
+              at each token position, each with an associated log probability. `logprobs` must
+              be set to `true` if this parameter is used.
+
           top_p: An alternative to sampling with temperature, called nucleus sampling, where the
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
@@ -844,6 +900,7 @@ class AsyncCompletions(AsyncAPIResource):
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -853,6 +910,7 @@ class AsyncCompletions(AsyncAPIResource):
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -894,7 +952,7 @@ class AsyncCompletions(AsyncAPIResource):
               particular function via `{"name": "my_function"}` forces the model to call that
               function.
 
-              `none` is the default when no functions are present. `auto`` is the default if
+              `none` is the default when no functions are present. `auto` is the default if
               functions are present.
 
           functions: Deprecated in favor of `tools`.
@@ -910,7 +968,13 @@ class AsyncCompletions(AsyncAPIResource):
               increase likelihood of selection; values like -100 or 100 should result in a ban
               or exclusive selection of the relevant token.
 
-          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion.
+          logprobs: Whether to return log probabilities of the output tokens or not. If true,
+              returns the log probabilities of each output token returned in the `content` of
+              `message`. This option is currently not available on the `gpt-4-vision-preview`
+              model.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
+              completion.
 
               The total length of input tokens and generated tokens is limited by the model's
               context length.
@@ -927,7 +991,8 @@ class AsyncCompletions(AsyncAPIResource):
 
               [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
-          response_format: An object specifying the format that the model must output.
+          response_format: An object specifying the format that the model must output. Compatible with
+              `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -968,6 +1033,10 @@ class AsyncCompletions(AsyncAPIResource):
               tool. Use this to provide a list of functions the model may generate JSON inputs
               for.
 
+          top_logprobs: An integer between 0 and 5 specifying the number of most likely tokens to return
+              at each token position, each with an associated log probability. `logprobs` must
+              be set to `true` if this parameter is used.
+
           top_p: An alternative to sampling with temperature, called nucleus sampling, where the
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
@@ -1017,6 +1086,7 @@ class AsyncCompletions(AsyncAPIResource):
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1026,6 +1096,7 @@ class AsyncCompletions(AsyncAPIResource):
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1067,7 +1138,7 @@ class AsyncCompletions(AsyncAPIResource):
               particular function via `{"name": "my_function"}` forces the model to call that
               function.
 
-              `none` is the default when no functions are present. `auto`` is the default if
+              `none` is the default when no functions are present. `auto` is the default if
               functions are present.
 
           functions: Deprecated in favor of `tools`.
@@ -1083,7 +1154,13 @@ class AsyncCompletions(AsyncAPIResource):
               increase likelihood of selection; values like -100 or 100 should result in a ban
               or exclusive selection of the relevant token.
 
-          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion.
+          logprobs: Whether to return log probabilities of the output tokens or not. If true,
+              returns the log probabilities of each output token returned in the `content` of
+              `message`. This option is currently not available on the `gpt-4-vision-preview`
+              model.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
+              completion.
 
               The total length of input tokens and generated tokens is limited by the model's
               context length.
@@ -1100,7 +1177,8 @@ class AsyncCompletions(AsyncAPIResource):
 
               [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
 
-          response_format: An object specifying the format that the model must output.
+          response_format: An object specifying the format that the model must output. Compatible with
+              `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -1141,6 +1219,10 @@ class AsyncCompletions(AsyncAPIResource):
               tool. Use this to provide a list of functions the model may generate JSON inputs
               for.
 
+          top_logprobs: An integer between 0 and 5 specifying the number of most likely tokens to return
+              at each token position, each with an associated log probability. `logprobs` must
+              be set to `true` if this parameter is used.
+
           top_p: An alternative to sampling with temperature, called nucleus sampling, where the
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
@@ -1189,6 +1271,7 @@ class AsyncCompletions(AsyncAPIResource):
         function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
         functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
         logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
         max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1199,6 +1282,7 @@ class AsyncCompletions(AsyncAPIResource):
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
         tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         user: str | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1218,6 +1302,7 @@ class AsyncCompletions(AsyncAPIResource):
                     "function_call": function_call,
                     "functions": functions,
                     "logit_bias": logit_bias,
+                    "logprobs": logprobs,
                     "max_tokens": max_tokens,
                     "n": n,
                     "presence_penalty": presence_penalty,
@@ -1228,6 +1313,7 @@ class AsyncCompletions(AsyncAPIResource):
                     "temperature": temperature,
                     "tool_choice": tool_choice,
                     "tools": tools,
+                    "top_logprobs": top_logprobs,
                     "top_p": top_p,
                     "user": user,
                 },
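The async resource mirrors the sync one: `AsyncCompletions.create` gains the same keyword arguments and forwards them in the request body shown above. A hedged sketch of the async call (model and prompt are placeholders):

```python
# Async counterpart of the example near the top; accepts the same new
# `logprobs` / `top_logprobs` keyword arguments.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    completion = await client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=[{"role": "user", "content": "One word, please."}],
        logprobs=True,
        top_logprobs=3,
    )
    print(completion.choices[0].logprobs)


asyncio.run(main())
```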
src/openai/resources/completions.py
@@ -119,14 +119,15 @@ class Completions(SyncAPIResource):
               As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
               from being generated.
 
-          logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the
-              chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-              the 5 most likely tokens. The API will always return the `logprob` of the
-              sampled token, so there may be up to `logprobs+1` elements in the response.
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
 
               The maximum value for `logprobs` is 5.
 
-          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion.
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
 
               The token count of your prompt plus `max_tokens` cannot exceed the model's
               context length.
@@ -288,14 +289,15 @@ class Completions(SyncAPIResource):
               As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
               from being generated.
 
-          logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the
-              chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-              the 5 most likely tokens. The API will always return the `logprob` of the
-              sampled token, so there may be up to `logprobs+1` elements in the response.
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
 
               The maximum value for `logprobs` is 5.
 
-          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion.
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
 
               The token count of your prompt plus `max_tokens` cannot exceed the model's
               context length.
@@ -450,14 +452,15 @@ class Completions(SyncAPIResource):
               As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
               from being generated.
 
-          logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the
-              chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-              the 5 most likely tokens. The API will always return the `logprob` of the
-              sampled token, so there may be up to `logprobs+1` elements in the response.
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
 
               The maximum value for `logprobs` is 5.
 
-          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion.
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
 
               The token count of your prompt plus `max_tokens` cannot exceed the model's
               context length.
@@ -687,14 +690,15 @@ class AsyncCompletions(AsyncAPIResource):
               As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
               from being generated.
 
-          logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the
-              chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-              the 5 most likely tokens. The API will always return the `logprob` of the
-              sampled token, so there may be up to `logprobs+1` elements in the response.
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
 
               The maximum value for `logprobs` is 5.
 
-          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion.
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
 
               The token count of your prompt plus `max_tokens` cannot exceed the model's
               context length.
@@ -856,14 +860,15 @@ class AsyncCompletions(AsyncAPIResource):
               As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
               from being generated.
 
-          logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the
-              chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-              the 5 most likely tokens. The API will always return the `logprob` of the
-              sampled token, so there may be up to `logprobs+1` elements in the response.
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
 
               The maximum value for `logprobs` is 5.
 
-          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion.
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
 
               The token count of your prompt plus `max_tokens` cannot exceed the model's
               context length.
@@ -1018,14 +1023,15 @@ class AsyncCompletions(AsyncAPIResource):
               As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
               from being generated.
 
-          logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the
-              chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-              the 5 most likely tokens. The API will always return the `logprob` of the
-              sampled token, so there may be up to `logprobs+1` elements in the response.
+          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
+              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+              list of the 5 most likely tokens. The API will always return the `logprob` of
+              the sampled token, so there may be up to `logprobs+1` elements in the response.
 
               The maximum value for `logprobs` is 5.
 
-          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion.
+          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
+              completion.
 
               The token count of your prompt plus `max_tokens` cannot exceed the model's
               context length.
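For comparison, the legacy completions endpoint documented above keeps its integer-valued `logprobs` (maximum 5) rather than the new boolean flag on chat completions. A short sketch, assuming an illustrative legacy-capable model:

```python
# Sketch of the integer-valued `logprobs` parameter on the legacy completions
# endpoint described by the updated docstrings; the model name is illustrative.
from openai import OpenAI

client = OpenAI()

completion = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Once upon a time",
    max_tokens=5,
    logprobs=5,  # maximum allowed value; returns the 5 most likely tokens
)

print(completion.choices[0].logprobs)
```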
src/openai/resources/files.py
@@ -51,7 +51,8 @@ class Files(SyncAPIResource):
         The size of all the
         files uploaded by one organization can be up to 100 GB.
 
-        The size of individual files can be a maximum of 512 MB. See the
+        The size of individual files can be a maximum of 512 MB or 2 million tokens for
+        Assistants. See the
         [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to
         learn more about the types of files supported. The Fine-tuning API only supports
         `.jsonl` files.
@@ -314,7 +315,8 @@ class AsyncFiles(AsyncAPIResource):
         The size of all the
         files uploaded by one organization can be up to 100 GB.
 
-        The size of individual files can be a maximum of 512 MB. See the
+        The size of individual files can be a maximum of 512 MB or 2 million tokens for
+        Assistants. See the
         [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to
         learn more about the types of files supported. The Fine-tuning API only supports
         `.jsonl` files.
src/openai/types/beta/threads/runs/message_creation_step_details.py
@@ -16,4 +16,4 @@ class MessageCreationStepDetails(BaseModel):
     message_creation: MessageCreation
 
     type: Literal["message_creation"]
-    """Always `message_creation``."""
+    """Always `message_creation`."""
src/openai/types/beta/threads/runs/run_step.py
@@ -66,7 +66,7 @@ class RunStep(BaseModel):
     """
 
     object: Literal["thread.run.step"]
-    """The object type, which is always `thread.run.step``."""
+    """The object type, which is always `thread.run.step`."""
 
     run_id: str
     """
src/openai/types/chat/__init__.py
@@ -13,6 +13,9 @@ from .chat_completion_tool_param import (
 from .chat_completion_message_param import (
     ChatCompletionMessageParam as ChatCompletionMessageParam,
 )
+from .chat_completion_token_logprob import (
+    ChatCompletionTokenLogprob as ChatCompletionTokenLogprob,
+)
 from .chat_completion_message_tool_call import (
     ChatCompletionMessageToolCall as ChatCompletionMessageToolCall,
 )
src/openai/types/chat/chat_completion.py
@@ -6,8 +6,14 @@ from typing_extensions import Literal
 from ..._models import BaseModel
 from ..completion_usage import CompletionUsage
 from .chat_completion_message import ChatCompletionMessage
+from .chat_completion_token_logprob import ChatCompletionTokenLogprob
 
-__all__ = ["ChatCompletion", "Choice"]
+__all__ = ["ChatCompletion", "Choice", "ChoiceLogprobs"]
+
+
+class ChoiceLogprobs(BaseModel):
+    content: Optional[List[ChatCompletionTokenLogprob]]
+    """A list of message content tokens with log probability information."""
 
 
 class Choice(BaseModel):
@@ -24,6 +30,9 @@ class Choice(BaseModel):
     index: int
     """The index of the choice in the list of choices."""
 
+    logprobs: Optional[ChoiceLogprobs]
+    """Log probability information for the choice."""
+
     message: ChatCompletionMessage
     """A chat completion message generated by the model."""
 
src/openai/types/chat/chat_completion_chunk.py
@@ -4,6 +4,7 @@ from typing import List, Optional
 from typing_extensions import Literal
 
 from ..._models import BaseModel
+from .chat_completion_token_logprob import ChatCompletionTokenLogprob
 
 __all__ = [
     "ChatCompletionChunk",
@@ -12,6 +13,7 @@ __all__ = [
     "ChoiceDeltaFunctionCall",
     "ChoiceDeltaToolCall",
     "ChoiceDeltaToolCallFunction",
+    "ChoiceLogprobs",
 ]
 
 
@@ -70,6 +72,11 @@ class ChoiceDelta(BaseModel):
     tool_calls: Optional[List[ChoiceDeltaToolCall]] = None
 
 
+class ChoiceLogprobs(BaseModel):
+    content: Optional[List[ChatCompletionTokenLogprob]]
+    """A list of message content tokens with log probability information."""
+
+
 class Choice(BaseModel):
     delta: ChoiceDelta
     """A chat completion delta generated by streamed model responses."""
@@ -87,6 +94,9 @@ class Choice(BaseModel):
     index: int
     """The index of the choice in the list of choices."""
 
+    logprobs: Optional[ChoiceLogprobs] = None
+    """Log probability information for the choice."""
+
 
 class ChatCompletionChunk(BaseModel):
     id: str
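Because each streamed `Choice` now carries an optional `logprobs` field, per-token log probabilities can also be read incrementally. A sketch, with the model name and prompt as placeholders:

```python
# Reading per-token log probabilities from a streamed response; each chunk's
# Choice now has an optional `logprobs` field (None when absent).
from openai import OpenAI

client = OpenAI()

stream = client.chat.completions.create(
    model="gpt-3.5-turbo-1106",
    messages=[{"role": "user", "content": "Count to three."}],
    logprobs=True,
    stream=True,
)

for chunk in stream:
    for choice in chunk.choices:
        if choice.logprobs and choice.logprobs.content:
            for entry in choice.logprobs.content:
                print(entry.token, entry.logprob)
```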
src/openai/types/chat/chat_completion_function_message_param.py
@@ -2,13 +2,14 @@
 
 from __future__ import annotations
 
+from typing import Optional
 from typing_extensions import Literal, Required, TypedDict
 
 __all__ = ["ChatCompletionFunctionMessageParam"]
 
 
 class ChatCompletionFunctionMessageParam(TypedDict, total=False):
-    content: Required[str]
+    content: Required[Optional[str]]
     """The contents of the function message."""
 
     name: Required[str]
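The function-message param now permits a `None` content value. A hedged sketch of constructing such a message (the function name is hypothetical):

```python
# Sketch: a `function` role message may now carry content=None, e.g. when the
# called function produced no textual output. The function name is illustrative.
from openai.types.chat import ChatCompletionFunctionMessageParam

empty_result: ChatCompletionFunctionMessageParam = {
    "role": "function",
    "name": "get_weather",  # hypothetical function name
    "content": None,        # allowed after this change
}
```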
src/openai/types/chat/chat_completion_token_logprob.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import List, Optional
+
+from ..._models import BaseModel
+
+__all__ = ["ChatCompletionTokenLogprob", "TopLogprob"]
+
+
+class TopLogprob(BaseModel):
+    token: str
+    """The token."""
+
+    bytes: Optional[List[int]]
+    """A list of integers representing the UTF-8 bytes representation of the token.
+
+    Useful in instances where characters are represented by multiple tokens and
+    their byte representations must be combined to generate the correct text
+    representation. Can be `null` if there is no bytes representation for the token.
+    """
+
+    logprob: float
+    """The log probability of this token."""
+
+
+class ChatCompletionTokenLogprob(BaseModel):
+    token: str
+    """The token."""
+
+    bytes: Optional[List[int]]
+    """A list of integers representing the UTF-8 bytes representation of the token.
+
+    Useful in instances where characters are represented by multiple tokens and
+    their byte representations must be combined to generate the correct text
+    representation. Can be `null` if there is no bytes representation for the token.
+    """
+
+    logprob: float
+    """The log probability of this token."""
+
+    top_logprobs: List[TopLogprob]
+    """List of the most likely tokens and their log probability, at this token
+    position.
+
+    In rare cases, there may be fewer than the number of requested `top_logprobs`
+    returned.
+    """
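A small post-processing sketch for the new `ChatCompletionTokenLogprob` type; the helper below is hypothetical, not part of the SDK. It converts log probabilities to plain probabilities and decodes the optional UTF-8 byte representation:

```python
# Hypothetical helper for the new token-level logprob objects: converts
# log probabilities to probabilities and decodes the raw UTF-8 bytes.
import math
from typing import List

from openai.types.chat import ChatCompletionTokenLogprob


def summarize(entries: List[ChatCompletionTokenLogprob]) -> None:
    for entry in entries:
        prob = math.exp(entry.logprob)  # logprob -> probability
        text = (
            bytes(entry.bytes).decode("utf-8", errors="replace")
            if entry.bytes is not None
            else entry.token
        )
        alternatives = {alt.token: round(math.exp(alt.logprob), 4) for alt in entry.top_logprobs}
        print(f"{text!r}  p={prob:.4f}  top_logprobs={alternatives}")
```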
src/openai/types/chat/completion_create_params.py
@@ -78,7 +78,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     particular function via `{"name": "my_function"}` forces the model to call that
     function.
 
-    `none` is the default when no functions are present. `auto`` is the default if
+    `none` is the default when no functions are present. `auto` is the default if
     functions are present.
     """
 
@@ -99,8 +99,18 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     or exclusive selection of the relevant token.
     """
 
+    logprobs: Optional[bool]
+    """Whether to return log probabilities of the output tokens or not.
+
+    If true, returns the log probabilities of each output token returned in the
+    `content` of `message`. This option is currently not available on the
+    `gpt-4-vision-preview` model.
+    """
+
     max_tokens: Optional[int]
-    """The maximum number of [tokens](/tokenizer) to generate in the chat completion.
+    """
+    The maximum number of [tokens](/tokenizer) that can be generated in the chat
+    completion.
 
     The total length of input tokens and generated tokens is limited by the model's
     context length.
@@ -127,6 +137,8 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     response_format: ResponseFormat
     """An object specifying the format that the model must output.
 
+    Compatible with `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`.
+
     Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
     message the model generates is valid JSON.
 
@@ -180,6 +192,13 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     functions the model may generate JSON inputs for.
     """
 
+    top_logprobs: Optional[int]
+    """
+    An integer between 0 and 5 specifying the number of most likely tokens to return
+    at each token position, each with an associated log probability. `logprobs` must
+    be set to `true` if this parameter is used.
+    """
+
     top_p: Optional[float]
     """
     An alternative to sampling with temperature, called nucleus sampling, where the
src/openai/types/completion_create_params.py
@@ -88,16 +88,18 @@ class CompletionCreateParamsBase(TypedDict, total=False):
 
     logprobs: Optional[int]
     """
-    Include the log probabilities on the `logprobs` most likely tokens, as well the
-    chosen tokens. For example, if `logprobs` is 5, the API will return a list of
-    the 5 most likely tokens. The API will always return the `logprob` of the
-    sampled token, so there may be up to `logprobs+1` elements in the response.
+    Include the log probabilities on the `logprobs` most likely output tokens, as
+    well the chosen tokens. For example, if `logprobs` is 5, the API will return a
+    list of the 5 most likely tokens. The API will always return the `logprob` of
+    the sampled token, so there may be up to `logprobs+1` elements in the response.
 
     The maximum value for `logprobs` is 5.
     """
 
     max_tokens: Optional[int]
-    """The maximum number of [tokens](/tokenizer) to generate in the completion.
+    """
+    The maximum number of [tokens](/tokenizer) that can be generated in the
+    completion.
 
     The token count of your prompt plus `max_tokens` cannot exceed the model's
     context length.
tests/api_resources/chat/test_completions.py
@@ -54,6 +54,7 @@ class TestCompletions:
                 }
             ],
             logit_bias={"foo": 0},
+            logprobs=True,
             max_tokens=0,
             n=1,
             presence_penalty=-2,
@@ -89,6 +90,7 @@ class TestCompletions:
                     },
                 },
             ],
+            top_logprobs=0,
             top_p=1,
             user="user-1234",
         )
@@ -144,6 +146,7 @@ class TestCompletions:
                 }
             ],
             logit_bias={"foo": 0},
+            logprobs=True,
             max_tokens=0,
             n=1,
             presence_penalty=-2,
@@ -178,6 +181,7 @@ class TestCompletions:
                     },
                 },
             ],
+            top_logprobs=0,
             top_p=1,
             user="user-1234",
         )
@@ -237,6 +241,7 @@ class TestAsyncCompletions:
                 }
             ],
             logit_bias={"foo": 0},
+            logprobs=True,
             max_tokens=0,
             n=1,
             presence_penalty=-2,
@@ -272,6 +277,7 @@ class TestAsyncCompletions:
                     },
                 },
             ],
+            top_logprobs=0,
             top_p=1,
             user="user-1234",
         )
@@ -327,6 +333,7 @@ class TestAsyncCompletions:
                 }
             ],
             logit_bias={"foo": 0},
+            logprobs=True,
             max_tokens=0,
             n=1,
             presence_penalty=-2,
@@ -361,6 +368,7 @@ class TestAsyncCompletions:
                     },
                 },
             ],
+            top_logprobs=0,
             top_p=1,
             user="user-1234",
         )
api.md
@@ -38,6 +38,7 @@ from openai.types.chat import (
     ChatCompletionNamedToolChoice,
     ChatCompletionRole,
     ChatCompletionSystemMessageParam,
+    ChatCompletionTokenLogprob,
     ChatCompletionTool,
     ChatCompletionToolChoiceOption,
     ChatCompletionToolMessageParam,