Commit bb95fe35

Stainless Bot <107565488+stainless-bot@users.noreply.github.com>
2024-04-18 03:33:37
release: 1.21.1 (#1328) tag: v1.21.1
* chore(api): docs and response_format response property (#1327) * release: 1.21.1
1 parent ba3beaf
src/openai/resources/beta/threads/runs/runs.py
@@ -145,13 +145,13 @@ class Runs(SyncAPIResource):
           max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
               run. The run will make a best effort to use only the number of completion tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              completion tokens specified, the run will end with status `complete`. See
+              completion tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
               The run will make a best effort to use only the number of prompt tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              prompt tokens specified, the run will end with status `complete`. See
+              prompt tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -166,7 +166,7 @@ class Runs(SyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -190,7 +190,7 @@ class Runs(SyncAPIResource):
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -201,6 +201,11 @@ class Runs(SyncAPIResource):
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -287,13 +292,13 @@ class Runs(SyncAPIResource):
           max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
               run. The run will make a best effort to use only the number of completion tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              completion tokens specified, the run will end with status `complete`. See
+              completion tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
               The run will make a best effort to use only the number of prompt tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              prompt tokens specified, the run will end with status `complete`. See
+              prompt tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -308,7 +313,7 @@ class Runs(SyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -328,7 +333,7 @@ class Runs(SyncAPIResource):
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -339,6 +344,11 @@ class Runs(SyncAPIResource):
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -425,13 +435,13 @@ class Runs(SyncAPIResource):
           max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
               run. The run will make a best effort to use only the number of completion tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              completion tokens specified, the run will end with status `complete`. See
+              completion tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
               The run will make a best effort to use only the number of prompt tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              prompt tokens specified, the run will end with status `complete`. See
+              prompt tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -446,7 +456,7 @@ class Runs(SyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -466,7 +476,7 @@ class Runs(SyncAPIResource):
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -477,6 +487,11 @@ class Runs(SyncAPIResource):
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1659,13 +1674,13 @@ class AsyncRuns(AsyncAPIResource):
           max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
               run. The run will make a best effort to use only the number of completion tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              completion tokens specified, the run will end with status `complete`. See
+              completion tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
               The run will make a best effort to use only the number of prompt tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              prompt tokens specified, the run will end with status `complete`. See
+              prompt tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -1680,7 +1695,7 @@ class AsyncRuns(AsyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -1704,7 +1719,7 @@ class AsyncRuns(AsyncAPIResource):
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -1715,6 +1730,11 @@ class AsyncRuns(AsyncAPIResource):
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1801,13 +1821,13 @@ class AsyncRuns(AsyncAPIResource):
           max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
               run. The run will make a best effort to use only the number of completion tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              completion tokens specified, the run will end with status `complete`. See
+              completion tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
               The run will make a best effort to use only the number of prompt tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              prompt tokens specified, the run will end with status `complete`. See
+              prompt tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -1822,7 +1842,7 @@ class AsyncRuns(AsyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -1842,7 +1862,7 @@ class AsyncRuns(AsyncAPIResource):
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -1853,6 +1873,11 @@ class AsyncRuns(AsyncAPIResource):
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1939,13 +1964,13 @@ class AsyncRuns(AsyncAPIResource):
           max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
               run. The run will make a best effort to use only the number of completion tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              completion tokens specified, the run will end with status `complete`. See
+              completion tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
               The run will make a best effort to use only the number of prompt tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              prompt tokens specified, the run will end with status `complete`. See
+              prompt tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -1960,7 +1985,7 @@ class AsyncRuns(AsyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -1980,7 +2005,7 @@ class AsyncRuns(AsyncAPIResource):
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -1991,6 +2016,11 @@ class AsyncRuns(AsyncAPIResource):
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
src/openai/resources/beta/threads/threads.py
@@ -340,7 +340,7 @@ class Threads(SyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -366,7 +366,7 @@ class Threads(SyncAPIResource):
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -382,6 +382,11 @@ class Threads(SyncAPIResource):
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -481,7 +486,7 @@ class Threads(SyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -503,7 +508,7 @@ class Threads(SyncAPIResource):
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -519,6 +524,11 @@ class Threads(SyncAPIResource):
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -618,7 +628,7 @@ class Threads(SyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -640,7 +650,7 @@ class Threads(SyncAPIResource):
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -656,6 +666,11 @@ class Threads(SyncAPIResource):
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1296,7 +1311,7 @@ class AsyncThreads(AsyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -1322,7 +1337,7 @@ class AsyncThreads(AsyncAPIResource):
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -1338,6 +1353,11 @@ class AsyncThreads(AsyncAPIResource):
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1437,7 +1457,7 @@ class AsyncThreads(AsyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -1459,7 +1479,7 @@ class AsyncThreads(AsyncAPIResource):
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -1475,6 +1495,11 @@ class AsyncThreads(AsyncAPIResource):
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1574,7 +1599,7 @@ class AsyncThreads(AsyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -1596,7 +1621,7 @@ class AsyncThreads(AsyncAPIResource):
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -1612,6 +1637,11 @@ class AsyncThreads(AsyncAPIResource):
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
+              control the intial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
src/openai/resources/beta/assistants.py
@@ -109,7 +109,7 @@ class Assistants(SyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -253,7 +253,7 @@ class Assistants(SyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -494,7 +494,7 @@ class AsyncAssistants(AsyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -638,7 +638,7 @@ class AsyncAssistants(AsyncAPIResource):
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
src/openai/types/beta/threads/run.py
@@ -162,7 +162,7 @@ class Run(BaseModel):
 
     Compatible with
     [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-    all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+    all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
     message the model generates is valid JSON.
@@ -197,7 +197,7 @@ class Run(BaseModel):
     Controls which (if any) tool is called by the model. `none` means the model will
     not call any tools and instead generates a message. `auto` is the default value
     and means the model can pick between generating a message or calling a tool.
-    Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+    Specifying a particular tool like `{"type": "file_search"}` or
     `{"type": "function", "function": {"name": "my_function"}}` forces the model to
     call that tool.
     """
@@ -210,6 +210,10 @@ class Run(BaseModel):
     """
 
     truncation_strategy: Optional[TruncationStrategy] = None
+    """Controls for how a thread will be truncated prior to the run.
+
+    Use this to control the initial context window of the run.
+    """
 
     usage: Optional[Usage] = None
     """Usage statistics related to the run.
src/openai/types/beta/threads/run_create_params.py
@@ -49,7 +49,7 @@ class RunCreateParamsBase(TypedDict, total=False):
     The maximum number of completion tokens that may be used over the course of the
     run. The run will make a best effort to use only the number of completion tokens
     specified, across multiple turns of the run. If the run exceeds the number of
-    completion tokens specified, the run will end with status `complete`. See
+    completion tokens specified, the run will end with status `incomplete`. See
     `incomplete_details` for more info.
     """
 
@@ -58,7 +58,7 @@ class RunCreateParamsBase(TypedDict, total=False):
 
     The run will make a best effort to use only the number of prompt tokens
     specified, across multiple turns of the run. If the run exceeds the number of
-    prompt tokens specified, the run will end with status `complete`. See
+    prompt tokens specified, the run will end with status `incomplete`. See
     `incomplete_details` for more info.
     """
 
@@ -106,7 +106,7 @@ class RunCreateParamsBase(TypedDict, total=False):
 
     Compatible with
     [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-    all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+    all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
     message the model generates is valid JSON.
@@ -132,7 +132,7 @@ class RunCreateParamsBase(TypedDict, total=False):
     Controls which (if any) tool is called by the model. `none` means the model will
     not call any tools and instead generates a message. `auto` is the default value
     and means the model can pick between generating a message or calling a tool.
-    Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+    Specifying a particular tool like `{"type": "file_search"}` or
     `{"type": "function", "function": {"name": "my_function"}}` forces the model to
     call that tool.
     """
@@ -148,9 +148,15 @@ class RunCreateParamsBase(TypedDict, total=False):
     An alternative to sampling with temperature, called nucleus sampling, where the
     model considers the results of the tokens with top_p probability mass. So 0.1
     means only the tokens comprising the top 10% probability mass are considered.
+
+    We generally recommend altering this or temperature but not both.
     """
 
     truncation_strategy: Optional[TruncationStrategy]
+    """Controls for how a thread will be truncated prior to the run.
+
+    Use this to control the initial context window of the run.
+    """
 
 
 class AdditionalMessageAttachment(TypedDict, total=False):
src/openai/types/beta/assistant.py
@@ -5,6 +5,7 @@ from typing_extensions import Literal
 
 from ..._models import BaseModel
 from .assistant_tool import AssistantTool
+from .assistant_response_format_option import AssistantResponseFormatOption
 
 __all__ = ["Assistant", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"]
 
@@ -81,6 +82,32 @@ class Assistant(BaseModel):
     `code_interpreter`, `file_search`, or `function`.
     """
 
+    response_format: Optional[AssistantResponseFormatOption] = None
+    """Specifies the format that the model must output.
+
+    Compatible with
+    [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+    all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+    Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+    message the model generates is valid JSON.
+
+    **Important:** when using JSON mode, you **must** also instruct the model to
+    produce JSON yourself via a system or user message. Without this, the model may
+    generate an unending stream of whitespace until the generation reaches the token
+    limit, resulting in a long-running and seemingly "stuck" request. Also note that
+    the message content may be partially cut off if `finish_reason="length"`, which
+    indicates the generation exceeded `max_tokens` or the conversation exceeded the
+    max context length.
+    """
+
+    temperature: Optional[float] = None
+    """What sampling temperature to use, between 0 and 2.
+
+    Higher values like 0.8 will make the output more random, while lower values like
+    0.2 will make it more focused and deterministic.
+    """
+
     tool_resources: Optional[ToolResources] = None
     """A set of resources that are used by the assistant's tools.
 
@@ -88,3 +115,12 @@ class Assistant(BaseModel):
     `code_interpreter` tool requires a list of file IDs, while the `file_search`
     tool requires a list of vector store IDs.
     """
+
+    top_p: Optional[float] = None
+    """
+    An alternative to sampling with temperature, called nucleus sampling, where the
+    model considers the results of the tokens with top_p probability mass. So 0.1
+    means only the tokens comprising the top 10% probability mass are considered.
+
+    We generally recommend altering this or temperature but not both.
+    """
src/openai/types/beta/assistant_create_params.py
@@ -77,7 +77,7 @@ class AssistantCreateParams(TypedDict, total=False):
 
     Compatible with
     [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-    all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+    all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
     message the model generates is valid JSON.
src/openai/types/beta/assistant_update_params.py
@@ -47,7 +47,7 @@ class AssistantUpdateParams(TypedDict, total=False):
 
     Compatible with
     [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-    all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+    all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
     message the model generates is valid JSON.
src/openai/types/beta/thread_create_and_run_params.py
@@ -106,7 +106,7 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
 
     Compatible with
     [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-    all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+    all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
     message the model generates is valid JSON.
@@ -135,7 +135,7 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
     Controls which (if any) tool is called by the model. `none` means the model will
     not call any tools and instead generates a message. `auto` is the default value
     and means the model can pick between generating a message or calling a tool.
-    Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+    Specifying a particular tool like `{"type": "file_search"}` or
     `{"type": "function", "function": {"name": "my_function"}}` forces the model to
     call that tool.
     """
@@ -159,9 +159,15 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
     An alternative to sampling with temperature, called nucleus sampling, where the
     model considers the results of the tokens with top_p probability mass. So 0.1
     means only the tokens comprising the top 10% probability mass are considered.
+
+    We generally recommend altering this or temperature but not both.
     """
 
     truncation_strategy: Optional[TruncationStrategy]
+    """Controls for how a thread will be truncated prior to the run.
+
+    Use this to control the initial context window of the run.
+    """
 
 
 class ThreadMessageAttachment(TypedDict, total=False):
src/openai/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "openai"
-__version__ = "1.21.0"  # x-release-please-version
+__version__ = "1.21.1"  # x-release-please-version
.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.21.0"
+  ".": "1.21.1"
 }
\ No newline at end of file
CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## 1.21.1 (2024-04-17)
+
+Full Changelog: [v1.21.0...v1.21.1](https://github.com/openai/openai-python/compare/v1.21.0...v1.21.1)
+
+### Chores
+
+* **api:** docs and response_format response property ([#1327](https://github.com/openai/openai-python/issues/1327)) ([7a6d142](https://github.com/openai/openai-python/commit/7a6d142f013994c4eb9a4f55888464c885f8baf0))
+
 ## 1.21.0 (2024-04-17)
 
 Full Changelog: [v1.20.0...v1.21.0](https://github.com/openai/openai-python/compare/v1.20.0...v1.21.0)
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openai"
-version = "1.21.0"
+version = "1.21.1"
 description = "The official Python library for the openai API"
 dynamic = ["readme"]
 license = "Apache-2.0"