Commit cc2c1fc1

stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
2025-06-17 03:03:28
release: 1.87.0 (#2410) tag: v1.87.0
* chore(internal): codegen related update
* chore(tests): add tests for httpx client instantiation & proxies
* feat(api): add reusable prompt IDs
* fix(client): update service_tier on `client.beta.chat.completions`
* chore(internal): update conftest.py
* release: 1.87.0

---------

Co-authored-by: stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
Co-authored-by: David Meadows <dmeadows@stainless.com>
1 parent eed877f
src/openai/resources/beta/chat/completions.py
@@ -81,7 +81,7 @@ class Completions(SyncAPIResource):
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -228,7 +228,7 @@ class Completions(SyncAPIResource):
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -360,7 +360,7 @@ class AsyncCompletions(AsyncAPIResource):
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -507,7 +507,7 @@ class AsyncCompletions(AsyncAPIResource):
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
src/openai/resources/chat/completions/completions.py
@@ -95,7 +95,7 @@ class Completions(SyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
@@ -365,7 +365,7 @@ class Completions(SyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -634,7 +634,7 @@ class Completions(SyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -902,7 +902,7 @@ class Completions(SyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
@@ -1198,7 +1198,7 @@ class AsyncCompletions(AsyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
@@ -1468,7 +1468,7 @@ class AsyncCompletions(AsyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1737,7 +1737,7 @@ class AsyncCompletions(AsyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -2005,7 +2005,7 @@ class AsyncCompletions(AsyncAPIResource):
         reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
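
A minimal sketch of how the widened `service_tier` literal might be exercised; the model name and message are placeholders, and availability of the `scale` tier depends on the account:

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# `service_tier` now also accepts "scale" alongside "auto", "default", and "flex".
completion = client.chat.completions.create(
    model="gpt-4o",  # placeholder model name
    messages=[{"role": "user", "content": "Say hello."}],
    service_tier="scale",
)

# The response reports the tier that actually processed the request.
print(completion.service_tier)
```
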
src/openai/resources/fine_tuning/jobs/jobs.py
@@ -84,7 +84,7 @@ class Jobs(SyncAPIResource):
         Response includes details of the enqueued job including job status and the name
         of the fine-tuned models once complete.
 
-        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning)
+        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/model-optimization)
 
         Args:
           model: The name of the model to fine-tune. You can select one of the
@@ -105,7 +105,8 @@ class Jobs(SyncAPIResource):
               [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input)
               format.
 
-              See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
+              See the
+              [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization)
               for more details.
 
           hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated
@@ -142,7 +143,8 @@ class Jobs(SyncAPIResource):
               Your dataset must be formatted as a JSONL file. You must upload your file with
               the purpose `fine-tune`.
 
-              See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
+              See the
+              [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization)
               for more details.
 
           extra_headers: Send extra headers
@@ -189,7 +191,7 @@ class Jobs(SyncAPIResource):
         """
         Get info about a fine-tuning job.
 
-        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning)
+        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/model-optimization)
 
         Args:
           extra_headers: Send extra headers
@@ -462,7 +464,7 @@ class AsyncJobs(AsyncAPIResource):
         Response includes details of the enqueued job including job status and the name
         of the fine-tuned models once complete.
 
-        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning)
+        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/model-optimization)
 
         Args:
           model: The name of the model to fine-tune. You can select one of the
@@ -483,7 +485,8 @@ class AsyncJobs(AsyncAPIResource):
               [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input)
               format.
 
-              See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
+              See the
+              [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization)
               for more details.
 
           hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated
@@ -520,7 +523,8 @@ class AsyncJobs(AsyncAPIResource):
               Your dataset must be formatted as a JSONL file. You must upload your file with
               the purpose `fine-tune`.
 
-              See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
+              See the
+              [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization)
               for more details.
 
           extra_headers: Send extra headers
@@ -567,7 +571,7 @@ class AsyncJobs(AsyncAPIResource):
         """
         Get info about a fine-tuning job.
 
-        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning)
+        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/model-optimization)
 
         Args:
           extra_headers: Send extra headers
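
For context, a hedged sketch of the `jobs.create` call these docstrings describe; the file ID and model below are placeholders:

```python
from openai import OpenAI

client = OpenAI()

# training_file must be the ID of a JSONL file already uploaded with
# purpose="fine-tune"; the ID and model name here are placeholders.
job = client.fine_tuning.jobs.create(
    model="gpt-4o-mini-2024-07-18",
    training_file="file-abc123",
)

# Retrieve the enqueued job to inspect its status.
print(client.fine_tuning.jobs.retrieve(job.id).status)
```
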
src/openai/resources/responses/responses.py
@@ -41,6 +41,7 @@ from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncRe
 from ...types.responses.response_includable import ResponseIncludable
 from ...types.shared_params.responses_model import ResponsesModel
 from ...types.responses.response_input_param import ResponseInputParam
+from ...types.responses.response_prompt_param import ResponsePromptParam
 from ...types.responses.response_stream_event import ResponseStreamEvent
 from ...types.responses.response_text_config_param import ResponseTextConfigParam
 
@@ -84,8 +85,9 @@ class Responses(SyncAPIResource):
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -152,8 +154,7 @@ class Responses(SyncAPIResource):
               - `code_interpreter_call.outputs`: Includes the outputs of python code execution
                 in code interpreter tool call items.
 
-          instructions: Inserts a system (or developer) message as the first item in the model's
-              context.
+          instructions: A system (or developer) message inserted into the model's context.
 
               When using along with `previous_response_id`, the instructions from a previous
               response will not be carried over to the next response. This makes it simple to
@@ -176,6 +177,9 @@ class Responses(SyncAPIResource):
               multi-turn conversations. Learn more about
               [conversation state](https://platform.openai.com/docs/guides/conversation-state).
 
+          prompt: Reference to a prompt template and its variables.
+              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+
           reasoning: **o-series models only**
 
               Configuration options for
@@ -280,8 +284,9 @@ class Responses(SyncAPIResource):
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -354,8 +359,7 @@ class Responses(SyncAPIResource):
               - `code_interpreter_call.outputs`: Includes the outputs of python code execution
                 in code interpreter tool call items.
 
-          instructions: Inserts a system (or developer) message as the first item in the model's
-              context.
+          instructions: A system (or developer) message inserted into the model's context.
 
               When using along with `previous_response_id`, the instructions from a previous
               response will not be carried over to the next response. This makes it simple to
@@ -378,6 +382,9 @@ class Responses(SyncAPIResource):
               multi-turn conversations. Learn more about
               [conversation state](https://platform.openai.com/docs/guides/conversation-state).
 
+          prompt: Reference to a prompt template and its variables.
+              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+
           reasoning: **o-series models only**
 
               Configuration options for
@@ -475,8 +482,9 @@ class Responses(SyncAPIResource):
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -549,8 +557,7 @@ class Responses(SyncAPIResource):
               - `code_interpreter_call.outputs`: Includes the outputs of python code execution
                 in code interpreter tool call items.
 
-          instructions: Inserts a system (or developer) message as the first item in the model's
-              context.
+          instructions: A system (or developer) message inserted into the model's context.
 
               When using along with `previous_response_id`, the instructions from a previous
               response will not be carried over to the next response. This makes it simple to
@@ -573,6 +580,9 @@ class Responses(SyncAPIResource):
               multi-turn conversations. Learn more about
               [conversation state](https://platform.openai.com/docs/guides/conversation-state).
 
+          prompt: Reference to a prompt template and its variables.
+              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+
           reasoning: **o-series models only**
 
               Configuration options for
@@ -669,8 +679,9 @@ class Responses(SyncAPIResource):
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -700,6 +711,7 @@ class Responses(SyncAPIResource):
                     "metadata": metadata,
                     "parallel_tool_calls": parallel_tool_calls,
                     "previous_response_id": previous_response_id,
+                    "prompt": prompt,
                     "reasoning": reasoning,
                     "service_tier": service_tier,
                     "store": store,
@@ -1292,8 +1304,9 @@ class AsyncResponses(AsyncAPIResource):
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1360,8 +1373,7 @@ class AsyncResponses(AsyncAPIResource):
               - `code_interpreter_call.outputs`: Includes the outputs of python code execution
                 in code interpreter tool call items.
 
-          instructions: Inserts a system (or developer) message as the first item in the model's
-              context.
+          instructions: A system (or developer) message inserted into the model's context.
 
               When using along with `previous_response_id`, the instructions from a previous
               response will not be carried over to the next response. This makes it simple to
@@ -1384,6 +1396,9 @@ class AsyncResponses(AsyncAPIResource):
               multi-turn conversations. Learn more about
               [conversation state](https://platform.openai.com/docs/guides/conversation-state).
 
+          prompt: Reference to a prompt template and its variables.
+              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+
           reasoning: **o-series models only**
 
               Configuration options for
@@ -1488,8 +1503,9 @@ class AsyncResponses(AsyncAPIResource):
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -1562,8 +1578,7 @@ class AsyncResponses(AsyncAPIResource):
               - `code_interpreter_call.outputs`: Includes the outputs of python code execution
                 in code interpreter tool call items.
 
-          instructions: Inserts a system (or developer) message as the first item in the model's
-              context.
+          instructions: A system (or developer) message inserted into the model's context.
 
               When using along with `previous_response_id`, the instructions from a previous
               response will not be carried over to the next response. This makes it simple to
@@ -1586,6 +1601,9 @@ class AsyncResponses(AsyncAPIResource):
               multi-turn conversations. Learn more about
               [conversation state](https://platform.openai.com/docs/guides/conversation-state).
 
+          prompt: Reference to a prompt template and its variables.
+              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+
           reasoning: **o-series models only**
 
               Configuration options for
@@ -1683,8 +1701,9 @@ class AsyncResponses(AsyncAPIResource):
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -1757,8 +1776,7 @@ class AsyncResponses(AsyncAPIResource):
               - `code_interpreter_call.outputs`: Includes the outputs of python code execution
                 in code interpreter tool call items.
 
-          instructions: Inserts a system (or developer) message as the first item in the model's
-              context.
+          instructions: A system (or developer) message inserted into the model's context.
 
               When using along with `previous_response_id`, the instructions from a previous
               response will not be carried over to the next response. This makes it simple to
@@ -1781,6 +1799,9 @@ class AsyncResponses(AsyncAPIResource):
               multi-turn conversations. Learn more about
               [conversation state](https://platform.openai.com/docs/guides/conversation-state).
 
+          prompt: Reference to a prompt template and its variables.
+              [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+
           reasoning: **o-series models only**
 
               Configuration options for
@@ -1877,8 +1898,9 @@ class AsyncResponses(AsyncAPIResource):
         metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
         parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
         previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
         reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
-        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
         store: Optional[bool] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1908,6 +1930,7 @@ class AsyncResponses(AsyncAPIResource):
                     "metadata": metadata,
                     "parallel_tool_calls": parallel_tool_calls,
                     "previous_response_id": previous_response_id,
+                    "prompt": prompt,
                     "reasoning": reasoning,
                     "service_tier": service_tier,
                     "store": store,
src/openai/resources/images.py
@@ -123,6 +123,8 @@ class Images(SyncAPIResource):
         mask: FileTypes | NotGiven = NOT_GIVEN,
         model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
+        output_compression: Optional[int] | NotGiven = NOT_GIVEN,
+        output_format: Optional[Literal["png", "jpeg", "webp"]] | NotGiven = NOT_GIVEN,
         quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | NotGiven = NOT_GIVEN,
         response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
         size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]]
@@ -171,6 +173,14 @@ class Images(SyncAPIResource):
 
           n: The number of images to generate. Must be between 1 and 10.
 
+          output_compression: The compression level (0-100%) for the generated images. This parameter is only
+              supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and
+              defaults to 100.
+
+          output_format: The format in which the generated images are returned. This parameter is only
+              supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. The
+              default value is `png`.
+
           quality: The quality of the image that will be generated. `high`, `medium` and `low` are
               only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality.
               Defaults to `auto`.
@@ -204,6 +214,8 @@ class Images(SyncAPIResource):
                 "mask": mask,
                 "model": model,
                 "n": n,
+                "output_compression": output_compression,
+                "output_format": output_format,
                 "quality": quality,
                 "response_format": response_format,
                 "size": size,
@@ -447,6 +459,8 @@ class AsyncImages(AsyncAPIResource):
         mask: FileTypes | NotGiven = NOT_GIVEN,
         model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
         n: Optional[int] | NotGiven = NOT_GIVEN,
+        output_compression: Optional[int] | NotGiven = NOT_GIVEN,
+        output_format: Optional[Literal["png", "jpeg", "webp"]] | NotGiven = NOT_GIVEN,
         quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | NotGiven = NOT_GIVEN,
         response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
         size: Optional[Literal["256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "auto"]]
@@ -495,6 +509,14 @@ class AsyncImages(AsyncAPIResource):
 
           n: The number of images to generate. Must be between 1 and 10.
 
+          output_compression: The compression level (0-100%) for the generated images. This parameter is only
+              supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and
+              defaults to 100.
+
+          output_format: The format in which the generated images are returned. This parameter is only
+              supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. The
+              default value is `png`.
+
           quality: The quality of the image that will be generated. `high`, `medium` and `low` are
               only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality.
               Defaults to `auto`.
@@ -528,6 +550,8 @@ class AsyncImages(AsyncAPIResource):
                 "mask": mask,
                 "model": model,
                 "n": n,
+                "output_compression": output_compression,
+                "output_format": output_format,
                 "quality": quality,
                 "response_format": response_format,
                 "size": size,
src/openai/types/chat/chat_completion.py
@@ -59,7 +59,7 @@ class ChatCompletion(BaseModel):
     object: Literal["chat.completion"]
     """The object type, which is always `chat.completion`."""
 
-    service_tier: Optional[Literal["auto", "default", "flex"]] = None
+    service_tier: Optional[Literal["auto", "default", "flex", "scale"]] = None
     """Specifies the latency tier to use for processing the request.
 
     This parameter is relevant for customers subscribed to the scale tier service:
src/openai/types/chat/chat_completion_chunk.py
@@ -128,7 +128,7 @@ class ChatCompletionChunk(BaseModel):
     object: Literal["chat.completion.chunk"]
     """The object type, which is always `chat.completion.chunk`."""
 
-    service_tier: Optional[Literal["auto", "default", "flex"]] = None
+    service_tier: Optional[Literal["auto", "default", "flex", "scale"]] = None
     """Specifies the latency tier to use for processing the request.
 
     This parameter is relevant for customers subscribed to the scale tier service:
src/openai/types/chat/completion_create_params.py
@@ -208,7 +208,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     in the backend.
     """
 
-    service_tier: Optional[Literal["auto", "default", "flex"]]
+    service_tier: Optional[Literal["auto", "default", "flex", "scale"]]
     """Specifies the latency tier to use for processing the request.
 
     This parameter is relevant for customers subscribed to the scale tier service:
src/openai/types/fine_tuning/job_create_params.py
@@ -37,7 +37,8 @@ class JobCreateParams(TypedDict, total=False):
     [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input)
     format.
 
-    See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
+    See the
+    [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization)
     for more details.
     """
 
@@ -91,7 +92,8 @@ class JobCreateParams(TypedDict, total=False):
     Your dataset must be formatted as a JSONL file. You must upload your file with
     the purpose `fine-tune`.
 
-    See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
+    See the
+    [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization)
     for more details.
     """
 
src/openai/types/responses/__init__.py
@@ -18,6 +18,7 @@ from .parsed_response import (
     ParsedResponseOutputMessage as ParsedResponseOutputMessage,
     ParsedResponseFunctionToolCall as ParsedResponseFunctionToolCall,
 )
+from .response_prompt import ResponsePrompt as ResponsePrompt
 from .response_status import ResponseStatus as ResponseStatus
 from .web_search_tool import WebSearchTool as WebSearchTool
 from .file_search_tool import FileSearchTool as FileSearchTool
@@ -28,6 +29,7 @@ from .computer_tool_param import ComputerToolParam as ComputerToolParam
 from .function_tool_param import FunctionToolParam as FunctionToolParam
 from .response_includable import ResponseIncludable as ResponseIncludable
 from .response_input_file import ResponseInputFile as ResponseInputFile
+from .response_input_item import ResponseInputItem as ResponseInputItem
 from .response_input_text import ResponseInputText as ResponseInputText
 from .tool_choice_options import ToolChoiceOptions as ToolChoiceOptions
 from .response_error_event import ResponseErrorEvent as ResponseErrorEvent
@@ -38,6 +40,7 @@ from .response_output_text import ResponseOutputText as ResponseOutputText
 from .response_text_config import ResponseTextConfig as ResponseTextConfig
 from .tool_choice_function import ToolChoiceFunction as ToolChoiceFunction
 from .response_failed_event import ResponseFailedEvent as ResponseFailedEvent
+from .response_prompt_param import ResponsePromptParam as ResponsePromptParam
 from .response_queued_event import ResponseQueuedEvent as ResponseQueuedEvent
 from .response_stream_event import ResponseStreamEvent as ResponseStreamEvent
 from .web_search_tool_param import WebSearchToolParam as WebSearchToolParam
src/openai/types/responses/response.py
@@ -7,10 +7,12 @@ from .tool import Tool
 from ..._models import BaseModel
 from .response_error import ResponseError
 from .response_usage import ResponseUsage
+from .response_prompt import ResponsePrompt
 from .response_status import ResponseStatus
 from ..shared.metadata import Metadata
 from ..shared.reasoning import Reasoning
 from .tool_choice_types import ToolChoiceTypes
+from .response_input_item import ResponseInputItem
 from .tool_choice_options import ToolChoiceOptions
 from .response_output_item import ResponseOutputItem
 from .response_text_config import ResponseTextConfig
@@ -41,10 +43,8 @@ class Response(BaseModel):
     incomplete_details: Optional[IncompleteDetails] = None
     """Details about why the response is incomplete."""
 
-    instructions: Optional[str] = None
-    """
-    Inserts a system (or developer) message as the first item in the model's
-    context.
+    instructions: Union[str, List[ResponseInputItem], None] = None
+    """A system (or developer) message inserted into the model's context.
 
     When using along with `previous_response_id`, the instructions from a previous
     response will not be carried over to the next response. This makes it simple to
@@ -148,6 +148,12 @@ class Response(BaseModel):
     [conversation state](https://platform.openai.com/docs/guides/conversation-state).
     """
 
+    prompt: Optional[ResponsePrompt] = None
+    """Reference to a prompt template and its variables.
+
+    [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+    """
+
     reasoning: Optional[Reasoning] = None
     """**o-series models only**
 
@@ -155,7 +161,7 @@ class Response(BaseModel):
     [reasoning models](https://platform.openai.com/docs/guides/reasoning).
     """
 
-    service_tier: Optional[Literal["auto", "default", "flex"]] = None
+    service_tier: Optional[Literal["auto", "default", "flex", "scale"]] = None
     """Specifies the latency tier to use for processing the request.
 
     This parameter is relevant for customers subscribed to the scale tier service:
src/openai/types/responses/response_create_params.py
@@ -9,6 +9,7 @@ from .tool_param import ToolParam
 from .response_includable import ResponseIncludable
 from .tool_choice_options import ToolChoiceOptions
 from .response_input_param import ResponseInputParam
+from .response_prompt_param import ResponsePromptParam
 from ..shared_params.metadata import Metadata
 from .tool_choice_types_param import ToolChoiceTypesParam
 from ..shared_params.reasoning import Reasoning
@@ -72,9 +73,7 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     """
 
     instructions: Optional[str]
-    """
-    Inserts a system (or developer) message as the first item in the model's
-    context.
+    """A system (or developer) message inserted into the model's context.
 
     When using along with `previous_response_id`, the instructions from a previous
     response will not be carried over to the next response. This makes it simple to
@@ -108,6 +107,12 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     [conversation state](https://platform.openai.com/docs/guides/conversation-state).
     """
 
+    prompt: Optional[ResponsePromptParam]
+    """Reference to a prompt template and its variables.
+
+    [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+    """
+
     reasoning: Optional[Reasoning]
     """**o-series models only**
 
@@ -115,7 +120,7 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     [reasoning models](https://platform.openai.com/docs/guides/reasoning).
     """
 
-    service_tier: Optional[Literal["auto", "default", "flex"]]
+    service_tier: Optional[Literal["auto", "default", "flex", "scale"]]
     """Specifies the latency tier to use for processing the request.
 
     This parameter is relevant for customers subscribed to the scale tier service:
src/openai/types/responses/response_input_item.py
@@ -0,0 +1,305 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .easy_input_message import EasyInputMessage
+from .response_output_message import ResponseOutputMessage
+from .response_reasoning_item import ResponseReasoningItem
+from .response_computer_tool_call import ResponseComputerToolCall
+from .response_function_tool_call import ResponseFunctionToolCall
+from .response_function_web_search import ResponseFunctionWebSearch
+from .response_file_search_tool_call import ResponseFileSearchToolCall
+from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall
+from .response_input_message_content_list import ResponseInputMessageContentList
+from .response_computer_tool_call_output_screenshot import ResponseComputerToolCallOutputScreenshot
+
+__all__ = [
+    "ResponseInputItem",
+    "Message",
+    "ComputerCallOutput",
+    "ComputerCallOutputAcknowledgedSafetyCheck",
+    "FunctionCallOutput",
+    "ImageGenerationCall",
+    "LocalShellCall",
+    "LocalShellCallAction",
+    "LocalShellCallOutput",
+    "McpListTools",
+    "McpListToolsTool",
+    "McpApprovalRequest",
+    "McpApprovalResponse",
+    "McpCall",
+    "ItemReference",
+]
+
+
+class Message(BaseModel):
+    content: ResponseInputMessageContentList
+    """
+    A list of one or many input items to the model, containing different content
+    types.
+    """
+
+    role: Literal["user", "system", "developer"]
+    """The role of the message input. One of `user`, `system`, or `developer`."""
+
+    status: Optional[Literal["in_progress", "completed", "incomplete"]] = None
+    """The status of item.
+
+    One of `in_progress`, `completed`, or `incomplete`. Populated when items are
+    returned via API.
+    """
+
+    type: Optional[Literal["message"]] = None
+    """The type of the message input. Always set to `message`."""
+
+
+class ComputerCallOutputAcknowledgedSafetyCheck(BaseModel):
+    id: str
+    """The ID of the pending safety check."""
+
+    code: Optional[str] = None
+    """The type of the pending safety check."""
+
+    message: Optional[str] = None
+    """Details about the pending safety check."""
+
+
+class ComputerCallOutput(BaseModel):
+    call_id: str
+    """The ID of the computer tool call that produced the output."""
+
+    output: ResponseComputerToolCallOutputScreenshot
+    """A computer screenshot image used with the computer use tool."""
+
+    type: Literal["computer_call_output"]
+    """The type of the computer tool call output. Always `computer_call_output`."""
+
+    id: Optional[str] = None
+    """The ID of the computer tool call output."""
+
+    acknowledged_safety_checks: Optional[List[ComputerCallOutputAcknowledgedSafetyCheck]] = None
+    """
+    The safety checks reported by the API that have been acknowledged by the
+    developer.
+    """
+
+    status: Optional[Literal["in_progress", "completed", "incomplete"]] = None
+    """The status of the message input.
+
+    One of `in_progress`, `completed`, or `incomplete`. Populated when input items
+    are returned via API.
+    """
+
+
+class FunctionCallOutput(BaseModel):
+    call_id: str
+    """The unique ID of the function tool call generated by the model."""
+
+    output: str
+    """A JSON string of the output of the function tool call."""
+
+    type: Literal["function_call_output"]
+    """The type of the function tool call output. Always `function_call_output`."""
+
+    id: Optional[str] = None
+    """The unique ID of the function tool call output.
+
+    Populated when this item is returned via API.
+    """
+
+    status: Optional[Literal["in_progress", "completed", "incomplete"]] = None
+    """The status of the item.
+
+    One of `in_progress`, `completed`, or `incomplete`. Populated when items are
+    returned via API.
+    """
+
+
+class ImageGenerationCall(BaseModel):
+    id: str
+    """The unique ID of the image generation call."""
+
+    result: Optional[str] = None
+    """The generated image encoded in base64."""
+
+    status: Literal["in_progress", "completed", "generating", "failed"]
+    """The status of the image generation call."""
+
+    type: Literal["image_generation_call"]
+    """The type of the image generation call. Always `image_generation_call`."""
+
+
+class LocalShellCallAction(BaseModel):
+    command: List[str]
+    """The command to run."""
+
+    env: Dict[str, str]
+    """Environment variables to set for the command."""
+
+    type: Literal["exec"]
+    """The type of the local shell action. Always `exec`."""
+
+    timeout_ms: Optional[int] = None
+    """Optional timeout in milliseconds for the command."""
+
+    user: Optional[str] = None
+    """Optional user to run the command as."""
+
+    working_directory: Optional[str] = None
+    """Optional working directory to run the command in."""
+
+
+class LocalShellCall(BaseModel):
+    id: str
+    """The unique ID of the local shell call."""
+
+    action: LocalShellCallAction
+    """Execute a shell command on the server."""
+
+    call_id: str
+    """The unique ID of the local shell tool call generated by the model."""
+
+    status: Literal["in_progress", "completed", "incomplete"]
+    """The status of the local shell call."""
+
+    type: Literal["local_shell_call"]
+    """The type of the local shell call. Always `local_shell_call`."""
+
+
+class LocalShellCallOutput(BaseModel):
+    id: str
+    """The unique ID of the local shell tool call generated by the model."""
+
+    output: str
+    """A JSON string of the output of the local shell tool call."""
+
+    type: Literal["local_shell_call_output"]
+    """The type of the local shell tool call output. Always `local_shell_call_output`."""
+
+    status: Optional[Literal["in_progress", "completed", "incomplete"]] = None
+    """The status of the item. One of `in_progress`, `completed`, or `incomplete`."""
+
+
+class McpListToolsTool(BaseModel):
+    input_schema: object
+    """The JSON schema describing the tool's input."""
+
+    name: str
+    """The name of the tool."""
+
+    annotations: Optional[object] = None
+    """Additional annotations about the tool."""
+
+    description: Optional[str] = None
+    """The description of the tool."""
+
+
+class McpListTools(BaseModel):
+    id: str
+    """The unique ID of the list."""
+
+    server_label: str
+    """The label of the MCP server."""
+
+    tools: List[McpListToolsTool]
+    """The tools available on the server."""
+
+    type: Literal["mcp_list_tools"]
+    """The type of the item. Always `mcp_list_tools`."""
+
+    error: Optional[str] = None
+    """Error message if the server could not list tools."""
+
+
+class McpApprovalRequest(BaseModel):
+    id: str
+    """The unique ID of the approval request."""
+
+    arguments: str
+    """A JSON string of arguments for the tool."""
+
+    name: str
+    """The name of the tool to run."""
+
+    server_label: str
+    """The label of the MCP server making the request."""
+
+    type: Literal["mcp_approval_request"]
+    """The type of the item. Always `mcp_approval_request`."""
+
+
+class McpApprovalResponse(BaseModel):
+    approval_request_id: str
+    """The ID of the approval request being answered."""
+
+    approve: bool
+    """Whether the request was approved."""
+
+    type: Literal["mcp_approval_response"]
+    """The type of the item. Always `mcp_approval_response`."""
+
+    id: Optional[str] = None
+    """The unique ID of the approval response"""
+
+    reason: Optional[str] = None
+    """Optional reason for the decision."""
+
+
+class McpCall(BaseModel):
+    id: str
+    """The unique ID of the tool call."""
+
+    arguments: str
+    """A JSON string of the arguments passed to the tool."""
+
+    name: str
+    """The name of the tool that was run."""
+
+    server_label: str
+    """The label of the MCP server running the tool."""
+
+    type: Literal["mcp_call"]
+    """The type of the item. Always `mcp_call`."""
+
+    error: Optional[str] = None
+    """The error from the tool call, if any."""
+
+    output: Optional[str] = None
+    """The output from the tool call."""
+
+
+class ItemReference(BaseModel):
+    id: str
+    """The ID of the item to reference."""
+
+    type: Optional[Literal["item_reference"]] = None
+    """The type of item to reference. Always `item_reference`."""
+
+
+ResponseInputItem: TypeAlias = Annotated[
+    Union[
+        EasyInputMessage,
+        Message,
+        ResponseOutputMessage,
+        ResponseFileSearchToolCall,
+        ResponseComputerToolCall,
+        ComputerCallOutput,
+        ResponseFunctionWebSearch,
+        ResponseFunctionToolCall,
+        FunctionCallOutput,
+        ResponseReasoningItem,
+        ImageGenerationCall,
+        ResponseCodeInterpreterToolCall,
+        LocalShellCall,
+        LocalShellCallOutput,
+        McpListTools,
+        McpApprovalRequest,
+        McpApprovalResponse,
+        McpCall,
+        ItemReference,
+    ],
+    PropertyInfo(discriminator="type"),
+]
src/openai/types/responses/response_prompt.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, Union, Optional
+from typing_extensions import TypeAlias
+
+from ..._models import BaseModel
+from .response_input_file import ResponseInputFile
+from .response_input_text import ResponseInputText
+from .response_input_image import ResponseInputImage
+
+__all__ = ["ResponsePrompt", "Variables"]
+
+Variables: TypeAlias = Union[str, ResponseInputText, ResponseInputImage, ResponseInputFile]
+
+
+class ResponsePrompt(BaseModel):
+    id: str
+    """The unique identifier of the prompt template to use."""
+
+    variables: Optional[Dict[str, Variables]] = None
+    """Optional map of values to substitute in for variables in your prompt.
+
+    The substitution values can either be strings, or other Response input types
+    like images or files.
+    """
+
+    version: Optional[str] = None
+    """Optional version of the prompt template."""
src/openai/types/responses/response_prompt_param.py
@@ -0,0 +1,29 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Optional
+from typing_extensions import Required, TypeAlias, TypedDict
+
+from .response_input_file_param import ResponseInputFileParam
+from .response_input_text_param import ResponseInputTextParam
+from .response_input_image_param import ResponseInputImageParam
+
+__all__ = ["ResponsePromptParam", "Variables"]
+
+Variables: TypeAlias = Union[str, ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam]
+
+
+class ResponsePromptParam(TypedDict, total=False):
+    id: Required[str]
+    """The unique identifier of the prompt template to use."""
+
+    variables: Optional[Dict[str, Variables]]
+    """Optional map of values to substitute in for variables in your prompt.
+
+    The substitution values can either be strings, or other Response input types
+    like images or files.
+    """
+
+    version: Optional[str]
+    """Optional version of the prompt template."""
src/openai/types/image_edit_params.py
@@ -58,6 +58,20 @@ class ImageEditParams(TypedDict, total=False):
     n: Optional[int]
     """The number of images to generate. Must be between 1 and 10."""
 
+    output_compression: Optional[int]
+    """The compression level (0-100%) for the generated images.
+
+    This parameter is only supported for `gpt-image-1` with the `webp` or `jpeg`
+    output formats, and defaults to 100.
+    """
+
+    output_format: Optional[Literal["png", "jpeg", "webp"]]
+    """The format in which the generated images are returned.
+
+    This parameter is only supported for `gpt-image-1`. Must be one of `png`,
+    `jpeg`, or `webp`. The default value is `png`.
+    """
+
     quality: Optional[Literal["standard", "low", "medium", "high", "auto"]]
     """The quality of the image that will be generated.
 
src/openai/_base_client.py
@@ -1088,7 +1088,14 @@ class SyncAPIClient(BaseClient[httpx.Client, Stream[Any]]):
 
         origin = get_origin(cast_to) or cast_to
 
-        if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse):
+        if (
+            inspect.isclass(origin)
+            and issubclass(origin, BaseAPIResponse)
+            # we only want to actually return the custom BaseAPIResponse class if we're
+            # returning the raw response, or if we're not streaming SSE, as if we're streaming
+            # SSE then `cast_to` doesn't actively reflect the type we need to parse into
+            and (not stream or bool(response.request.headers.get(RAW_RESPONSE_HEADER)))
+        ):
             if not issubclass(origin, APIResponse):
                 raise TypeError(f"API Response types must subclass {APIResponse}; Received {origin}")
 
@@ -1606,7 +1613,14 @@ class AsyncAPIClient(BaseClient[httpx.AsyncClient, AsyncStream[Any]]):
 
         origin = get_origin(cast_to) or cast_to
 
-        if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse):
+        if (
+            inspect.isclass(origin)
+            and issubclass(origin, BaseAPIResponse)
+            # we only want to actually return the custom BaseAPIResponse class if we're
+            # returning the raw response, or if we're not streaming SSE, as if we're streaming
+            # SSE then `cast_to` doesn't actively reflect the type we need to parse into
+            and (not stream or bool(response.request.headers.get(RAW_RESPONSE_HEADER)))
+        ):
             if not issubclass(origin, AsyncAPIResponse):
                 raise TypeError(f"API Response types must subclass {AsyncAPIResponse}; Received {origin}")
 
src/openai/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "openai"
-__version__ = "1.86.0"  # x-release-please-version
+__version__ = "1.87.0"  # x-release-please-version
tests/api_resources/test_images.py
@@ -77,6 +77,8 @@ class TestImages:
             mask=b"raw file contents",
             model="string",
             n=1,
+            output_compression=100,
+            output_format="png",
             quality="high",
             response_format="url",
             size="1024x1024",
@@ -223,6 +225,8 @@ class TestAsyncImages:
             mask=b"raw file contents",
             model="string",
             n=1,
+            output_compression=100,
+            output_format="png",
             quality="high",
             response_format="url",
             size="1024x1024",
tests/api_resources/test_responses.py
@@ -9,7 +9,9 @@ import pytest
 
 from openai import OpenAI, AsyncOpenAI
 from tests.utils import assert_matches_type
-from openai.types.responses import Response
+from openai.types.responses import (
+    Response,
+)
 
 base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
 
@@ -37,6 +39,11 @@ class TestResponses:
             metadata={"foo": "string"},
             parallel_tool_calls=True,
             previous_response_id="previous_response_id",
+            prompt={
+                "id": "id",
+                "variables": {"foo": "string"},
+                "version": "version",
+            },
             reasoning={
                 "effort": "low",
                 "generate_summary": "auto",
@@ -111,6 +118,11 @@ class TestResponses:
             metadata={"foo": "string"},
             parallel_tool_calls=True,
             previous_response_id="previous_response_id",
+            prompt={
+                "id": "id",
+                "variables": {"foo": "string"},
+                "version": "version",
+            },
             reasoning={
                 "effort": "low",
                 "generate_summary": "auto",
@@ -362,6 +374,11 @@ class TestAsyncResponses:
             metadata={"foo": "string"},
             parallel_tool_calls=True,
             previous_response_id="previous_response_id",
+            prompt={
+                "id": "id",
+                "variables": {"foo": "string"},
+                "version": "version",
+            },
             reasoning={
                 "effort": "low",
                 "generate_summary": "auto",
@@ -436,6 +453,11 @@ class TestAsyncResponses:
             metadata={"foo": "string"},
             parallel_tool_calls=True,
             previous_response_id="previous_response_id",
+            prompt={
+                "id": "id",
+                "variables": {"foo": "string"},
+                "version": "version",
+            },
             reasoning={
                 "effort": "low",
                 "generate_summary": "auto",
tests/conftest.py
@@ -1,3 +1,5 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
 from __future__ import annotations
 
 import os
tests/test_client.py
@@ -28,7 +28,14 @@ from openai._models import BaseModel, FinalRequestOptions
 from openai._constants import RAW_RESPONSE_HEADER
 from openai._streaming import Stream, AsyncStream
 from openai._exceptions import OpenAIError, APIStatusError, APITimeoutError, APIResponseValidationError
-from openai._base_client import DEFAULT_TIMEOUT, HTTPX_DEFAULT_TIMEOUT, BaseClient, make_request_options
+from openai._base_client import (
+    DEFAULT_TIMEOUT,
+    HTTPX_DEFAULT_TIMEOUT,
+    BaseClient,
+    DefaultHttpxClient,
+    DefaultAsyncHttpxClient,
+    make_request_options,
+)
 from openai.types.chat.completion_create_params import CompletionCreateParamsNonStreaming
 
 from .utils import update_env
@@ -908,6 +915,28 @@ class TestOpenAI:
             assert response.retries_taken == failures_before_success
             assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success
 
+    def test_proxy_environment_variables(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        # Test that the proxy environment variables are set correctly
+        monkeypatch.setenv("HTTPS_PROXY", "https://example.org")
+
+        client = DefaultHttpxClient()
+
+        mounts = tuple(client._mounts.items())
+        assert len(mounts) == 1
+        assert mounts[0][0].pattern == "https://"
+
+    @pytest.mark.filterwarnings("ignore:.*deprecated.*:DeprecationWarning")
+    def test_default_client_creation(self) -> None:
+        # Ensure that the client can be initialized without any exceptions
+        DefaultHttpxClient(
+            verify=True,
+            cert=None,
+            trust_env=True,
+            http1=True,
+            http2=False,
+            limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
+        )
+
     @pytest.mark.respx(base_url=base_url)
     def test_follow_redirects(self, respx_mock: MockRouter) -> None:
         # Test that the default follow_redirects=True allows following redirects
@@ -1857,6 +1886,28 @@ class TestAsyncOpenAI:
 
                 time.sleep(0.1)
 
+    async def test_proxy_environment_variables(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        # Test that the proxy environment variables are set correctly
+        monkeypatch.setenv("HTTPS_PROXY", "https://example.org")
+
+        client = DefaultAsyncHttpxClient()
+
+        mounts = tuple(client._mounts.items())
+        assert len(mounts) == 1
+        assert mounts[0][0].pattern == "https://"
+
+    @pytest.mark.filterwarnings("ignore:.*deprecated.*:DeprecationWarning")
+    async def test_default_client_creation(self) -> None:
+        # Ensure that the client can be initialized without any exceptions
+        DefaultAsyncHttpxClient(
+            verify=True,
+            cert=None,
+            trust_env=True,
+            http1=True,
+            http2=False,
+            limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
+        )
+
     @pytest.mark.respx(base_url=base_url)
     async def test_follow_redirects(self, respx_mock: MockRouter) -> None:
         # Test that the default follow_redirects=True allows following redirects
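
A hedged sketch of the client-instantiation pattern the new tests cover: `DefaultHttpxClient` honours proxy environment variables such as `HTTPS_PROXY` (via `trust_env`) and accepts standard httpx constructor options, and can be handed to the SDK client:

```python
import httpx
from openai import OpenAI, DefaultHttpxClient

# HTTPS_PROXY / HTTP_PROXY set in the environment are picked up automatically
# because trust_env defaults to True; the explicit options mirror the tests above.
http_client = DefaultHttpxClient(
    trust_env=True,
    http2=False,
    limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
)

client = OpenAI(http_client=http_client)
```
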
.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.86.0"
+  ".": "1.87.0"
 }
\ No newline at end of file
.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 111
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-3ae9c18dd7ccfc3ac5206f24394665f563a19015cfa8847b2801a2694d012abc.yml
-openapi_spec_hash: 48175b03b58805cd5c80793c66fd54e5
-config_hash: 4caff63b74a41f71006987db702f2918
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-9e41d2d5471d2c28bff0d616f4476f5b0e6c541ef4cb51bdaaef5fdf5e13c8b2.yml
+openapi_spec_hash: 86f765e18d00e32cf2ce9db7ab84d946
+config_hash: fd2af1d5eff0995bb7dc02ac9a34851d
api.md
@@ -750,6 +750,7 @@ from openai.types.responses import (
     ResponseOutputRefusal,
     ResponseOutputText,
     ResponseOutputTextAnnotationAddedEvent,
+    ResponsePrompt,
     ResponseQueuedEvent,
     ResponseReasoningDeltaEvent,
     ResponseReasoningDoneEvent,
CHANGELOG.md
@@ -1,5 +1,25 @@
 # Changelog
 
+## 1.87.0 (2025-06-16)
+
+Full Changelog: [v1.86.0...v1.87.0](https://github.com/openai/openai-python/compare/v1.86.0...v1.87.0)
+
+### Features
+
+* **api:** add reusable prompt IDs ([36bfe6e](https://github.com/openai/openai-python/commit/36bfe6e8ae12a31624ba1a360d9260f0aeec448a))
+
+
+### Bug Fixes
+
+* **client:** update service_tier on `client.beta.chat.completions` ([aa488d5](https://github.com/openai/openai-python/commit/aa488d5cf210d8640f87216538d4ff79d7181f2a))
+
+
+### Chores
+
+* **internal:** codegen related update ([b1a31e5](https://github.com/openai/openai-python/commit/b1a31e5ef4387d9f82cf33f9461371651788d381))
+* **internal:** update conftest.py ([bba0213](https://github.com/openai/openai-python/commit/bba0213842a4c161f2235e526d50901a336eecef))
+* **tests:** add tests for httpx client instantiation & proxies ([bc93712](https://github.com/openai/openai-python/commit/bc9371204f457aee9ed9b6ec1b61c2084f32faf1))
+
 ## 1.86.0 (2025-06-10)
 
 Full Changelog: [v1.85.0...v1.86.0](https://github.com/openai/openai-python/compare/v1.85.0...v1.86.0)
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openai"
-version = "1.86.0"
+version = "1.87.0"
 description = "The official Python library for the openai API"
 dynamic = ["readme"]
 license = "Apache-2.0"
@@ -68,6 +68,7 @@ dev-dependencies = [
     "types-pyaudio > 0",
     "trio >=0.22.2",
     "nest_asyncio==1.6.0",
+    "pytest-xdist>=3.6.1",
 ]
 
 [tool.rye.scripts]
@@ -139,7 +140,7 @@ replacement = '[\1](https://github.com/openai/openai-python/tree/main/\g<2>)'
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
-addopts = "--tb=short"
+addopts = "--tb=short -n auto"
 xfail_strict = true
 asyncio_mode = "auto"
 asyncio_default_fixture_loop_scope = "session"
requirements-dev.lock
@@ -54,6 +54,8 @@ exceptiongroup==1.2.2
     # via anyio
     # via pytest
     # via trio
+execnet==2.1.1
+    # via pytest-xdist
 executing==2.1.0
     # via inline-snapshot
 filelock==3.12.4
@@ -129,7 +131,9 @@ pyjwt==2.8.0
 pyright==1.1.399
 pytest==8.3.3
     # via pytest-asyncio
+    # via pytest-xdist
 pytest-asyncio==0.24.0
+pytest-xdist==3.7.0
 python-dateutil==2.8.2
     # via pandas
     # via time-machine