Commit f588695f

stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
2025-05-23 04:07:15
release: 1.82.0 (#2372) tag: v1.82.0
* Add background streaming
* -m rest of the implementation
* docs(readme): fix async example
* docs(readme): another async example fix
* fix(azure): mark images/edits as a deployment endpoint #2371
* feat(api): new streaming helpers for background responses
* release: 1.82.0

---------

Co-authored-by: pakrym-oai <pakrym@openai.com>
Co-authored-by: Robert Craigie <robert@craigie.dev>
Co-authored-by: Kevin Whinnery <kwhinnery@openai.com>
Co-authored-by: Friedel van Megen <fmegen@microsoft.com>
Co-authored-by: stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
1 parent 71058dd
examples/responses/background.py
@@ -0,0 +1,46 @@
+from typing import List
+
+import rich
+from pydantic import BaseModel
+
+from openai import OpenAI
+
+
+class Step(BaseModel):
+    explanation: str
+    output: str
+
+
+class MathResponse(BaseModel):
+    steps: List[Step]
+    final_answer: str
+
+
+client = OpenAI()
+id = None
+
+with client.responses.create(
+    input="solve 8x + 31 = 2",
+    model="gpt-4o-2024-08-06",
+    background=True,
+    stream=True,
+) as stream:
+    for event in stream:
+        if event.type == "response.created":
+            id = event.response.id
+        if "output_text" in event.type:
+            rich.print(event)
+        if event.sequence_number == 10:
+            break
+
+print("Interrupted. Continuing...")
+
+assert id is not None
+with client.responses.retrieve(
+    response_id=id,
+    stream=True,
+    starting_after=10,
+) as stream:
+    for event in stream:
+        if "output_text" in event.type:
+            rich.print(event)
examples/responses/background_async.py
@@ -0,0 +1,52 @@
+import asyncio
+from typing import List
+
+import rich
+from pydantic import BaseModel
+
+from openai import AsyncOpenAI
+
+
+class Step(BaseModel):
+    explanation: str
+    output: str
+
+
+class MathResponse(BaseModel):
+    steps: List[Step]
+    final_answer: str
+
+
+async def main() -> None:
+    client = AsyncOpenAI()
+    id = None
+
+    async with await client.responses.create(
+        input="solve 8x + 31 = 2",
+        model="gpt-4o-2024-08-06",
+        background=True,
+        stream=True,
+    ) as stream:
+        async for event in stream:
+            if event.type == "response.created":
+                id = event.response.id
+            if "output_text" in event.type:
+                rich.print(event)
+            if event.sequence_number == 10:
+                break
+
+    print("Interrupted. Continuing...")
+
+    assert id is not None
+    async with await client.responses.retrieve(
+        response_id=id,
+        stream=True,
+        starting_after=10,
+    ) as stream:
+        async for event in stream:
+            if "output_text" in event.type:
+                rich.print(event)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
examples/responses/background_streaming.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env -S rye run python
+from typing import List
+
+import rich
+from pydantic import BaseModel
+
+from openai import OpenAI
+
+
+class Step(BaseModel):
+    explanation: str
+    output: str
+
+
+class MathResponse(BaseModel):
+    steps: List[Step]
+    final_answer: str
+
+
+client = OpenAI()
+id = None
+with client.responses.stream(
+    input="solve 8x + 31 = 2",
+    model="gpt-4o-2024-08-06",
+    text_format=MathResponse,
+    background=True,
+) as stream:
+    for event in stream:
+        if event.type == "response.created":
+            id = event.response.id
+        if "output_text" in event.type:
+            rich.print(event)
+        if event.sequence_number == 10:
+            break
+
+print("Interrupted. Continuing...")
+
+assert id is not None
+with client.responses.stream(
+    response_id=id,
+    starting_after=10,
+    text_format=MathResponse,
+) as stream:
+    for event in stream:
+        if "output_text" in event.type:
+            rich.print(event)
+
+    rich.print(stream.get_final_response())
examples/responses/background_streaming_async.py
@@ -0,0 +1,53 @@
+import asyncio
+from typing import List
+
+import rich
+from pydantic import BaseModel
+
+from openai import AsyncOpenAI
+
+
+class Step(BaseModel):
+    explanation: str
+    output: str
+
+
+class MathResponse(BaseModel):
+    steps: List[Step]
+    final_answer: str
+
+
+async def main() -> None:
+    client = AsyncOpenAI()
+    id = None
+    async with client.responses.stream(
+        input="solve 8x + 31 = 2",
+        model="gpt-4o-2024-08-06",
+        text_format=MathResponse,
+        background=True,
+    ) as stream:
+        async for event in stream:
+            if event.type == "response.created":
+                id = event.response.id
+            if "output_text" in event.type:
+                rich.print(event)
+            if event.sequence_number == 10:
+                break
+
+    print("Interrupted. Continuing...")
+
+    assert id is not None
+    async with client.responses.stream(
+        response_id=id,
+        starting_after=10,
+        text_format=MathResponse,
+    ) as stream:
+        async for event in stream:
+            if "output_text" in event.type:
+                rich.print(event)
+
+        rich.print(stream.get_final_response())
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
src/openai/lib/streaming/responses/_events.py
@@ -31,7 +31,6 @@ from ....types.responses import (
     ResponseMcpCallInProgressEvent,
     ResponseMcpListToolsFailedEvent,
     ResponseAudioTranscriptDoneEvent,
-    ResponseTextAnnotationDeltaEvent,
     ResponseAudioTranscriptDeltaEvent,
     ResponseMcpCallArgumentsDoneEvent,
     ResponseReasoningSummaryDoneEvent,
@@ -118,7 +117,6 @@ ResponseStreamEvent: TypeAlias = Annotated[
         ResponseOutputItemDoneEvent,
         ResponseRefusalDeltaEvent,
         ResponseRefusalDoneEvent,
-        ResponseTextAnnotationDeltaEvent,
         ResponseTextDoneEvent,
         ResponseWebSearchCallCompletedEvent,
         ResponseWebSearchCallInProgressEvent,
src/openai/lib/streaming/responses/_responses.py
@@ -34,11 +34,13 @@ class ResponseStream(Generic[TextFormatT]):
         raw_stream: Stream[RawResponseStreamEvent],
         text_format: type[TextFormatT] | NotGiven,
         input_tools: Iterable[ToolParam] | NotGiven,
+        starting_after: int | None,
     ) -> None:
         self._raw_stream = raw_stream
         self._response = raw_stream.response
         self._iterator = self.__stream__()
         self._state = ResponseStreamState(text_format=text_format, input_tools=input_tools)
+        self._starting_after = starting_after
 
     def __next__(self) -> ResponseStreamEvent[TextFormatT]:
         return self._iterator.__next__()
@@ -54,7 +56,8 @@ class ResponseStream(Generic[TextFormatT]):
         for sse_event in self._raw_stream:
             events_to_fire = self._state.handle_event(sse_event)
             for event in events_to_fire:
-                yield event
+                if self._starting_after is None or event.sequence_number > self._starting_after:
+                    yield event
 
     def __exit__(
         self,
@@ -96,11 +99,13 @@ class ResponseStreamManager(Generic[TextFormatT]):
         *,
         text_format: type[TextFormatT] | NotGiven,
         input_tools: Iterable[ToolParam] | NotGiven,
+        starting_after: int | None,
     ) -> None:
         self.__stream: ResponseStream[TextFormatT] | None = None
         self.__api_request = api_request
         self.__text_format = text_format
         self.__input_tools = input_tools
+        self.__starting_after = starting_after
 
     def __enter__(self) -> ResponseStream[TextFormatT]:
         raw_stream = self.__api_request()
@@ -109,6 +114,7 @@ class ResponseStreamManager(Generic[TextFormatT]):
             raw_stream=raw_stream,
             text_format=self.__text_format,
             input_tools=self.__input_tools,
+            starting_after=self.__starting_after,
         )
 
         return self.__stream
@@ -130,11 +136,13 @@ class AsyncResponseStream(Generic[TextFormatT]):
         raw_stream: AsyncStream[RawResponseStreamEvent],
         text_format: type[TextFormatT] | NotGiven,
         input_tools: Iterable[ToolParam] | NotGiven,
+        starting_after: int | None,
     ) -> None:
         self._raw_stream = raw_stream
         self._response = raw_stream.response
         self._iterator = self.__stream__()
         self._state = ResponseStreamState(text_format=text_format, input_tools=input_tools)
+        self._starting_after = starting_after
 
     async def __anext__(self) -> ResponseStreamEvent[TextFormatT]:
         return await self._iterator.__anext__()
@@ -147,7 +155,8 @@ class AsyncResponseStream(Generic[TextFormatT]):
         async for sse_event in self._raw_stream:
             events_to_fire = self._state.handle_event(sse_event)
             for event in events_to_fire:
-                yield event
+                if self._starting_after is None or event.sequence_number > self._starting_after:
+                    yield event
 
     async def __aenter__(self) -> Self:
         return self
@@ -192,11 +201,13 @@ class AsyncResponseStreamManager(Generic[TextFormatT]):
         *,
         text_format: type[TextFormatT] | NotGiven,
         input_tools: Iterable[ToolParam] | NotGiven,
+        starting_after: int | None,
     ) -> None:
         self.__stream: AsyncResponseStream[TextFormatT] | None = None
         self.__api_request = api_request
         self.__text_format = text_format
         self.__input_tools = input_tools
+        self.__starting_after = starting_after
 
     async def __aenter__(self) -> AsyncResponseStream[TextFormatT]:
         raw_stream = await self.__api_request
@@ -205,6 +216,7 @@ class AsyncResponseStreamManager(Generic[TextFormatT]):
             raw_stream=raw_stream,
             text_format=self.__text_format,
             input_tools=self.__input_tools,
+            starting_after=self.__starting_after,
         )
 
         return self.__stream
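
The `starting_after` cursor is enforced client-side in `__stream__`: every event still arrives over the wire, but events whose `sequence_number` is at or below the cutoff are dropped before they reach the caller. A minimal self-contained sketch of that semantics (the `Event` dataclass here is a stand-in for the SDK's parsed stream events, not an SDK type):

```python
from dataclasses import dataclass
from typing import Iterable, Iterator, Optional


@dataclass
class Event:
    # Stand-in for a parsed response stream event.
    sequence_number: int
    type: str


def replay(events: Iterable[Event], starting_after: Optional[int] = None) -> Iterator[Event]:
    # Mirrors the filter in __stream__: pass everything through when no
    # cursor is set, otherwise skip events up to and including the cursor.
    for event in events:
        if starting_after is None or event.sequence_number > starting_after:
            yield event


events = [Event(i, "response.output_text.delta") for i in range(1, 15)]
assert [e.sequence_number for e in replay(events, starting_after=10)] == [11, 12, 13, 14]
```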
src/openai/lib/azure.py
@@ -25,6 +25,7 @@ _deployments_endpoints = set(
         "/audio/translations",
         "/audio/speech",
         "/images/generations",
+        "/images/edits",
     ]
 )
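
With `/images/edits` added to `_deployments_endpoints`, the Azure client now builds a deployment-scoped URL for image edits, matching the other deployment endpoints. A hedged usage sketch; the endpoint, deployment name, and `api_version` below are placeholder assumptions:

```python
from openai import AzureOpenAI

# Assumes AZURE_OPENAI_API_KEY is set in the environment; all values below
# are placeholders for illustration.
client = AzureOpenAI(
    azure_endpoint="https://my-resource.openai.azure.com",
    azure_deployment="my-image-deployment",
    api_version="2025-04-01-preview",
)

# After this fix the request goes to
# /openai/deployments/my-image-deployment/images/edits rather than the
# non-deployment /openai/images/edits path.
with open("sketch.png", "rb") as image:
    result = client.images.edit(
        model="my-image-deployment",
        image=image,
        prompt="Add a lighthouse to the horizon",
    )
```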
 
src/openai/resources/beta/realtime/realtime.py
@@ -820,7 +820,7 @@ class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
         stop generating audio and emit a `output_audio_buffer.cleared` event. This
         event should be preceded by a `response.cancel` client event to stop the
         generation of the current response.
-        [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc).
+        [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
         """
         self._connection.send(
             cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
@@ -1072,7 +1072,7 @@ class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource
         stop generating audio and emit a `output_audio_buffer.cleared` event. This
         event should be preceded by a `response.cancel` client event to stop the
         generation of the current response.
-        [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc).
+        [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
         """
         await self._connection.send(
             cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
src/openai/resources/chat/completions/completions.py
@@ -323,8 +323,8 @@ class Completions(SyncAPIResource):
 
               We generally recommend altering this or `temperature` but not both.
 
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
+          user: A stable identifier for your end-users. Used to boost cache hit rates by better
+              bucketing similar requests and to help OpenAI detect and prevent abuse.
               [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
 
           web_search_options: This tool searches the web for relevant results to use in a response. Learn more
@@ -592,8 +592,8 @@ class Completions(SyncAPIResource):
 
               We generally recommend altering this or `temperature` but not both.
 
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
+          user: A stable identifier for your end-users. Used to boost cache hit rates by better
+              bucketing similar requests and to help OpenAI detect and prevent abuse.
               [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
 
           web_search_options: This tool searches the web for relevant results to use in a response. Learn more
@@ -861,8 +861,8 @@ class Completions(SyncAPIResource):
 
               We generally recommend altering this or `temperature` but not both.
 
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
+          user: A stable identifier for your end-users. Used to boost cache hit rates by better
+              bucketing similar requests and to help OpenAI detect and prevent abuse.
               [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
 
           web_search_options: This tool searches the web for relevant results to use in a response. Learn more
@@ -1426,8 +1426,8 @@ class AsyncCompletions(AsyncAPIResource):
 
               We generally recommend altering this or `temperature` but not both.
 
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
+          user: A stable identifier for your end-users. Used to boost cache hit rates by better
+              bucketing similar requests and to help OpenAI detect and prevent abuse.
               [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
 
           web_search_options: This tool searches the web for relevant results to use in a response. Learn more
@@ -1695,8 +1695,8 @@ class AsyncCompletions(AsyncAPIResource):
 
               We generally recommend altering this or `temperature` but not both.
 
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
+          user: A stable identifier for your end-users. Used to boost cache hit rates by better
+              bucketing similar requests and to help OpenAI detect and prevent abuse.
               [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
 
           web_search_options: This tool searches the web for relevant results to use in a response. Learn more
@@ -1964,8 +1964,8 @@ class AsyncCompletions(AsyncAPIResource):
 
               We generally recommend altering this or `temperature` but not both.
 
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
+          user: A stable identifier for your end-users. Used to boost cache hit rates by better
+              bucketing similar requests and to help OpenAI detect and prevent abuse.
               [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
 
           web_search_options: This tool searches the web for relevant results to use in a response. Learn more
src/openai/resources/containers/files/files.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+from typing import Mapping, cast
 from typing_extensions import Literal
 
 import httpx
@@ -16,7 +17,7 @@ from .content import (
     AsyncContentWithStreamingResponse,
 )
 from ...._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven, FileTypes
-from ...._utils import maybe_transform, async_maybe_transform
+from ...._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
 from ...._compat import cached_property
 from ...._resource import SyncAPIResource, AsyncAPIResource
 from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
@@ -88,15 +89,21 @@ class Files(SyncAPIResource):
         """
         if not container_id:
             raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}")
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "file_id": file_id,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
         return self._post(
             f"/containers/{container_id}/files",
-            body=maybe_transform(
-                {
-                    "file": file,
-                    "file_id": file_id,
-                },
-                file_create_params.FileCreateParams,
-            ),
+            body=maybe_transform(body, file_create_params.FileCreateParams),
+            files=files,
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
@@ -295,15 +302,21 @@ class AsyncFiles(AsyncAPIResource):
         """
         if not container_id:
             raise ValueError(f"Expected a non-empty value for `container_id` but received {container_id!r}")
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "file_id": file_id,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
         return await self._post(
             f"/containers/{container_id}/files",
-            body=await async_maybe_transform(
-                {
-                    "file": file,
-                    "file_id": file_id,
-                },
-                file_create_params.FileCreateParams,
-            ),
+            body=await async_maybe_transform(body, file_create_params.FileCreateParams),
+            files=files,
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
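
The rewrite above switches `containers.files.create` from JSON-encoding the `file` parameter to a proper `multipart/form-data` upload via `extract_files`. A short usage sketch (the container ID is a placeholder):

```python
from openai import OpenAI

client = OpenAI()

# The file is now sent as a multipart part rather than serialized into the
# JSON body. "cntr_abc123" is a placeholder container ID.
with open("data.csv", "rb") as f:
    container_file = client.containers.files.create(
        container_id="cntr_abc123",
        file=f,
    )
print(container_file.id)
```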
src/openai/resources/responses/input_items.py
@@ -72,7 +72,7 @@ class InputItems(SyncAPIResource):
           limit: A limit on the number of objects to be returned. Limit can range between 1 and
               100, and the default is 20.
 
-          order: The order to return the input items in. Default is `asc`.
+          order: The order to return the input items in. Default is `desc`.
 
               - `asc`: Return the input items in ascending order.
               - `desc`: Return the input items in descending order.
@@ -160,7 +160,7 @@ class AsyncInputItems(AsyncAPIResource):
           limit: A limit on the number of objects to be returned. Limit can range between 1 and
               100, and the default is 20.
 
-          order: The order to return the input items in. Default is `asc`.
+          order: The order to return the input items in. Default is `desc`.
 
               - `asc`: Return the input items in ascending order.
               - `desc`: Return the input items in descending order.
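
Because the documented default order is now `desc` (newest first), callers that depended on the previously documented ascending default should request it explicitly. A brief sketch with a placeholder response ID:

```python
from openai import OpenAI

client = OpenAI()

# Ask for the oldest input items first; "resp_123" is a placeholder.
items = client.responses.input_items.list("resp_123", order="asc", limit=20)
for item in items:
    print(item.id)
```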
src/openai/resources/responses/responses.py
@@ -250,8 +250,8 @@ class Responses(SyncAPIResource):
               - `disabled` (default): If a model response will exceed the context window size
                 for a model, the request will fail with a 400 error.
 
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
+          user: A stable identifier for your end-users. Used to boost cache hit rates by better
+              bucketing similar requests and to help OpenAI detect and prevent abuse.
               [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
 
           extra_headers: Send extra headers
@@ -443,8 +443,8 @@ class Responses(SyncAPIResource):
               - `disabled` (default): If a model response will exceed the context window size
                 for a model, the request will fail with a 400 error.
 
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
+          user: A stable identifier for your end-users. Used to boost cache hit rates by better
+              bucketing similar requests and to help OpenAI detect and prevent abuse.
               [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
 
           extra_headers: Send extra headers
@@ -636,8 +636,8 @@ class Responses(SyncAPIResource):
               - `disabled` (default): If a model response will exceed the context window size
                 for a model, the request will fail with a 400 error.
 
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
+          user: A stable identifier for your end-users. Used to boost cache hit rates by better
+              bucketing similar requests and to help OpenAI detect and prevent abuse.
               [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
 
           extra_headers: Send extra headers
@@ -718,11 +718,28 @@ class Responses(SyncAPIResource):
             stream_cls=Stream[ResponseStreamEvent],
         )
 
+    @overload
+    def stream(
+        self,
+        *,
+        response_id: str,
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        starting_after: int | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ResponseStreamManager[TextFormatT]: ...
+
+    @overload
     def stream(
         self,
         *,
         input: Union[str, ResponseInputParam],
         model: Union[str, ChatModel],
+        background: Optional[bool] | NotGiven = NOT_GIVEN,
         text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
         tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
         include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
@@ -745,49 +762,129 @@ class Responses(SyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ResponseStreamManager[TextFormatT]:
-        if is_given(text_format):
-            if not text:
-                text = {}
-
-            if "format" in text:
-                raise TypeError("Cannot mix and match text.format with text_format")
-
-            text["format"] = _type_to_text_format_param(text_format)
+    ) -> ResponseStreamManager[TextFormatT]: ...
 
+    def stream(
+        self,
+        *,
+        response_id: str | NotGiven = NOT_GIVEN,
+        input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel] | NotGiven = NOT_GIVEN,
+        background: Optional[bool] | NotGiven = NOT_GIVEN,
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        starting_after: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ResponseStreamManager[TextFormatT]:
+        new_response_args = {
+            "input": input,
+            "model": model,
+            "include": include,
+            "instructions": instructions,
+            "max_output_tokens": max_output_tokens,
+            "metadata": metadata,
+            "parallel_tool_calls": parallel_tool_calls,
+            "previous_response_id": previous_response_id,
+            "reasoning": reasoning,
+            "store": store,
+            "temperature": temperature,
+            "text": text,
+            "tool_choice": tool_choice,
+            "top_p": top_p,
+            "truncation": truncation,
+            "user": user,
+            "background": background,
+        }
+        new_response_args_names = [k for k, v in new_response_args.items() if is_given(v)]
+
+        if (is_given(response_id) or is_given(starting_after)) and len(new_response_args_names) > 0:
+            raise ValueError(
+                "Cannot provide both response_id/starting_after can't be provided together with "
+                + ", ".join(new_response_args_names)
+            )
         tools = _make_tools(tools)
+        if len(new_response_args_names) > 0:
+            if not is_given(input):
+                raise ValueError("input must be provided when creating a new response")
+
+            if not is_given(model):
+                raise ValueError("model must be provided when creating a new response")
+
+            if is_given(text_format):
+                if not text:
+                    text = {}
+
+                if "format" in text:
+                    raise TypeError("Cannot mix and match text.format with text_format")
+
+                text["format"] = _type_to_text_format_param(text_format)
+
+            api_request: partial[Stream[ResponseStreamEvent]] = partial(
+                self.create,
+                input=input,
+                model=model,
+                tools=tools,
+                include=include,
+                instructions=instructions,
+                max_output_tokens=max_output_tokens,
+                metadata=metadata,
+                parallel_tool_calls=parallel_tool_calls,
+                previous_response_id=previous_response_id,
+                store=store,
+                stream=True,
+                temperature=temperature,
+                text=text,
+                tool_choice=tool_choice,
+                reasoning=reasoning,
+                top_p=top_p,
+                truncation=truncation,
+                user=user,
+                background=background,
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+            )
 
-        api_request: partial[Stream[ResponseStreamEvent]] = partial(
-            self.create,
-            input=input,
-            model=model,
-            tools=tools,
-            include=include,
-            instructions=instructions,
-            max_output_tokens=max_output_tokens,
-            metadata=metadata,
-            parallel_tool_calls=parallel_tool_calls,
-            previous_response_id=previous_response_id,
-            store=store,
-            stream=True,
-            temperature=temperature,
-            text=text,
-            tool_choice=tool_choice,
-            reasoning=reasoning,
-            top_p=top_p,
-            truncation=truncation,
-            user=user,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            timeout=timeout,
-        )
-
-        return ResponseStreamManager(
-            api_request,
-            text_format=text_format,
-            input_tools=tools,
-        )
+            return ResponseStreamManager(api_request, text_format=text_format, input_tools=tools, starting_after=None)
+        else:
+            if not is_given(response_id):
+                raise ValueError("id must be provided when streaming an existing response")
+
+            return ResponseStreamManager(
+                lambda: self.retrieve(
+                    response_id=response_id,
+                    stream=True,
+                    include=include or [],
+                    extra_headers=extra_headers,
+                    extra_query=extra_query,
+                    extra_body=extra_body,
+                    starting_after=NOT_GIVEN,
+                    timeout=timeout,
+                ),
+                text_format=text_format,
+                input_tools=tools,
+                starting_after=starting_after if is_given(starting_after) else None,
+            )
 
     def parse(
         self,
@@ -873,6 +970,7 @@ class Responses(SyncAPIResource):
             cast_to=cast(Type[ParsedResponse[TextFormatT]], Response),
         )
 
+    @overload
     def retrieve(
         self,
         response_id: str,
@@ -884,7 +982,54 @@ class Responses(SyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Response:
+    ) -> Response: ...
+
+    @overload
+    def retrieve(
+        self,
+        response_id: str,
+        *,
+        stream: Literal[True],
+        include: List[ResponseIncludable] | NotGiven = NOT_GIVEN,
+        starting_after: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[ResponseStreamEvent]: ...
+
+    @overload
+    def retrieve(
+        self,
+        response_id: str,
+        *,
+        stream: bool,
+        include: List[ResponseIncludable] | NotGiven = NOT_GIVEN,
+        starting_after: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | Stream[ResponseStreamEvent]: ...
+
+    def retrieve(
+        self,
+        response_id: str,
+        *,
+        stream: bool = False,
+        include: List[ResponseIncludable] | NotGiven = NOT_GIVEN,
+        starting_after: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | Stream[ResponseStreamEvent]:
         """
         Retrieves a model response with the given ID.
 
@@ -892,6 +1037,16 @@ class Responses(SyncAPIResource):
           include: Additional fields to include in the response. See the `include` parameter for
               Response creation above for more information.
 
+          stream: If set to true, the model response data will be streamed to the client using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          starting_after: When retrieving a background response, this parameter can be used to start
+              replaying after an event with the given sequence number. Must be used in conjunction with
+              the `stream` parameter set to `true`.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -909,9 +1064,18 @@ class Responses(SyncAPIResource):
                 extra_query=extra_query,
                 extra_body=extra_body,
                 timeout=timeout,
-                query=maybe_transform({"include": include}, response_retrieve_params.ResponseRetrieveParams),
+                query=maybe_transform(
+                    {
+                        "include": include,
+                        "stream": stream,
+                        "starting_after": starting_after,
+                    },
+                    response_retrieve_params.ResponseRetrieveParams,
+                ),
             ),
             cast_to=Response,
+            stream=stream or False,
+            stream_cls=Stream[ResponseStreamEvent],
         )
 
     def delete(
@@ -1189,8 +1353,8 @@ class AsyncResponses(AsyncAPIResource):
               - `disabled` (default): If a model response will exceed the context window size
                 for a model, the request will fail with a 400 error.
 
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
+          user: A stable identifier for your end-users. Used to boost cache hit rates by better
+              bucketing similar requests and to help OpenAI detect and prevent abuse.
               [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
 
           extra_headers: Send extra headers
@@ -1382,8 +1546,8 @@ class AsyncResponses(AsyncAPIResource):
               - `disabled` (default): If a model response will exceed the context window size
                 for a model, the request will fail with a 400 error.
 
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
+          user: A stable identifier for your end-users. Used to boost cache hit rates by better
+              bucketing similar requests and to help OpenAI detect and prevent abuse.
               [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
 
           extra_headers: Send extra headers
@@ -1575,8 +1739,8 @@ class AsyncResponses(AsyncAPIResource):
               - `disabled` (default): If a model response will exceed the context window size
                 for a model, the request will fail with a 400 error.
 
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
+          user: A stable identifier for your end-users. Used to boost cache hit rates by better
+              bucketing similar requests and to help OpenAI detect and prevent abuse.
               [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
 
           extra_headers: Send extra headers
@@ -1657,11 +1821,28 @@ class AsyncResponses(AsyncAPIResource):
             stream_cls=AsyncStream[ResponseStreamEvent],
         )
 
+    @overload
+    def stream(
+        self,
+        *,
+        response_id: str,
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        starting_after: int | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncResponseStreamManager[TextFormatT]: ...
+
+    @overload
     def stream(
         self,
         *,
         input: Union[str, ResponseInputParam],
         model: Union[str, ChatModel],
+        background: Optional[bool] | NotGiven = NOT_GIVEN,
         text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
         tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
         include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
@@ -1684,48 +1865,133 @@ class AsyncResponses(AsyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncResponseStreamManager[TextFormatT]:
-        if is_given(text_format):
-            if not text:
-                text = {}
-
-            if "format" in text:
-                raise TypeError("Cannot mix and match text.format with text_format")
+    ) -> AsyncResponseStreamManager[TextFormatT]: ...
 
-            text["format"] = _type_to_text_format_param(text_format)
+    def stream(
+        self,
+        *,
+        response_id: str | NotGiven = NOT_GIVEN,
+        input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
+        model: Union[str, ChatModel] | NotGiven = NOT_GIVEN,
+        background: Optional[bool] | NotGiven = NOT_GIVEN,
+        text_format: type[TextFormatT] | NotGiven = NOT_GIVEN,
+        tools: Iterable[ParseableToolParam] | NotGiven = NOT_GIVEN,
+        include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
+        reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
+        tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        starting_after: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncResponseStreamManager[TextFormatT]:
+        new_response_args = {
+            "input": input,
+            "model": model,
+            "include": include,
+            "instructions": instructions,
+            "max_output_tokens": max_output_tokens,
+            "metadata": metadata,
+            "parallel_tool_calls": parallel_tool_calls,
+            "previous_response_id": previous_response_id,
+            "reasoning": reasoning,
+            "store": store,
+            "temperature": temperature,
+            "text": text,
+            "tool_choice": tool_choice,
+            "top_p": top_p,
+            "truncation": truncation,
+            "user": user,
+            "background": background,
+        }
+        new_response_args_names = [k for k, v in new_response_args.items() if is_given(v)]
+
+        if (is_given(response_id) or is_given(starting_after)) and len(new_response_args_names) > 0:
+            raise ValueError(
+                "Cannot provide both response_id/starting_after can't be provided together with "
+                + ", ".join(new_response_args_names)
+            )
 
         tools = _make_tools(tools)
+        if len(new_response_args_names) > 0:
+            if isinstance(input, NotGiven):
+                raise ValueError("input must be provided when creating a new response")
+
+            if not is_given(model):
+                raise ValueError("model must be provided when creating a new response")
+
+            if is_given(text_format):
+                if not text:
+                    text = {}
+
+                if "format" in text:
+                    raise TypeError("Cannot mix and match text.format with text_format")
+
+                text["format"] = _type_to_text_format_param(text_format)
+
+            api_request = self.create(
+                input=input,
+                model=model,
+                stream=True,
+                tools=tools,
+                include=include,
+                instructions=instructions,
+                max_output_tokens=max_output_tokens,
+                metadata=metadata,
+                parallel_tool_calls=parallel_tool_calls,
+                previous_response_id=previous_response_id,
+                store=store,
+                temperature=temperature,
+                text=text,
+                tool_choice=tool_choice,
+                reasoning=reasoning,
+                top_p=top_p,
+                truncation=truncation,
+                user=user,
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+            )
 
-        api_request = self.create(
-            input=input,
-            model=model,
-            tools=tools,
-            include=include,
-            instructions=instructions,
-            max_output_tokens=max_output_tokens,
-            metadata=metadata,
-            parallel_tool_calls=parallel_tool_calls,
-            previous_response_id=previous_response_id,
-            store=store,
-            stream=True,
-            temperature=temperature,
-            text=text,
-            tool_choice=tool_choice,
-            reasoning=reasoning,
-            top_p=top_p,
-            truncation=truncation,
-            user=user,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            timeout=timeout,
-        )
-
-        return AsyncResponseStreamManager(
-            api_request,
-            text_format=text_format,
-            input_tools=tools,
-        )
+            return AsyncResponseStreamManager(
+                api_request,
+                text_format=text_format,
+                input_tools=tools,
+                starting_after=None,
+            )
+        else:
+            if isinstance(response_id, NotGiven):
+                raise ValueError("response_id must be provided when streaming an existing response")
+
+            api_request = self.retrieve(
+                response_id,
+                stream=True,
+                include=include or [],
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+            )
+            return AsyncResponseStreamManager(
+                api_request,
+                text_format=text_format,
+                input_tools=tools,
+                starting_after=starting_after if is_given(starting_after) else None,
+            )
 
     async def parse(
         self,
@@ -1811,6 +2077,7 @@ class AsyncResponses(AsyncAPIResource):
             cast_to=cast(Type[ParsedResponse[TextFormatT]], Response),
         )
 
+    @overload
     async def retrieve(
         self,
         response_id: str,
@@ -1822,7 +2089,54 @@ class AsyncResponses(AsyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Response:
+    ) -> Response: ...
+
+    @overload
+    async def retrieve(
+        self,
+        response_id: str,
+        *,
+        stream: Literal[True],
+        include: List[ResponseIncludable] | NotGiven = NOT_GIVEN,
+        starting_after: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[ResponseStreamEvent]: ...
+
+    @overload
+    async def retrieve(
+        self,
+        response_id: str,
+        *,
+        stream: bool,
+        include: List[ResponseIncludable] | NotGiven = NOT_GIVEN,
+        starting_after: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | AsyncStream[ResponseStreamEvent]: ...
+
+    async def retrieve(
+        self,
+        response_id: str,
+        *,
+        stream: bool = False,
+        include: List[ResponseIncludable] | NotGiven = NOT_GIVEN,
+        starting_after: int | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Response | AsyncStream[ResponseStreamEvent]:
         """
         Retrieves a model response with the given ID.
 
@@ -1830,6 +2144,10 @@ class AsyncResponses(AsyncAPIResource):
           include: Additional fields to include in the response. See the `include` parameter for
               Response creation above for more information.
 
+          stream: If set to true, the model response data will be streamed to the client using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+              for more information.
+
+          starting_after: When retrieving a background response, this parameter can be used to start
+              replaying after an event with the given sequence number. Must be used in conjunction with
+              the `stream` parameter set to `true`.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1848,10 +2166,17 @@ class AsyncResponses(AsyncAPIResource):
                 extra_body=extra_body,
                 timeout=timeout,
                 query=await async_maybe_transform(
-                    {"include": include}, response_retrieve_params.ResponseRetrieveParams
+                    {
+                        "include": include,
+                        "stream": stream,
+                        "starting_after": starting_after,
+                    },
+                    response_retrieve_params.ResponseRetrieveParams,
                 ),
             ),
             cast_to=Response,
+            stream=stream or False,
+            stream_cls=AsyncStream[ResponseStreamEvent],
         )
 
     async def delete(
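
Taken together, the `retrieve(..., stream=True)` overloads and the new `stream(response_id=...)` form give two ways to reattach to a background response. A condensed sketch of the higher-level resume path (the ID and cutoff are placeholders; compare the full examples at the top of this commit):

```python
from openai import OpenAI

client = OpenAI()

# Reattach to an in-flight background response, replaying only events
# after sequence number 10; "resp_123" and the cutoff are placeholders.
with client.responses.stream(
    response_id="resp_123",
    starting_after=10,
) as stream:
    for event in stream:
        print(event.type, event.sequence_number)
```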
src/openai/types/chat/completion_create_params.py
@@ -292,9 +292,10 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     """
 
     user: str
-    """
-    A unique identifier representing your end-user, which can help OpenAI to monitor
-    and detect abuse.
+    """A stable identifier for your end-users.
+
+    Used to boost cache hit rates by better bucketing similar requests and to help
+    OpenAI detect and prevent abuse.
     [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
     """
 
src/openai/types/responses/__init__.py
@@ -97,7 +97,6 @@ from .response_mcp_call_in_progress_event import ResponseMcpCallInProgressEvent
 from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent
 from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam as ResponseFileSearchToolCallParam
 from .response_mcp_list_tools_failed_event import ResponseMcpListToolsFailedEvent as ResponseMcpListToolsFailedEvent
-from .response_text_annotation_delta_event import ResponseTextAnnotationDeltaEvent as ResponseTextAnnotationDeltaEvent
 from .response_audio_transcript_delta_event import (
     ResponseAudioTranscriptDeltaEvent as ResponseAudioTranscriptDeltaEvent,
 )
src/openai/types/responses/input_item_list_params.py
@@ -30,7 +30,7 @@ class InputItemListParams(TypedDict, total=False):
     """
 
     order: Literal["asc", "desc"]
-    """The order to return the input items in. Default is `asc`.
+    """The order to return the input items in. Default is `desc`.
 
     - `asc`: Return the input items in ascending order.
     - `desc`: Return the input items in descending order.
src/openai/types/responses/response.py
@@ -209,9 +209,10 @@ class Response(BaseModel):
     """
 
     user: Optional[str] = None
-    """
-    A unique identifier representing your end-user, which can help OpenAI to monitor
-    and detect abuse.
+    """A stable identifier for your end-users.
+
+    Used to boost cache hit rates by better bucketing similar requests and to help
+    OpenAI detect and prevent abuse.
     [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
     """
 
src/openai/types/responses/response_create_params.py
@@ -199,9 +199,10 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     """
 
     user: str
-    """
-    A unique identifier representing your end-user, which can help OpenAI to monitor
-    and detect abuse.
+    """A stable identifier for your end-users.
+
+    Used to boost cache hit rates by better bucketing similar requests and to help
+    OpenAI detect and prevent abuse.
     [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
     """
 
src/openai/types/responses/response_stream_event.py
@@ -28,7 +28,6 @@ from .response_mcp_call_completed_event import ResponseMcpCallCompletedEvent
 from .response_mcp_call_in_progress_event import ResponseMcpCallInProgressEvent
 from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent
 from .response_mcp_list_tools_failed_event import ResponseMcpListToolsFailedEvent
-from .response_text_annotation_delta_event import ResponseTextAnnotationDeltaEvent
 from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent
 from .response_reasoning_summary_done_event import ResponseReasoningSummaryDoneEvent
 from .response_mcp_call_arguments_done_event import ResponseMcpCallArgumentsDoneEvent
@@ -93,7 +92,6 @@ ResponseStreamEvent: TypeAlias = Annotated[
         ResponseReasoningSummaryTextDoneEvent,
         ResponseRefusalDeltaEvent,
         ResponseRefusalDoneEvent,
-        ResponseTextAnnotationDeltaEvent,
         ResponseTextDeltaEvent,
         ResponseTextDoneEvent,
         ResponseWebSearchCallCompletedEvent,
src/openai/types/responses/response_text_annotation_delta_event.py
@@ -1,82 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union
-from typing_extensions import Literal, Annotated, TypeAlias
-
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
-
-__all__ = [
-    "ResponseTextAnnotationDeltaEvent",
-    "Annotation",
-    "AnnotationFileCitation",
-    "AnnotationURLCitation",
-    "AnnotationFilePath",
-]
-
-
-class AnnotationFileCitation(BaseModel):
-    file_id: str
-    """The ID of the file."""
-
-    index: int
-    """The index of the file in the list of files."""
-
-    type: Literal["file_citation"]
-    """The type of the file citation. Always `file_citation`."""
-
-
-class AnnotationURLCitation(BaseModel):
-    end_index: int
-    """The index of the last character of the URL citation in the message."""
-
-    start_index: int
-    """The index of the first character of the URL citation in the message."""
-
-    title: str
-    """The title of the web resource."""
-
-    type: Literal["url_citation"]
-    """The type of the URL citation. Always `url_citation`."""
-
-    url: str
-    """The URL of the web resource."""
-
-
-class AnnotationFilePath(BaseModel):
-    file_id: str
-    """The ID of the file."""
-
-    index: int
-    """The index of the file in the list of files."""
-
-    type: Literal["file_path"]
-    """The type of the file path. Always `file_path`."""
-
-
-Annotation: TypeAlias = Annotated[
-    Union[AnnotationFileCitation, AnnotationURLCitation, AnnotationFilePath], PropertyInfo(discriminator="type")
-]
-
-
-class ResponseTextAnnotationDeltaEvent(BaseModel):
-    annotation: Annotation
-    """A citation to a file."""
-
-    annotation_index: int
-    """The index of the annotation that was added."""
-
-    content_index: int
-    """The index of the content part that the text annotation was added to."""
-
-    item_id: str
-    """The ID of the output item that the text annotation was added to."""
-
-    output_index: int
-    """The index of the output item that the text annotation was added to."""
-
-    sequence_number: int
-    """The sequence number of this event."""
-
-    type: Literal["response.output_text.annotation.added"]
-    """The type of the event. Always `response.output_text.annotation.added`."""
src/openai/types/responses/response_web_search_call_completed_event.py
@@ -14,5 +14,8 @@ class ResponseWebSearchCallCompletedEvent(BaseModel):
     output_index: int
     """The index of the output item that the web search call is associated with."""
 
+    sequence_number: int
+    """The sequence number of the web search call being processed."""
+
     type: Literal["response.web_search_call.completed"]
     """The type of the event. Always `response.web_search_call.completed`."""
src/openai/types/responses/response_web_search_call_in_progress_event.py
@@ -14,5 +14,8 @@ class ResponseWebSearchCallInProgressEvent(BaseModel):
     output_index: int
     """The index of the output item that the web search call is associated with."""
 
+    sequence_number: int
+    """The sequence number of the web search call being processed."""
+
     type: Literal["response.web_search_call.in_progress"]
     """The type of the event. Always `response.web_search_call.in_progress`."""
src/openai/types/responses/response_web_search_call_searching_event.py
@@ -14,5 +14,8 @@ class ResponseWebSearchCallSearchingEvent(BaseModel):
     output_index: int
     """The index of the output item that the web search call is associated with."""
 
+    sequence_number: int
+    """The sequence number of the web search call being processed."""
+
     type: Literal["response.web_search_call.searching"]
     """The type of the event. Always `response.web_search_call.searching`."""
src/openai/types/responses/tool.py
@@ -53,9 +53,6 @@ class McpRequireApprovalMcpToolApprovalFilter(BaseModel):
     never: Optional[McpRequireApprovalMcpToolApprovalFilterNever] = None
     """A list of tools that never require approval."""
 
-    tool_names: Optional[List[str]] = None
-    """List of allowed tool names."""
-
 
 McpRequireApproval: TypeAlias = Union[McpRequireApprovalMcpToolApprovalFilter, Literal["always", "never"], None]
 
src/openai/types/responses/tool_param.py
@@ -53,9 +53,6 @@ class McpRequireApprovalMcpToolApprovalFilter(TypedDict, total=False):
     never: McpRequireApprovalMcpToolApprovalFilterNever
     """A list of tools that never require approval."""
 
-    tool_names: List[str]
-    """List of allowed tool names."""
-
 
 McpRequireApproval: TypeAlias = Union[McpRequireApprovalMcpToolApprovalFilter, Literal["always", "never"]]
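
After dropping the top-level `tool_names`, allowed-tool lists live only on the `always`/`never` sub-filters of `require_approval`. A hedged sketch of the resulting param shape (the server label and URL are placeholders, and the sub-filter key is an assumption based on the filter types above):

```python
# Placeholder MCP tool param illustrating the require_approval shape after
# this change: tool name lists belong to the `never` (or `always`)
# sub-filter, not the top-level filter.
mcp_tool = {
    "type": "mcp",
    "server_label": "deepwiki",  # placeholder
    "server_url": "https://example.com/mcp",  # placeholder
    "require_approval": {
        "never": {"tool_names": ["search", "fetch"]},
    },
}
```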
 
src/openai/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "openai"
-__version__ = "1.81.0"  # x-release-please-version
+__version__ = "1.82.0"  # x-release-please-version
.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.81.0"
+  ".": "1.82.0"
 }
\ No newline at end of file
.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 111
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-6af14840a810139bf407013167ce1c8fb21b6ef8eb0cc3db58b51af7d52c4b5a.yml
-openapi_spec_hash: 3241bde6b273cfec0035e522bd07985d
-config_hash: 7367b68a4e7db36885c1a886f57b17f6
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-fc64d7c2c8f51f750813375356c3f3fdfc7fc1b1b34f19c20a5410279d445d37.yml
+openapi_spec_hash: 618285fc70199ee32b9ebe4bf72f7e4c
+config_hash: c497f6b750cc89c0bf2eefc0bc839c70
api.md
@@ -764,7 +764,6 @@ from openai.types.responses import (
     ResponseRefusalDoneEvent,
     ResponseStatus,
     ResponseStreamEvent,
-    ResponseTextAnnotationDeltaEvent,
     ResponseTextConfig,
     ResponseTextDeltaEvent,
     ResponseTextDoneEvent,
CHANGELOG.md
@@ -1,5 +1,24 @@
 # Changelog
 
+## 1.82.0 (2025-05-22)
+
+Full Changelog: [v1.81.0...v1.82.0](https://github.com/openai/openai-python/compare/v1.81.0...v1.82.0)
+
+### Features
+
+* **api:** new streaming helpers for background responses ([2a65d4d](https://github.com/openai/openai-python/commit/2a65d4de0aaba7801edd0df10f225530fd4969bd))
+
+
+### Bug Fixes
+
+* **azure:** mark images/edits as a deployment endpoint [#2371](https://github.com/openai/openai-python/issues/2371) ([5d1d5b4](https://github.com/openai/openai-python/commit/5d1d5b4b6072afe9fd7909b1a36014c8c11c1ad6))
+
+
+### Documentation
+
+* **readme:** another async example fix ([9ec8289](https://github.com/openai/openai-python/commit/9ec8289041f395805c67efd97847480f84eb9dac))
+* **readme:** fix async example ([37d0b25](https://github.com/openai/openai-python/commit/37d0b25b6e82cd381e5d1aa6e28f1a1311d02353))
+
 ## 1.81.0 (2025-05-21)
 
 Full Changelog: [v1.80.0...v1.81.0](https://github.com/openai/openai-python/compare/v1.80.0...v1.81.0)
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openai"
-version = "1.81.0"
+version = "1.82.0"
 description = "The official Python library for the openai API"
 dynamic = ["readme"]
 license = "Apache-2.0"
README.md
@@ -174,13 +174,13 @@ client = AsyncOpenAI()
 
 
 async def main():
-    stream = client.responses.create(
+    stream = await client.responses.create(
         model="gpt-4o",
         input="Write a one-sentence bedtime story about a unicorn.",
         stream=True,
     )
 
-    for event in stream:
+    async for event in stream:
         print(event)