Commit 5429f696

Stainless Bot <107565488+stainless-bot@users.noreply.github.com>
2024-03-14 04:35:35
release: 1.14.0 (#1234) tag: v1.14.0
* feat(assistants): add support for streaming (#1233)

  See the reference docs for more information:
  https://platform.openai.com/docs/api-reference/assistants-streaming

  We've also improved some of the names for the types in the assistants beta; a non-exhaustive list:

  - `CodeToolCall` -> `CodeInterpreterToolCall`
  - `MessageContentImageFile` -> `ImageFileContentBlock`
  - `MessageContentText` -> `TextContentBlock`
  - `ThreadMessage` -> `Message`
  - `ThreadMessageDeleted` -> `MessageDeleted`

* release: 1.14.0
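The renames are mechanical import updates. A hedged migration sketch follows; the module paths are inferred from the diff below and the 1.14.0 package layout, so treat them as assumptions rather than documentation:

```py
# Old (pre-1.14.0) names shown commented out; new names are live imports.
# from openai.types.beta.threads import ThreadMessage
from openai.types.beta.threads import Message

# from openai.types.beta.threads import MessageContentText
from openai.types.beta.threads import TextContentBlock

# from openai.types.beta.threads.runs import CodeToolCall
from openai.types.beta.threads.runs import CodeInterpreterToolCall
```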
1 parent ec104bf
Changed files (78)
examples/assistant_stream.py
@@ -0,0 +1,33 @@
+import openai
+
+# reads the API key from the OPENAI_API_KEY environment variable
+client = openai.OpenAI()
+
+assistant = client.beta.assistants.create(
+    name="Math Tutor",
+    instructions="You are a personal math tutor. Write and run code to answer math questions.",
+    tools=[{"type": "code_interpreter"}],
+    model="gpt-4-1106-preview",
+)
+
+thread = client.beta.threads.create()
+
+message = client.beta.threads.messages.create(
+    thread_id=thread.id,
+    role="user",
+    content="I need to solve the equation `3x + 11 = 14`. Can you help me?",
+)
+
+print("starting run stream")
+
+stream = client.beta.threads.runs.create(
+    thread_id=thread.id,
+    assistant_id=assistant.id,
+    instructions="Please address the user as Jane Doe. The user has a premium account.",
+    stream=True,
+)
+
+for event in stream:
+    print(event.model_dump_json(indent=2, exclude_unset=True))
+
+client.beta.assistants.delete(assistant.id)
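The raw `stream=True` loop above yields every server-sent event. This commit also adds a higher-level helper API (see the next file and `src/openai/lib/streaming/_assistants.py` below); a minimal sketch using its `text_deltas` convenience iterator, assuming the `client`, `thread`, and `assistant` objects from the example above and that `create_and_stream` constructs a default event handler when none is passed:

```py
with client.beta.threads.runs.create_and_stream(
    thread_id=thread.id,
    assistant_id=assistant.id,
) as stream:
    # `text_deltas` yields just the text fragments carried by
    # `thread.message.delta` events.
    for text in stream.text_deltas:
        print(text, end="", flush=True)
    print()
```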
examples/assistant_stream_helpers.py
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+from typing_extensions import override
+
+import openai
+from openai import AssistantEventHandler
+from openai.types.beta import AssistantStreamEvent
+from openai.types.beta.threads import Text, TextDelta
+from openai.types.beta.threads.runs import RunStep, RunStepDelta
+
+
+class EventHandler(AssistantEventHandler):
+    @override
+    def on_event(self, event: AssistantStreamEvent) -> None:
+        if event.event == "thread.run.step.created":
+            details = event.data.step_details
+            if details.type == "tool_calls":
+                print("Generating code to interpret:\n\n```py")
+        elif event.event == "thread.message.created":
+            print("\nResponse:\n")
+
+    @override
+    def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None:
+        print(delta.value, end="", flush=True)
+
+    @override
+    def on_run_step_done(self, run_step: RunStep) -> None:
+        details = run_step.step_details
+        if details.type == "tool_calls":
+            for tool in details.tool_calls:
+                if tool.type == "code_interpreter":
+                    print("\n```\nExecuting code...")
+
+    @override
+    def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None:
+        details = delta.step_details
+        if details is not None and details.type == "tool_calls":
+            for tool in details.tool_calls or []:
+                if tool.type == "code_interpreter" and tool.code_interpreter and tool.code_interpreter.input:
+                    print(tool.code_interpreter.input, end="", flush=True)
+
+
+def main() -> None:
+    client = openai.OpenAI()
+
+    assistant = client.beta.assistants.create(
+        name="Math Tutor",
+        instructions="You are a personal math tutor. Write and run code to answer math questions.",
+        tools=[{"type": "code_interpreter"}],
+        model="gpt-4-1106-preview",
+    )
+
+    try:
+        question = "I need to solve the equation `3x + 11 = 14`. Can you help me?"
+
+        thread = client.beta.threads.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": question,
+                },
+            ]
+        )
+        print(f"Question: {question}\n")
+
+        with client.beta.threads.runs.create_and_stream(
+            thread_id=thread.id,
+            assistant_id=assistant.id,
+            instructions="Please address the user as Jane Doe. The user has a premium account.",
+            event_handler=EventHandler(),
+        ) as stream:
+            stream.until_done()
+            print()
+    finally:
+        client.beta.assistants.delete(assistant.id)
+
+
+if __name__ == "__main__":
+    main()
src/openai/lib/streaming/__init__.py
@@ -0,0 +1,8 @@
+from ._assistants import (
+    AssistantEventHandler as AssistantEventHandler,
+    AssistantEventHandlerT as AssistantEventHandlerT,
+    AssistantStreamManager as AssistantStreamManager,
+    AsyncAssistantEventHandler as AsyncAssistantEventHandler,
+    AsyncAssistantEventHandlerT as AsyncAssistantEventHandlerT,
+    AsyncAssistantStreamManager as AsyncAssistantStreamManager,
+)
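The redundant `Name as Name` aliases mark these names as explicit re-exports for type checkers that disable implicit re-export (e.g. mypy's `--no-implicit-reexport`). The handlers are also re-exported from the package root, as the example file above shows:

```py
from openai import AssistantEventHandler
from openai.lib.streaming import AssistantEventHandler as _Handler

# Both import paths resolve to the same class.
assert AssistantEventHandler is _Handler
```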
src/openai/lib/streaming/_assistants.py
@@ -0,0 +1,1035 @@
+from __future__ import annotations
+
+import asyncio
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Generic, TypeVar, Callable, Iterable, Iterator, cast
+from typing_extensions import Awaitable, AsyncIterable, AsyncIterator, assert_never
+
+import httpx
+
+from ..._utils import is_dict, is_list, consume_sync_iterator, consume_async_iterator
+from ..._models import construct_type
+from ..._streaming import Stream, AsyncStream
+from ...types.beta import AssistantStreamEvent
+from ...types.beta.threads import (
+    Run,
+    Text,
+    Message,
+    ImageFile,
+    TextDelta,
+    MessageDelta,
+    MessageContent,
+    MessageContentDelta,
+)
+from ...types.beta.threads.runs import RunStep, ToolCall, RunStepDelta, ToolCallDelta
+
+
+class AssistantEventHandler:
+    text_deltas: Iterable[str]
+    """Iterator over just the text deltas in the stream.
+
+    This corresponds to the `thread.message.delta` event
+    in the API.
+
+    ```py
+    for text in stream.text_deltas:
+        print(text, end="", flush=True)
+    print()
+    ```
+    """
+
+    def __init__(self) -> None:
+        self._current_event: AssistantStreamEvent | None = None
+        self._current_message_content_index: int | None = None
+        self._current_message_content: MessageContent | None = None
+        self._current_tool_call_index: int | None = None
+        self._current_tool_call: ToolCall | None = None
+        self.__current_run_step_id: str | None = None
+        self.__current_run: Run | None = None
+        self.__run_step_snapshots: dict[str, RunStep] = {}
+        self.__message_snapshots: dict[str, Message] = {}
+        self.__current_message_snapshot: Message | None = None
+
+        self.text_deltas = self.__text_deltas__()
+        self._iterator = self.__stream__()
+        self.__stream: Stream[AssistantStreamEvent] | None = None
+
+    def _init(self, stream: Stream[AssistantStreamEvent]) -> None:
+        if self.__stream:
+            raise RuntimeError(
+                "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance"
+            )
+
+        self.__stream = stream
+
+    def __next__(self) -> AssistantStreamEvent:
+        return self._iterator.__next__()
+
+    def __iter__(self) -> Iterator[AssistantStreamEvent]:
+        for item in self._iterator:
+            yield item
+
+    @property
+    def current_event(self) -> AssistantStreamEvent | None:
+        return self._current_event
+
+    @property
+    def current_run(self) -> Run | None:
+        return self.__current_run
+
+    @property
+    def current_run_step_snapshot(self) -> RunStep | None:
+        if not self.__current_run_step_id:
+            return None
+
+        return self.__run_step_snapshots[self.__current_run_step_id]
+
+    @property
+    def current_message_snapshot(self) -> Message | None:
+        return self.__current_message_snapshot
+
+    def close(self) -> None:
+        """
+        Close the response and release the connection.
+
+        Automatically called when the context manager exits.
+        """
+        if self.__stream:
+            self.__stream.close()
+
+    def until_done(self) -> None:
+        """Waits until the stream has been consumed"""
+        consume_sync_iterator(self)
+
+    def get_final_run(self) -> Run:
+        """Wait for the stream to finish and returns the completed Run object"""
+        self.until_done()
+
+        if not self.__current_run:
+            raise RuntimeError("No final run object found")
+
+        return self.__current_run
+
+    def get_final_run_steps(self) -> list[RunStep]:
+        """Wait for the stream to finish and returns the steps taken in this run"""
+        self.until_done()
+
+        if not self.__run_step_snapshots:
+            raise RuntimeError("No run steps found")
+
+        return list(self.__run_step_snapshots.values())
+
+    def get_final_messages(self) -> list[Message]:
+        """Wait for the stream to finish and returns the messages emitted in this run"""
+        self.until_done()
+
+        if not self.__message_snapshots:
+            raise RuntimeError("No messages found")
+
+        return list(self.__message_snapshots.values())
+
+    def __text_deltas__(self) -> Iterator[str]:
+        for event in self:
+            if event.event != "thread.message.delta":
+                continue
+
+            for content_delta in event.data.delta.content or []:
+                if content_delta.type == "text" and content_delta.text and content_delta.text.value:
+                    yield content_delta.text.value
+
+    # event handlers
+
+    def on_end(self) -> None:
+        """Fires when the stream has finished.
+
+        This happens if the stream is read to completion
+        or if an exception occurs during iteration.
+        """
+
+    def on_event(self, event: AssistantStreamEvent) -> None:
+        """Callback that is fired for every Server-Sent-Event"""
+
+    def on_run_step_created(self, run_step: RunStep) -> None:
+        """Callback that is fired when a run step is created"""
+
+    def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None:
+        """Callback that is fired whenever a run step delta is returned from the API
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the run step. For example, a tool calls event may
+        look like this:
+
+        # delta
+        tool_calls=[
+            RunStepDeltaToolCallsCodeInterpreter(
+                index=0,
+                type='code_interpreter',
+                id=None,
+                code_interpreter=CodeInterpreter(input=' sympy', outputs=None)
+            )
+        ]
+        # snapshot
+        tool_calls=[
+            CodeToolCall(
+                id='call_wKayJlcYV12NiadiZuJXxcfx',
+                code_interpreter=CodeInterpreter(input='from sympy', outputs=[]),
+                type='code_interpreter',
+                index=0
+            )
+        ],
+        """
+
+    def on_run_step_done(self, run_step: RunStep) -> None:
+        """Callback that is fired when a run step is completed"""
+
+    def on_tool_call_created(self, tool_call: ToolCall) -> None:
+        """Callback that is fired when a tool call is created"""
+
+    def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None:
+        """Callback that is fired when a tool call delta is encountered"""
+
+    def on_tool_call_done(self, tool_call: ToolCall) -> None:
+        """Callback that is fired when a tool call delta is encountered"""
+
+    def on_exception(self, exception: Exception) -> None:
+        """Fired whenever an exception happens during streaming"""
+
+    def on_timeout(self) -> None:
+        """Fires if the request times out"""
+
+    def on_message_created(self, message: Message) -> None:
+        """Callback that is fired when a message is created"""
+
+    def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None:
+        """Callback that is fired whenever a message delta is returned from the API
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the message. For example, a text content event may
+        look like this:
+
+        # delta
+        MessageDeltaText(
+            index=0,
+            type='text',
+            text=Text(
+                value=' Jane'
+            ),
+        )
+        # snapshot
+        MessageContentText(
+            index=0,
+            type='text',
+            text=Text(
+                value='Certainly, Jane'
+            ),
+        )
+        """
+
+    def on_message_done(self, message: Message) -> None:
+        """Callback that is fired when a message is completed"""
+
+    def on_text_created(self, text: Text) -> None:
+        """Callback that is fired when a text content block is created"""
+
+    def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None:
+        """Callback that is fired whenever a text content delta is returned
+        by the API.
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the text. For example:
+
+        on_text_delta(TextDelta(value="The"), Text(value="The")),
+        on_text_delta(TextDelta(value=" solution"), Text(value="The solution")),
+        on_text_delta(TextDelta(value=" to"), Text(value="The solution to")),
+        on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")),
+        on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equivalent")),
+        """
+
+    def on_text_done(self, text: Text) -> None:
+        """Callback that is fired when a text content block is finished"""
+
+    def on_image_file_done(self, image_file: ImageFile) -> None:
+        """Callback that is fired when an image file block is finished"""
+
+    def _emit_sse_event(self, event: AssistantStreamEvent) -> None:
+        self._current_event = event
+        self.on_event(event)
+
+        self.__current_message_snapshot, new_content = accumulate_event(
+            event=event,
+            current_message_snapshot=self.__current_message_snapshot,
+        )
+        if self.__current_message_snapshot is not None:
+            self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot
+
+        accumulate_run_step(
+            event=event,
+            run_step_snapshots=self.__run_step_snapshots,
+        )
+
+        for content_delta in new_content:
+            assert self.__current_message_snapshot is not None
+
+            block = self.__current_message_snapshot.content[content_delta.index]
+            if block.type == "text":
+                self.on_text_created(block.text)
+
+        if (
+            event.event == "thread.run.completed"
+            or event.event == "thread.run.cancelled"
+            or event.event == "thread.run.expired"
+            or event.event == "thread.run.failed"
+            or event.event == "thread.run.requires_action"
+        ):
+            self.__current_run = event.data
+            if self._current_tool_call:
+                self.on_tool_call_done(self._current_tool_call)
+        elif (
+            event.event == "thread.run.created"
+            or event.event == "thread.run.in_progress"
+            or event.event == "thread.run.cancelling"
+            or event.event == "thread.run.queued"
+        ):
+            self.__current_run = event.data
+        elif event.event == "thread.message.created":
+            self.on_message_created(event.data)
+        elif event.event == "thread.message.delta":
+            snapshot = self.__current_message_snapshot
+            assert snapshot is not None
+
+            message_delta = event.data.delta
+            if message_delta.content is not None:
+                for content_delta in message_delta.content:
+                    if content_delta.type == "text" and content_delta.text:
+                        snapshot_content = snapshot.content[content_delta.index]
+                        assert snapshot_content.type == "text"
+                        self.on_text_delta(content_delta.text, snapshot_content.text)
+
+                    # If the delta is for a new message content:
+                    # - emit on_text_done/on_image_file_done for the previous message content
+                    # - emit on_text_created for the new message content (image content has no created callback)
+                    if content_delta.index != self._current_message_content_index:
+                        if self._current_message_content is not None:
+                            if self._current_message_content.type == "text":
+                                self.on_text_done(self._current_message_content.text)
+                            elif self._current_message_content.type == "image_file":
+                                self.on_image_file_done(self._current_message_content.image_file)
+
+                        self._current_message_content_index = content_delta.index
+                        self._current_message_content = snapshot.content[content_delta.index]
+
+                    # Update the current_message_content (delta event is correctly emitted already)
+                    self._current_message_content = snapshot.content[content_delta.index]
+
+            self.on_message_delta(event.data.delta, snapshot)
+        elif event.event == "thread.message.completed" or event.event == "thread.message.incomplete":
+            self.__current_message_snapshot = event.data
+            self.__message_snapshots[event.data.id] = event.data
+
+            if self._current_message_content_index is not None:
+                content = event.data.content[self._current_message_content_index]
+                if content.type == "text":
+                    self.on_text_done(content.text)
+                elif content.type == "image_file":
+                    self.on_image_file_done(content.image_file)
+
+            self.on_message_done(event.data)
+        elif event.event == "thread.run.step.created":
+            self.__current_run_step_id = event.data.id
+            self.on_run_step_created(event.data)
+        elif event.event == "thread.run.step.in_progress":
+            self.__current_run_step_id = event.data.id
+        elif event.event == "thread.run.step.delta":
+            step_snapshot = self.__run_step_snapshots[event.data.id]
+
+            run_step_delta = event.data.delta
+            if (
+                run_step_delta.step_details
+                and run_step_delta.step_details.type == "tool_calls"
+                and run_step_delta.step_details.tool_calls is not None
+            ):
+                assert step_snapshot.step_details.type == "tool_calls"
+                for tool_call_delta in run_step_delta.step_details.tool_calls:
+                    if tool_call_delta.index == self._current_tool_call_index:
+                        self.on_tool_call_delta(
+                            tool_call_delta,
+                            step_snapshot.step_details.tool_calls[tool_call_delta.index],
+                        )
+
+                    # If the delta is for a new tool call:
+                    # - emit on_tool_call_done for the previous tool_call
+                    # - emit on_tool_call_created for the new tool_call
+                    if tool_call_delta.index != self._current_tool_call_index:
+                        if self._current_tool_call is not None:
+                            self.on_tool_call_done(self._current_tool_call)
+
+                        self._current_tool_call_index = tool_call_delta.index
+                        self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index]
+                        self.on_tool_call_created(self._current_tool_call)
+
+                    # Update the current_tool_call (delta event is correctly emitted already)
+                    self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index]
+
+            self.on_run_step_delta(
+                event.data.delta,
+                step_snapshot,
+            )
+        elif (
+            event.event == "thread.run.step.completed"
+            or event.event == "thread.run.step.cancelled"
+            or event.event == "thread.run.step.expired"
+            or event.event == "thread.run.step.failed"
+        ):
+            if self._current_tool_call:
+                self.on_tool_call_done(self._current_tool_call)
+
+            self.on_run_step_done(event.data)
+            self.__current_run_step_id = None
+        elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error":
+            # currently no special handling
+            ...
+        else:
+            # we only want to error at type-check time
+            if TYPE_CHECKING:  # type: ignore[unreachable]
+                assert_never(event)
+
+        self._current_event = None
+
+    def __stream__(self) -> Iterator[AssistantStreamEvent]:
+        stream = self.__stream
+        if not stream:
+            raise RuntimeError("Stream has not been started yet")
+
+        try:
+            for event in stream:
+                self._emit_sse_event(event)
+
+                yield event
+        except (httpx.TimeoutException, asyncio.TimeoutError) as exc:
+            self.on_timeout()
+            self.on_exception(exc)
+            raise
+        except Exception as exc:
+            self.on_exception(exc)
+            raise
+        finally:
+            self.on_end()
+
+
+AssistantEventHandlerT = TypeVar("AssistantEventHandlerT", bound=AssistantEventHandler)
+
+
+class AssistantStreamManager(Generic[AssistantEventHandlerT]):
+    """Wrapper over AssistantStreamEventHandler that is returned by `.stream()`
+    so that a context manager can be used.
+
+    ```py
+    with client.beta.threads.create_and_run_stream(...) as stream:
+        for event in stream:
+            ...
+    ```
+    """
+
+    def __init__(
+        self,
+        api_request: Callable[[], Stream[AssistantStreamEvent]],
+        *,
+        event_handler: AssistantEventHandlerT,
+    ) -> None:
+        self.__stream: Stream[AssistantStreamEvent] | None = None
+        self.__event_handler = event_handler
+        self.__api_request = api_request
+
+    def __enter__(self) -> AssistantEventHandlerT:
+        self.__stream = self.__api_request()
+        self.__event_handler._init(self.__stream)
+        return self.__event_handler
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__stream is not None:
+            self.__stream.close()
+
+
+class AsyncAssistantEventHandler:
+    text_deltas: AsyncIterable[str]
+    """Iterator over just the text deltas in the stream.
+
+    This corresponds to the `thread.message.delta` event
+    in the API.
+
+    ```py
+    async for text in stream.text_deltas:
+        print(text, end="", flush=True)
+    print()
+    ```
+    """
+
+    def __init__(self) -> None:
+        self._current_event: AssistantStreamEvent | None = None
+        self._current_message_content_index: int | None = None
+        self._current_message_content: MessageContent | None = None
+        self._current_tool_call_index: int | None = None
+        self._current_tool_call: ToolCall | None = None
+        self.__current_run_step_id: str | None = None
+        self.__current_run: Run | None = None
+        self.__run_step_snapshots: dict[str, RunStep] = {}
+        self.__message_snapshots: dict[str, Message] = {}
+        self.__current_message_snapshot: Message | None = None
+
+        self.text_deltas = self.__text_deltas__()
+        self._iterator = self.__stream__()
+        self.__stream: AsyncStream[AssistantStreamEvent] | None = None
+
+    def _init(self, stream: AsyncStream[AssistantStreamEvent]) -> None:
+        if self.__stream:
+            raise RuntimeError(
+                "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance"
+            )
+
+        self.__stream = stream
+
+    async def __anext__(self) -> AssistantStreamEvent:
+        return await self._iterator.__anext__()
+
+    async def __aiter__(self) -> AsyncIterator[AssistantStreamEvent]:
+        async for item in self._iterator:
+            yield item
+
+    async def close(self) -> None:
+        """
+        Close the response and release the connection.
+
+        Automatically called when the context manager exits.
+        """
+        if self.__stream:
+            await self.__stream.close()
+
+    @property
+    def current_event(self) -> AssistantStreamEvent | None:
+        return self._current_event
+
+    @property
+    def current_run(self) -> Run | None:
+        return self.__current_run
+
+    @property
+    def current_run_step_snapshot(self) -> RunStep | None:
+        if not self.__current_run_step_id:
+            return None
+
+        return self.__run_step_snapshots[self.__current_run_step_id]
+
+    @property
+    def current_message_snapshot(self) -> Message | None:
+        return self.__current_message_snapshot
+
+    async def until_done(self) -> None:
+        """Waits until the stream has been consumed"""
+        await consume_async_iterator(self)
+
+    async def get_final_run(self) -> Run:
+        """Wait for the stream to finish and returns the completed Run object"""
+        await self.until_done()
+
+        if not self.__current_run:
+            raise RuntimeError("No final run object found")
+
+        return self.__current_run
+
+    async def get_final_run_steps(self) -> list[RunStep]:
+        """Wait for the stream to finish and returns the steps taken in this run"""
+        await self.until_done()
+
+        if not self.__run_step_snapshots:
+            raise RuntimeError("No run steps found")
+
+        return list(self.__run_step_snapshots.values())
+
+    async def get_final_messages(self) -> list[Message]:
+        """Wait for the stream to finish and returns the messages emitted in this run"""
+        await self.until_done()
+
+        if not self.__message_snapshots:
+            raise RuntimeError("No messages found")
+
+        return list(self.__message_snapshots.values())
+
+    async def __text_deltas__(self) -> AsyncIterator[str]:
+        async for event in self:
+            if event.event != "thread.message.delta":
+                continue
+
+            for content_delta in event.data.delta.content or []:
+                if content_delta.type == "text" and content_delta.text and content_delta.text.value:
+                    yield content_delta.text.value
+
+    # event handlers
+
+    async def on_end(self) -> None:
+        """Fires when the stream has finished.
+
+        This happens if the stream is read to completion
+        or if an exception occurs during iteration.
+        """
+
+    async def on_event(self, event: AssistantStreamEvent) -> None:
+        """Callback that is fired for every Server-Sent-Event"""
+
+    async def on_run_step_created(self, run_step: RunStep) -> None:
+        """Callback that is fired when a run step is created"""
+
+    async def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None:
+        """Callback that is fired whenever a run step delta is returned from the API
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the run step. For example, a tool calls event may
+        look like this:
+
+        # delta
+        tool_calls=[
+            RunStepDeltaToolCallsCodeInterpreter(
+                index=0,
+                type='code_interpreter',
+                id=None,
+                code_interpreter=CodeInterpreter(input=' sympy', outputs=None)
+            )
+        ]
+        # snapshot
+        tool_calls=[
+            CodeToolCall(
+                id='call_wKayJlcYV12NiadiZuJXxcfx',
+                code_interpreter=CodeInterpreter(input='from sympy', outputs=[]),
+                type='code_interpreter',
+                index=0
+            )
+        ],
+        """
+
+    async def on_run_step_done(self, run_step: RunStep) -> None:
+        """Callback that is fired when a run step is completed"""
+
+    async def on_tool_call_created(self, tool_call: ToolCall) -> None:
+        """Callback that is fired when a tool call is created"""
+
+    async def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None:
+        """Callback that is fired when a tool call delta is encountered"""
+
+    async def on_tool_call_done(self, tool_call: ToolCall) -> None:
+        """Callback that is fired when a tool call delta is encountered"""
+
+    async def on_exception(self, exception: Exception) -> None:
+        """Fired whenever an exception happens during streaming"""
+
+    async def on_timeout(self) -> None:
+        """Fires if the request times out"""
+
+    async def on_message_created(self, message: Message) -> None:
+        """Callback that is fired when a message is created"""
+
+    async def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None:
+        """Callback that is fired whenever a message delta is returned from the API
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the message. For example, a text content event may
+        look like this:
+
+        # delta
+        MessageDeltaText(
+            index=0,
+            type='text',
+            text=Text(
+                value=' Jane'
+            ),
+        )
+        # snapshot
+        MessageContentText(
+            index=0,
+            type='text',
+            text=Text(
+                value='Certainly, Jane'
+            ),
+        )
+        """
+
+    async def on_message_done(self, message: Message) -> None:
+        """Callback that is fired when a message is completed"""
+
+    async def on_text_created(self, text: Text) -> None:
+        """Callback that is fired when a text content block is created"""
+
+    async def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None:
+        """Callback that is fired whenever a text content delta is returned
+        by the API.
+
+        The first argument is just the delta as sent by the API and the second argument
+        is the accumulated snapshot of the text. For example:
+
+        on_text_delta(TextDelta(value="The"), Text(value="The")),
+        on_text_delta(TextDelta(value=" solution"), Text(value="The solution")),
+        on_text_delta(TextDelta(value=" to"), Text(value="The solution to")),
+        on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")),
+        on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equivalent")),
+        """
+
+    async def on_text_done(self, text: Text) -> None:
+        """Callback that is fired when a text content block is finished"""
+
+    async def on_image_file_done(self, image_file: ImageFile) -> None:
+        """Callback that is fired when an image file block is finished"""
+
+    async def _emit_sse_event(self, event: AssistantStreamEvent) -> None:
+        self._current_event = event
+        await self.on_event(event)
+
+        self.__current_message_snapshot, new_content = accumulate_event(
+            event=event,
+            current_message_snapshot=self.__current_message_snapshot,
+        )
+        if self.__current_message_snapshot is not None:
+            self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot
+
+        accumulate_run_step(
+            event=event,
+            run_step_snapshots=self.__run_step_snapshots,
+        )
+
+        for content_delta in new_content:
+            assert self.__current_message_snapshot is not None
+
+            block = self.__current_message_snapshot.content[content_delta.index]
+            if block.type == "text":
+                await self.on_text_created(block.text)
+
+        if (
+            event.event == "thread.run.completed"
+            or event.event == "thread.run.cancelled"
+            or event.event == "thread.run.expired"
+            or event.event == "thread.run.failed"
+            or event.event == "thread.run.requires_action"
+        ):
+            self.__current_run = event.data
+            if self._current_tool_call:
+                await self.on_tool_call_done(self._current_tool_call)
+        elif (
+            event.event == "thread.run.created"
+            or event.event == "thread.run.in_progress"
+            or event.event == "thread.run.cancelling"
+            or event.event == "thread.run.queued"
+        ):
+            self.__current_run = event.data
+        elif event.event == "thread.message.created":
+            await self.on_message_created(event.data)
+        elif event.event == "thread.message.delta":
+            snapshot = self.__current_message_snapshot
+            assert snapshot is not None
+
+            message_delta = event.data.delta
+            if message_delta.content is not None:
+                for content_delta in message_delta.content:
+                    if content_delta.type == "text" and content_delta.text:
+                        snapshot_content = snapshot.content[content_delta.index]
+                        assert snapshot_content.type == "text"
+                        await self.on_text_delta(content_delta.text, snapshot_content.text)
+
+                    # If the delta is for a new message content:
+                    # - emit on_text_done/on_image_file_done for the previous message content
+                    # - emit on_text_created for the new message content (image content has no created callback)
+                    if content_delta.index != self._current_message_content_index:
+                        if self._current_message_content is not None:
+                            if self._current_message_content.type == "text":
+                                await self.on_text_done(self._current_message_content.text)
+                            elif self._current_message_content.type == "image_file":
+                                await self.on_image_file_done(self._current_message_content.image_file)
+
+                        self._current_message_content_index = content_delta.index
+                        self._current_message_content = snapshot.content[content_delta.index]
+
+                    # Update the current_message_content (delta event is correctly emitted already)
+                    self._current_message_content = snapshot.content[content_delta.index]
+
+            await self.on_message_delta(event.data.delta, snapshot)
+        elif event.event == "thread.message.completed" or event.event == "thread.message.incomplete":
+            self.__current_message_snapshot = event.data
+            self.__message_snapshots[event.data.id] = event.data
+
+            if self._current_message_content_index is not None:
+                content = event.data.content[self._current_message_content_index]
+                if content.type == "text":
+                    await self.on_text_done(content.text)
+                elif content.type == "image_file":
+                    await self.on_image_file_done(content.image_file)
+
+            await self.on_message_done(event.data)
+        elif event.event == "thread.run.step.created":
+            self.__current_run_step_id = event.data.id
+            await self.on_run_step_created(event.data)
+        elif event.event == "thread.run.step.in_progress":
+            self.__current_run_step_id = event.data.id
+        elif event.event == "thread.run.step.delta":
+            step_snapshot = self.__run_step_snapshots[event.data.id]
+
+            run_step_delta = event.data.delta
+            if (
+                run_step_delta.step_details
+                and run_step_delta.step_details.type == "tool_calls"
+                and run_step_delta.step_details.tool_calls is not None
+            ):
+                assert step_snapshot.step_details.type == "tool_calls"
+                for tool_call_delta in run_step_delta.step_details.tool_calls:
+                    if tool_call_delta.index == self._current_tool_call_index:
+                        await self.on_tool_call_delta(
+                            tool_call_delta,
+                            step_snapshot.step_details.tool_calls[tool_call_delta.index],
+                        )
+
+                    # If the delta is for a new tool call:
+                    # - emit on_tool_call_done for the previous tool_call
+                    # - emit on_tool_call_created for the new tool_call
+                    if tool_call_delta.index != self._current_tool_call_index:
+                        if self._current_tool_call is not None:
+                            await self.on_tool_call_done(self._current_tool_call)
+
+                        self._current_tool_call_index = tool_call_delta.index
+                        self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index]
+                        await self.on_tool_call_created(self._current_tool_call)
+
+                    # Update the current_tool_call (delta event is correctly emitted already)
+                    self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index]
+
+            await self.on_run_step_delta(
+                event.data.delta,
+                step_snapshot,
+            )
+        elif (
+            event.event == "thread.run.step.completed"
+            or event.event == "thread.run.step.cancelled"
+            or event.event == "thread.run.step.expired"
+            or event.event == "thread.run.step.failed"
+        ):
+            if self._current_tool_call:
+                await self.on_tool_call_done(self._current_tool_call)
+
+            await self.on_run_step_done(event.data)
+            self.__current_run_step_id = None
+        elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error":
+            # currently no special handling
+            ...
+        else:
+            # we only want to error at type-check time
+            if TYPE_CHECKING:  # type: ignore[unreachable]
+                assert_never(event)
+
+        self._current_event = None
+
+    async def __stream__(self) -> AsyncIterator[AssistantStreamEvent]:
+        stream = self.__stream
+        if not stream:
+            raise RuntimeError("Stream has not been started yet")
+
+        try:
+            async for event in stream:
+                await self._emit_sse_event(event)
+
+                yield event
+        except (httpx.TimeoutException, asyncio.TimeoutError) as exc:
+            await self.on_timeout()
+            await self.on_exception(exc)
+            raise
+        except Exception as exc:
+            await self.on_exception(exc)
+            raise
+        finally:
+            await self.on_end()
+
+
+AsyncAssistantEventHandlerT = TypeVar("AsyncAssistantEventHandlerT", bound=AsyncAssistantEventHandler)
+
+
+class AsyncAssistantStreamManager(Generic[AsyncAssistantEventHandlerT]):
+    """Wrapper over AsyncAssistantStreamEventHandler that is returned by `.stream()`
+    so that an async context manager can be used without `await`ing the
+    original client call.
+
+    ```py
+    async with client.beta.threads.create_and_run_stream(...) as stream:
+        async for event in stream:
+            ...
+    ```
+    """
+
+    def __init__(
+        self,
+        api_request: Awaitable[AsyncStream[AssistantStreamEvent]],
+        *,
+        event_handler: AsyncAssistantEventHandlerT,
+    ) -> None:
+        self.__stream: AsyncStream[AssistantStreamEvent] | None = None
+        self.__event_handler = event_handler
+        self.__api_request = api_request
+
+    async def __aenter__(self) -> AsyncAssistantEventHandlerT:
+        self.__stream = await self.__api_request
+        self.__event_handler._init(self.__stream)
+        return self.__event_handler
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__stream is not None:
+            await self.__stream.close()
+
+
+def accumulate_run_step(
+    *,
+    event: AssistantStreamEvent,
+    run_step_snapshots: dict[str, RunStep],
+) -> None:
+    if event.event == "thread.run.step.created":
+        run_step_snapshots[event.data.id] = event.data
+        return
+
+    if event.event == "thread.run.step.delta":
+        data = event.data
+        snapshot = run_step_snapshots[data.id]
+
+        if data.delta:
+            merged = accumulate_delta(
+                cast(
+                    "dict[object, object]",
+                    snapshot.model_dump(exclude_unset=True),
+                ),
+                cast(
+                    "dict[object, object]",
+                    data.delta.model_dump(exclude_unset=True),
+                ),
+            )
+            run_step_snapshots[snapshot.id] = cast(RunStep, construct_type(type_=RunStep, value=merged))
+
+    return None
+
+
+def accumulate_event(
+    *,
+    event: AssistantStreamEvent,
+    current_message_snapshot: Message | None,
+) -> tuple[Message | None, list[MessageContentDelta]]:
+    """Returns a tuple of message snapshot and newly created text message deltas"""
+    if event.event == "thread.message.created":
+        return event.data, []
+
+    new_content: list[MessageContentDelta] = []
+
+    if event.event != "thread.message.delta":
+        return current_message_snapshot, []
+
+    if not current_message_snapshot:
+        raise RuntimeError("Encountered a message delta with no previous snapshot")
+
+    data = event.data
+    if data.delta.content:
+        for content_delta in data.delta.content:
+            try:
+                block = current_message_snapshot.content[content_delta.index]
+            except IndexError:
+                current_message_snapshot.content.insert(
+                    content_delta.index,
+                    cast(
+                        MessageContent,
+                        construct_type(
+                            # mypy doesn't allow Content for some reason
+                            type_=cast(Any, MessageContent),
+                            value=content_delta.model_dump(exclude_unset=True),
+                        ),
+                    ),
+                )
+                new_content.append(content_delta)
+            else:
+                merged = accumulate_delta(
+                    cast(
+                        "dict[object, object]",
+                        block.model_dump(exclude_unset=True),
+                    ),
+                    cast(
+                        "dict[object, object]",
+                        content_delta.model_dump(exclude_unset=True),
+                    ),
+                )
+                current_message_snapshot.content[content_delta.index] = cast(
+                    MessageContent,
+                    construct_type(
+                        # mypy doesn't allow Content for some reason
+                        type_=cast(Any, MessageContent),
+                        value=merged,
+                    ),
+                )
+
+    return current_message_snapshot, new_content
+
+
+def accumulate_delta(acc: dict[object, object], delta: dict[object, object]) -> dict[object, object]:
+    for key, delta_value in delta.items():
+        if key not in acc:
+            acc[key] = delta_value
+            continue
+
+        acc_value = acc[key]
+        if acc_value is None:
+            acc[key] = delta_value
+            continue
+
+        # the `index` property is used in arrays of objects so it should
+        # not be accumulated like other values e.g.
+        # [{'foo': 'bar', 'index': 0}]
+        #
+        # the same applies to `type` properties as they're used for
+        # discriminated unions
+        if key == "index" or key == "type":
+            acc[key] = delta_value
+            continue
+
+        if isinstance(acc_value, str) and isinstance(delta_value, str):
+            acc_value += delta_value
+        elif isinstance(acc_value, (int, float)) and isinstance(delta_value, (int, float)):
+            acc_value += delta_value
+        elif is_dict(acc_value) and is_dict(delta_value):
+            acc_value = accumulate_delta(acc_value, delta_value)
+        elif is_list(acc_value) and is_list(delta_value):
+            # for lists of non-dictionary items we'll only ever get new entries
+            # in the array, existing entries will never be changed
+            if all(isinstance(x, (str, int, float)) for x in acc_value):
+                acc_value.extend(delta_value)
+                continue
+
+            for delta_entry in delta_value:
+                if not is_dict(delta_entry):
+                    raise TypeError(f"Unexpected list delta entry is not a dictionary: {delta_entry}")
+
+                try:
+                    index = delta_entry["index"]
+                except KeyError as exc:
+                    raise RuntimeError(f"Expected list delta entry to have an `index` key; {delta_entry}") from exc
+
+                if not isinstance(index, int):
+                    raise TypeError(f"Unexpected, list delta entry `index` value is not an integer; {index}")
+
+                try:
+                    acc_entry = acc_value[index]
+                except IndexError:
+                    acc_value.insert(index, delta_entry)
+                else:
+                    if not is_dict(acc_entry):
+                        raise TypeError("not handled yet")
+
+                    acc_value[index] = accumulate_delta(acc_entry, delta_entry)
+
+        acc[key] = acc_value
+
+    return acc
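A worked example of the merge semantics implemented by `accumulate_delta` above: string values concatenate, `index` and `type` are replaced rather than accumulated, and lists of dicts merge positionally by each entry's `index` key. The values are illustrative only:

```py
acc = {"type": "text", "text": {"value": "The solution"}}
delta = {"type": "text", "text": {"value": " to"}}
assert accumulate_delta(acc, delta) == {"type": "text", "text": {"value": "The solution to"}}

# Lists of dicts merge per-entry via `index`, not by appending:
acc = {"tool_calls": [{"index": 0, "code_interpreter": {"input": "from"}}]}
delta = {"tool_calls": [{"index": 0, "code_interpreter": {"input": " sympy"}}]}
merged = accumulate_delta(acc, delta)
assert merged["tool_calls"][0]["code_interpreter"]["input"] == "from sympy"
```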
src/openai/resources/beta/assistants/assistants.py
@@ -28,6 +28,7 @@ from ....pagination import SyncCursorPage, AsyncCursorPage
 from ....types.beta import (
     Assistant,
     AssistantDeleted,
+    AssistantToolParam,
     assistant_list_params,
     assistant_create_params,
     assistant_update_params,
@@ -62,7 +63,7 @@ class Assistants(SyncAPIResource):
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         metadata: Optional[object] | NotGiven = NOT_GIVEN,
         name: Optional[str] | NotGiven = NOT_GIVEN,
-        tools: Iterable[assistant_create_params.Tool] | NotGiven = NOT_GIVEN,
+        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -172,7 +173,7 @@ class Assistants(SyncAPIResource):
         metadata: Optional[object] | NotGiven = NOT_GIVEN,
         model: str | NotGiven = NOT_GIVEN,
         name: Optional[str] | NotGiven = NOT_GIVEN,
-        tools: Iterable[assistant_update_params.Tool] | NotGiven = NOT_GIVEN,
+        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -365,7 +366,7 @@ class AsyncAssistants(AsyncAPIResource):
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         metadata: Optional[object] | NotGiven = NOT_GIVEN,
         name: Optional[str] | NotGiven = NOT_GIVEN,
-        tools: Iterable[assistant_create_params.Tool] | NotGiven = NOT_GIVEN,
+        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -475,7 +476,7 @@ class AsyncAssistants(AsyncAPIResource):
         metadata: Optional[object] | NotGiven = NOT_GIVEN,
         model: str | NotGiven = NOT_GIVEN,
         name: Optional[str] | NotGiven = NOT_GIVEN,
-        tools: Iterable[assistant_update_params.Tool] | NotGiven = NOT_GIVEN,
+        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
src/openai/resources/beta/threads/messages/messages.py
@@ -29,7 +29,7 @@ from ....._base_client import (
     AsyncPaginator,
     make_request_options,
 )
-from .....types.beta.threads import ThreadMessage, message_list_params, message_create_params, message_update_params
+from .....types.beta.threads import Message, message_list_params, message_create_params, message_update_params
 
 __all__ = ["Messages", "AsyncMessages"]
 
@@ -61,7 +61,7 @@ class Messages(SyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ThreadMessage:
+    ) -> Message:
         """
         Create a message.
 
@@ -106,7 +106,7 @@ class Messages(SyncAPIResource):
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=ThreadMessage,
+            cast_to=Message,
         )
 
     def retrieve(
@@ -120,7 +120,7 @@ class Messages(SyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ThreadMessage:
+    ) -> Message:
         """
         Retrieve a message.
 
@@ -143,7 +143,7 @@ class Messages(SyncAPIResource):
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=ThreadMessage,
+            cast_to=Message,
         )
 
     def update(
@@ -158,7 +158,7 @@ class Messages(SyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ThreadMessage:
+    ) -> Message:
         """
         Modifies a message.
 
@@ -187,7 +187,7 @@ class Messages(SyncAPIResource):
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=ThreadMessage,
+            cast_to=Message,
         )
 
     def list(
@@ -204,7 +204,7 @@ class Messages(SyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> SyncCursorPage[ThreadMessage]:
+    ) -> SyncCursorPage[Message]:
         """
         Returns a list of messages for a given thread.
 
@@ -238,7 +238,7 @@ class Messages(SyncAPIResource):
         extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
         return self._get_api_list(
             f"/threads/{thread_id}/messages",
-            page=SyncCursorPage[ThreadMessage],
+            page=SyncCursorPage[Message],
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
@@ -254,7 +254,7 @@ class Messages(SyncAPIResource):
                     message_list_params.MessageListParams,
                 ),
             ),
-            model=ThreadMessage,
+            model=Message,
         )
 
 
@@ -285,7 +285,7 @@ class AsyncMessages(AsyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ThreadMessage:
+    ) -> Message:
         """
         Create a message.
 
@@ -330,7 +330,7 @@ class AsyncMessages(AsyncAPIResource):
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=ThreadMessage,
+            cast_to=Message,
         )
 
     async def retrieve(
@@ -344,7 +344,7 @@ class AsyncMessages(AsyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ThreadMessage:
+    ) -> Message:
         """
         Retrieve a message.
 
@@ -367,7 +367,7 @@ class AsyncMessages(AsyncAPIResource):
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=ThreadMessage,
+            cast_to=Message,
         )
 
     async def update(
@@ -382,7 +382,7 @@ class AsyncMessages(AsyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ThreadMessage:
+    ) -> Message:
         """
         Modifies a message.
 
@@ -411,7 +411,7 @@ class AsyncMessages(AsyncAPIResource):
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=ThreadMessage,
+            cast_to=Message,
         )
 
     def list(
@@ -428,7 +428,7 @@ class AsyncMessages(AsyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncPaginator[ThreadMessage, AsyncCursorPage[ThreadMessage]]:
+    ) -> AsyncPaginator[Message, AsyncCursorPage[Message]]:
         """
         Returns a list of messages for a given thread.
 
@@ -462,7 +462,7 @@ class AsyncMessages(AsyncAPIResource):
         extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
         return self._get_api_list(
             f"/threads/{thread_id}/messages",
-            page=AsyncCursorPage[ThreadMessage],
+            page=AsyncCursorPage[Message],
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
@@ -478,7 +478,7 @@ class AsyncMessages(AsyncAPIResource):
                     message_list_params.MessageListParams,
                 ),
             ),
-            model=ThreadMessage,
+            model=Message,
         )
 
 
src/openai/resources/beta/threads/runs/runs.py
@@ -2,7 +2,8 @@
 
 from __future__ import annotations
 
-from typing import Iterable, Optional
+from typing import Iterable, Optional, overload
+from functools import partial
 from typing_extensions import Literal
 
 import httpx
@@ -18,17 +19,28 @@ from .steps import (
 )
 from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ....._utils import (
+    required_args,
     maybe_transform,
     async_maybe_transform,
 )
 from ....._compat import cached_property
 from ....._resource import SyncAPIResource, AsyncAPIResource
 from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....._streaming import Stream, AsyncStream
 from .....pagination import SyncCursorPage, AsyncCursorPage
+from .....types.beta import AssistantToolParam, AssistantStreamEvent
 from ....._base_client import (
     AsyncPaginator,
     make_request_options,
 )
+from .....lib.streaming import (
+    AssistantEventHandler,
+    AssistantEventHandlerT,
+    AssistantStreamManager,
+    AsyncAssistantEventHandler,
+    AsyncAssistantEventHandlerT,
+    AsyncAssistantStreamManager,
+)
 from .....types.beta.threads import (
     Run,
     run_list_params,
@@ -53,6 +65,7 @@ class Runs(SyncAPIResource):
     def with_streaming_response(self) -> RunsWithStreamingResponse:
         return RunsWithStreamingResponse(self)
 
+    @overload
     def create(
         self,
         thread_id: str,
@@ -62,7 +75,8 @@ class Runs(SyncAPIResource):
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         metadata: Optional[object] | NotGiven = NOT_GIVEN,
         model: Optional[str] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[run_create_params.Tool]] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -96,6 +110,134 @@ class Runs(SyncAPIResource):
               model associated with the assistant. If not, the model associated with the
               assistant will be used.
 
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        stream: Literal[True],
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[AssistantStreamEvent]:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format. Keys
+              can be a maximum of 64 characters long and values can be a maximum of 512
+              characters long.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        stream: bool,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format. Keys
+              can be a maximum of 64 characters long and values can be a maximum of 512
+              characters long.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
           tools: Override the tools the assistant can use for this run. This is useful for
               modifying the behavior on a per-run basis.
 
@@ -107,6 +249,27 @@ class Runs(SyncAPIResource):
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
+        ...
+
+    @required_args(["assistant_id"], ["assistant_id", "stream"])
+    def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
         if not thread_id:
             raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
         extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
@@ -119,6 +282,7 @@ class Runs(SyncAPIResource):
                     "instructions": instructions,
                     "metadata": metadata,
                     "model": model,
+                    "stream": stream,
                     "tools": tools,
                 },
                 run_create_params.RunCreateParams,
@@ -127,6 +291,8 @@ class Runs(SyncAPIResource):
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=Run,
+            stream=stream or False,
+            stream_cls=Stream[AssistantStreamEvent],
         )
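
A minimal usage sketch of the new `stream=True` overload, assuming `client`, `thread`, and `assistant` objects already exist; passing a literal `True` narrows the return type to `Stream[AssistantStreamEvent]`:

events = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
    stream=True,  # selects the Stream[AssistantStreamEvent] overload
)
for event in events:
    # each event is a discriminated union keyed on `event.event`
    if event.event == "thread.run.completed":
        print(event.data.id)
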
 
     def retrieve(
@@ -314,132 +480,88 @@ class Runs(SyncAPIResource):
             cast_to=Run,
         )
 
-    def submit_tool_outputs(
+    @overload
+    def create_and_stream(
         self,
-        run_id: str,
         *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
         thread_id: str,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        When a run has the `status: "requires_action"` and `required_action.type` is
-        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
-        tool calls once they're all completed. All outputs must be submitted in a single
-        request.
-
-        Args:
-          tool_outputs: A list of tools for which the outputs are being submitted.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._post(
-            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
-            body=maybe_transform(
-                {"tool_outputs": tool_outputs}, run_submit_tool_outputs_params.RunSubmitToolOutputsParams
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-        )
-
-
-class AsyncRuns(AsyncAPIResource):
-    @cached_property
-    def steps(self) -> AsyncSteps:
-        return AsyncSteps(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> AsyncRunsWithRawResponse:
-        return AsyncRunsWithRawResponse(self)
+    ) -> AssistantStreamManager[AssistantEventHandler]:
+        """Create a Run stream"""
+        ...
 
-    @cached_property
-    def with_streaming_response(self) -> AsyncRunsWithStreamingResponse:
-        return AsyncRunsWithStreamingResponse(self)
-
-    async def create(
+    @overload
+    def create_and_stream(
         self,
-        thread_id: str,
         *,
         assistant_id: str,
         additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         metadata: Optional[object] | NotGiven = NOT_GIVEN,
         model: Optional[str] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[run_create_params.Tool]] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        Create a run.
-
-        Args:
-          assistant_id: The ID of the
-              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-              execute this run.
-
-          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
-              is useful for modifying the behavior on a per-run basis without overriding other
-              instructions.
-
-          instructions: Overrides the
-              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
-              of the assistant. This is useful for modifying the behavior on a per-run basis.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-              be used to execute this run. If a value is provided here, it will override the
-              model associated with the assistant. If not, the model associated with the
-              assistant will be used.
-
-          tools: Override the tools the assistant can use for this run. This is useful for
-              modifying the behavior on a per-run basis.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
+    ) -> AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a Run stream"""
+        ...
 
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a Run stream"""
         if not thread_id:
             raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._post(
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v1",
+            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        make_request = partial(
+            self._post,
             f"/threads/{thread_id}/runs",
-            body=await async_maybe_transform(
+            body=maybe_transform(
                 {
                     "assistant_id": assistant_id,
                     "additional_instructions": additional_instructions,
                     "instructions": instructions,
                     "metadata": metadata,
                     "model": model,
+                    "stream": True,
                     "tools": tools,
                 },
                 run_create_params.RunCreateParams,
@@ -448,13 +570,19 @@ class AsyncRuns(AsyncAPIResource):
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=Run,
+            stream=True,
+            stream_cls=Stream[AssistantStreamEvent],
         )
+        return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
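
A sketch of the new `create_and_stream` helper, assuming existing `client`, `thread`, and `assistant` objects; the returned manager is used as a context manager that yields the event handler:

with client.beta.threads.runs.create_and_stream(
    thread_id=thread.id,
    assistant_id=assistant.id,
) as stream:
    stream.until_done()  # block until the run reaches a terminal state
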
 
-    async def retrieve(
+    @overload
+    def submit_tool_outputs(
         self,
         run_id: str,
         *,
         thread_id: str,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -463,9 +591,18 @@ class AsyncRuns(AsyncAPIResource):
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> Run:
         """
-        Retrieves a run.
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
 
         Args:
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -474,12 +611,485 @@ class AsyncRuns(AsyncAPIResource):
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._get(
+        ...
+
+    @overload
+    def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        stream: Literal[True],
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[AssistantStreamEvent]:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        stream: bool,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"])
+    def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
+        return self._post(
+            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+            body=maybe_transform(
+                {
+                    "tool_outputs": tool_outputs,
+                    "stream": stream,
+                },
+                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
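
A sketch of the streaming overload of `submit_tool_outputs`, assuming a `run` in the `requires_action` state; the tool call ID and output are hypothetical values:

events = client.beta.threads.runs.submit_tool_outputs(
    run.id,
    thread_id=thread.id,
    tool_outputs=[{"tool_call_id": "call_123", "output": "42"}],  # hypothetical values
    stream=True,  # returns Stream[AssistantStreamEvent] instead of Run
)
for event in events:
    print(event.event)
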
+
+    @overload
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler]:
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state.
+        """
+        ...
+
+    @overload
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandlerT]:
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state.
+        """
+        ...
+
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        event_handler: AssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state.
+        """
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v1",
+            "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = partial(
+            self._post,
+            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+            body=maybe_transform(
+                {
+                    "tool_outputs": tool_outputs,
+                    "stream": True,
+                },
+                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+        return AssistantStreamManager(request, event_handler=event_handler or AssistantEventHandler())
+
+
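
A sketch of the `submit_tool_outputs_stream` helper under the same assumptions; with no `event_handler` argument, a default `AssistantEventHandler` receives the events:

with client.beta.threads.runs.submit_tool_outputs_stream(
    run_id=run.id,
    thread_id=thread.id,
    tool_outputs=[{"tool_call_id": "call_123", "output": "42"}],  # hypothetical values
) as stream:
    for text in stream.text_deltas:  # convenience iterator exposed by the handler
        print(text, end="", flush=True)
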
+class AsyncRuns(AsyncAPIResource):
+    @cached_property
+    def steps(self) -> AsyncSteps:
+        return AsyncSteps(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncRunsWithRawResponse:
+        return AsyncRunsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncRunsWithStreamingResponse:
+        return AsyncRunsWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format. Keys
+              can be a maximum of 64 characters long and values can be a maximum of 512
+              characters long.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        stream: Literal[True],
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[AssistantStreamEvent]:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format. Keys
+              can be a maximum of 64 characters long and values can be a maximum of 512
+              characters long.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        stream: bool,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        """
+        Create a run.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
+              is useful for modifying the behavior on a per-run basis without overriding other
+              instructions.
+
+          instructions: Overrides the
+              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
+              of the assistant. This is useful for modifying the behavior on a per-run basis.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format. Keys
+              can be a maximum of 64 characters long and values can be a maximum of 512
+              characters long.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["assistant_id"], ["assistant_id", "stream"])
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
+        return await self._post(
+            f"/threads/{thread_id}/runs",
+            body=await async_maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "instructions": instructions,
+                    "metadata": metadata,
+                    "model": model,
+                    "stream": stream,
+                    "tools": tools,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
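
The async counterpart mirrors the sync overloads; a sketch assuming an `AsyncOpenAI` client named `client` plus existing `thread` and `assistant` objects:

events = await client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
    stream=True,  # returns AsyncStream[AssistantStreamEvent]
)
async for event in events:
    print(event.event)
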
+
+    async def retrieve(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run:
+        """
+        Retrieves a run.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
+        return await self._get(
             f"/threads/{thread_id}/runs/{run_id}",
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -635,12 +1245,111 @@ class AsyncRuns(AsyncAPIResource):
             cast_to=Run,
         )
 
+    @overload
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+        """Create a Run stream"""
+        ...
+
+    @overload
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+        """Create a Run stream"""
+        ...
+
+    def create_and_stream(
+        self,
+        *,
+        assistant_id: str,
+        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> (
+        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+    ):
+        """Create a Run stream"""
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v1",
+            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = self._post(
+            f"/threads/{thread_id}/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "additional_instructions": additional_instructions,
+                    "instructions": instructions,
+                    "metadata": metadata,
+                    "model": model,
+                    "stream": True,
+                    "tools": tools,
+                },
+                run_create_params.RunCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=True,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
+
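
Note that this `create_and_stream` is a plain `def`: it returns an `AsyncAssistantStreamManager` directly, so it is entered with `async with` rather than awaited. A sketch, assuming a hypothetical `MyHandler` subclass:

from openai import AsyncAssistantEventHandler

class MyHandler(AsyncAssistantEventHandler):  # hypothetical handler
    ...  # override on_* hooks as needed

async with client.beta.threads.runs.create_and_stream(
    thread_id=thread.id,
    assistant_id=assistant.id,
    event_handler=MyHandler(),
) as stream:
    await stream.until_done()  # wait until the run reaches a terminal state
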
+    @overload
     async def submit_tool_outputs(
         self,
         run_id: str,
         *,
         thread_id: str,
         tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -657,6 +1366,86 @@ class AsyncRuns(AsyncAPIResource):
         Args:
           tool_outputs: A list of tools for which the outputs are being submitted.
 
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        stream: Literal[True],
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[AssistantStreamEvent]:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        stream: bool,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        """
+        When a run has the `status: "requires_action"` and `required_action.type` is
+        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
+        tool calls once they're all completed. All outputs must be submitted in a single
+        request.
+
+        Args:
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          tool_outputs: A list of tools for which the outputs are being submitted.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -665,6 +1454,23 @@ class AsyncRuns(AsyncAPIResource):
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
+        ...
+
+    @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"])
+    async def submit_tool_outputs(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
         if not thread_id:
             raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
         if not run_id:
@@ -673,13 +1479,111 @@ class AsyncRuns(AsyncAPIResource):
         return await self._post(
             f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
             body=await async_maybe_transform(
-                {"tool_outputs": tool_outputs}, run_submit_tool_outputs_params.RunSubmitToolOutputsParams
+                {
+                    "tool_outputs": tool_outputs,
+                    "stream": stream,
+                },
+                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
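
Without `stream` (or with `stream=False`), the async method still resolves to a single `Run`; a sketch with the same hypothetical output values:

run = await client.beta.threads.runs.submit_tool_outputs(
    run.id,
    thread_id=thread.id,
    tool_outputs=[{"tool_call_id": "call_123", "output": "42"}],  # hypothetical values
)
print(run.status)
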
+
+    @overload
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state.
+        """
+        ...
+
+    @overload
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state.
+        """
+        ...
+
+    def submit_tool_outputs_stream(
+        self,
+        *,
+        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
+        run_id: str,
+        thread_id: str,
+        event_handler: AsyncAssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> (
+        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+    ):
+        """
+        Submit the tool outputs from a previous run and stream the run to a terminal
+        state.
+        """
+        if not run_id:
+            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+
+        if not thread_id:
+            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
+
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v1",
+            "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = self._post(
+            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
+            body=maybe_transform(
+                {
+                    "tool_outputs": tool_outputs,
+                    "stream": True,
+                },
+                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
             ),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=Run,
+            stream=True,
+            stream_cls=AsyncStream[AssistantStreamEvent],
         )
+        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
 
 
 class RunsWithRawResponse:
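For reference, a minimal sketch of driving the new async `submit_tool_outputs_stream` helper; the thread, run, and tool-call IDs below are hypothetical placeholders:

import asyncio

import openai

async def main() -> None:
    client = openai.AsyncOpenAI()
    # submit the outputs for a run waiting on a tool call, then consume
    # streamed events until the run reaches a terminal state
    async with client.beta.threads.runs.submit_tool_outputs_stream(
        thread_id="thread_abc123",  # hypothetical ID
        run_id="run_abc123",  # hypothetical ID
        tool_outputs=[{"tool_call_id": "call_abc123", "output": "42"}],
    ) as stream:
        await stream.until_done()

asyncio.run(main())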
src/openai/resources/beta/threads/threads.py
@@ -2,7 +2,9 @@
 
 from __future__ import annotations
 
-from typing import Iterable, Optional
+from typing import Iterable, Optional, overload
+from functools import partial
+from typing_extensions import Literal
 
 import httpx
 
@@ -25,6 +27,7 @@ from .messages import (
 )
 from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ...._utils import (
+    required_args,
     maybe_transform,
     async_maybe_transform,
 )
@@ -32,9 +35,11 @@ from .runs.runs import Runs, AsyncRuns
 from ...._compat import cached_property
 from ...._resource import SyncAPIResource, AsyncAPIResource
 from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._streaming import Stream, AsyncStream
 from ....types.beta import (
     Thread,
     ThreadDeleted,
+    AssistantStreamEvent,
     thread_create_params,
     thread_update_params,
     thread_create_and_run_params,
@@ -42,6 +47,14 @@ from ....types.beta import (
 from ...._base_client import (
     make_request_options,
 )
+from ....lib.streaming import (
+    AssistantEventHandler,
+    AssistantEventHandlerT,
+    AssistantStreamManager,
+    AsyncAssistantEventHandler,
+    AsyncAssistantEventHandlerT,
+    AsyncAssistantStreamManager,
+)
 from .messages.messages import Messages, AsyncMessages
 from ....types.beta.threads import Run
 
@@ -222,6 +235,7 @@ class Threads(SyncAPIResource):
             cast_to=ThreadDeleted,
         )
 
+    @overload
     def create_and_run(
         self,
         *,
@@ -229,6 +243,7 @@ class Threads(SyncAPIResource):
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         metadata: Optional[object] | NotGiven = NOT_GIVEN,
         model: Optional[str] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
         tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -259,6 +274,126 @@ class Threads(SyncAPIResource):
               model associated with the assistant. If not, the model associated with the
               assistant will be used.
 
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          thread: If no thread is provided, an empty thread will be created.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        stream: Literal[True],
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[AssistantStreamEvent]:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format. Keys
+              can be a maximum of 64 characters long and values can be a maximum of 512
+              characters long.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          thread: If no thread is provided, an empty thread will be created.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        stream: bool,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format. Keys
+              can be a maximum of 64 characters long and values can be a maximum of 512
+              characters long.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
           thread: If no thread is provided, an empty thread will be created.
 
           tools: Override the tools the assistant can use for this run. This is useful for
@@ -272,6 +407,26 @@ class Threads(SyncAPIResource):
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
+        ...
+
+    @required_args(["assistant_id"], ["assistant_id", "stream"])
+    def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | Stream[AssistantStreamEvent]:
         extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
         return self._post(
             "/threads/runs",
@@ -281,6 +436,95 @@ class Threads(SyncAPIResource):
                     "instructions": instructions,
                     "metadata": metadata,
                     "model": model,
+                    "stream": stream,
+                    "thread": thread,
+                    "tools": tools,
+                },
+                thread_create_and_run_params.ThreadCreateAndRunParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=Stream[AssistantStreamEvent],
+        )
+
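A quick sketch of the two call shapes the overloads above allow (assistant ID hypothetical):

import openai

client = openai.OpenAI()

# default: returns the created `Run` object
run = client.beta.threads.create_and_run(assistant_id="asst_abc123")

# stream=True: returns a raw `Stream[AssistantStreamEvent]` instead
events = client.beta.threads.create_and_run(assistant_id="asst_abc123", stream=True)
for event in events:
    print(event.event)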
+    @overload
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler]:
+        """Create a thread and stream the run back"""
+        ...
+
+    @overload
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        event_handler: AssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a thread and stream the run back"""
+        ...
+
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        event_handler: AssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
+        """Create a thread and stream the run back"""
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v1",
+            "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        make_request = partial(
+            self._post,
+            "/threads/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "instructions": instructions,
+                    "metadata": metadata,
+                    "model": model,
+                    "stream": True,
                     "thread": thread,
                     "tools": tools,
                 },
@@ -290,7 +534,10 @@ class Threads(SyncAPIResource):
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=Run,
+            stream=True,
+            stream_cls=Stream[AssistantStreamEvent],
         )
+        return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
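The manager returned here is meant to be used as a context manager; a minimal sketch, assuming the default event handler and a hypothetical assistant ID:

import openai

client = openai.OpenAI()

with client.beta.threads.create_and_run_stream(
    assistant_id="asst_abc123",  # hypothetical ID
) as stream:
    # `text_deltas` yields just the text fragments from message deltas
    for text in stream.text_deltas:
        print(text, end="", flush=True)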
 
 
 class AsyncThreads(AsyncAPIResource):
@@ -467,6 +714,7 @@ class AsyncThreads(AsyncAPIResource):
             cast_to=ThreadDeleted,
         )
 
+    @overload
     async def create_and_run(
         self,
         *,
@@ -474,6 +722,7 @@ class AsyncThreads(AsyncAPIResource):
         instructions: Optional[str] | NotGiven = NOT_GIVEN,
         metadata: Optional[object] | NotGiven = NOT_GIVEN,
         model: Optional[str] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
         tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -504,6 +753,10 @@ class AsyncThreads(AsyncAPIResource):
               model associated with the assistant. If not, the model associated with the
               assistant will be used.
 
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
           thread: If no thread is provided, an empty thread will be created.
 
           tools: Override the tools the assistant can use for this run. This is useful for
@@ -517,6 +770,142 @@ class AsyncThreads(AsyncAPIResource):
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
+        ...
+
+    @overload
+    async def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        stream: Literal[True],
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[AssistantStreamEvent]:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format. Keys
+              can be a maximum of 64 characters long and values can be a maximum of 512
+              characters long.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          thread: If no thread is provided, an empty thread will be created.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @overload
+    async def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        stream: bool,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
+        """
+        Create a thread and run it in one request.
+
+        Args:
+          assistant_id: The ID of the
+              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
+              execute this run.
+
+          stream: If `true`, returns a stream of events that happen during the Run as server-sent
+              events, terminating when the Run enters a terminal state with a `data: [DONE]`
+              message.
+
+          instructions: Override the default system message of the assistant. This is useful for
+              modifying the behavior on a per-run basis.
+
+          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+              for storing additional information about the object in a structured format. Keys
+              can be a maximum of 64 characters long and values can be a maximum of 512
+              characters long.
+
+          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
+              be used to execute this run. If a value is provided here, it will override the
+              model associated with the assistant. If not, the model associated with the
+              assistant will be used.
+
+          thread: If no thread is provided, an empty thread will be created.
+
+          tools: Override the tools the assistant can use for this run. This is useful for
+              modifying the behavior on a per-run basis.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
+    @required_args(["assistant_id"], ["assistant_id", "stream"])
+    async def create_and_run(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Run | AsyncStream[AssistantStreamEvent]:
         extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
         return await self._post(
             "/threads/runs",
@@ -526,6 +915,97 @@ class AsyncThreads(AsyncAPIResource):
                     "instructions": instructions,
                     "metadata": metadata,
                     "model": model,
+                    "stream": stream,
+                    "thread": thread,
+                    "tools": tools,
+                },
+                thread_create_and_run_params.ThreadCreateAndRunParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Run,
+            stream=stream or False,
+            stream_cls=AsyncStream[AssistantStreamEvent],
+        )
+
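The async variant mirrors the sync overloads; a sketch with a hypothetical assistant ID:

import asyncio

import openai

async def main() -> None:
    client = openai.AsyncOpenAI()
    events = await client.beta.threads.create_and_run(
        assistant_id="asst_abc123",  # hypothetical ID
        stream=True,
    )
    async for event in events:
        print(event.event)

asyncio.run(main())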
+    @overload
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+        """Create a thread and stream the run back"""
+        ...
+
+    @overload
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        event_handler: AsyncAssistantEventHandlerT,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
+        """Create a thread and stream the run back"""
+        ...
+
+    def create_and_run_stream(
+        self,
+        *,
+        assistant_id: str,
+        instructions: Optional[str] | NotGiven = NOT_GIVEN,
+        metadata: Optional[object] | NotGiven = NOT_GIVEN,
+        model: Optional[str] | NotGiven = NOT_GIVEN,
+        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+        event_handler: AsyncAssistantEventHandlerT | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> (
+        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
+        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
+    ):
+        """Create a thread and stream the run back"""
+        extra_headers = {
+            "OpenAI-Beta": "assistants=v1",
+            "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
+            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
+            **(extra_headers or {}),
+        }
+        request = self._post(
+            "/threads/runs",
+            body=maybe_transform(
+                {
+                    "assistant_id": assistant_id,
+                    "instructions": instructions,
+                    "metadata": metadata,
+                    "model": model,
+                    "stream": True,
                     "thread": thread,
                     "tools": tools,
                 },
@@ -535,7 +1015,10 @@ class AsyncThreads(AsyncAPIResource):
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=Run,
+            stream=True,
+            stream_cls=AsyncStream[AssistantStreamEvent],
         )
+        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
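And the async stream helper, used the same way but with `async with`; a sketch:

import asyncio

import openai

async def main() -> None:
    client = openai.AsyncOpenAI()
    async with client.beta.threads.create_and_run_stream(
        assistant_id="asst_abc123",  # hypothetical ID
    ) as stream:
        await stream.until_done()

asyncio.run(main())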
 
 
 class ThreadsWithRawResponse:
src/openai/resources/chat/completions.py
@@ -206,7 +206,7 @@ class Completions(SyncAPIResource):
 
           tools: A list of tools the model may call. Currently, only functions are supported as a
               tool. Use this to provide a list of functions the model may generate JSON inputs
-              for.
+              for. A maximum of 128 functions is supported.
 
           top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
               return at each token position, each with an associated log probability.
@@ -396,7 +396,7 @@ class Completions(SyncAPIResource):
 
           tools: A list of tools the model may call. Currently, only functions are supported as a
               tool. Use this to provide a list of functions the model may generate JSON inputs
-              for.
+              for. A maximum of 128 functions is supported.
 
           top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
               return at each token position, each with an associated log probability.
@@ -586,7 +586,7 @@ class Completions(SyncAPIResource):
 
           tools: A list of tools the model may call. Currently, only functions are supported as a
               tool. Use this to provide a list of functions the model may generate JSON inputs
-              for.
+              for. A maximum of 128 functions is supported.
 
           top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
               return at each token position, each with an associated log probability.
@@ -873,7 +873,7 @@ class AsyncCompletions(AsyncAPIResource):
 
           tools: A list of tools the model may call. Currently, only functions are supported as a
               tool. Use this to provide a list of functions the model may generate JSON inputs
-              for.
+              for. A maximum of 128 functions is supported.
 
           top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
               return at each token position, each with an associated log probability.
@@ -1063,7 +1063,7 @@ class AsyncCompletions(AsyncAPIResource):
 
           tools: A list of tools the model may call. Currently, only functions are supported as a
               tool. Use this to provide a list of functions the model may generate JSON inputs
-              for.
+              for. A maximum of 128 functions is supported.
 
           top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
               return at each token position, each with an associated log probability.
@@ -1253,7 +1253,7 @@ class AsyncCompletions(AsyncAPIResource):
 
           tools: A list of tools the model may call. Currently, only functions are supported as a
               tool. Use this to provide a list of functions the model may generate JSON inputs
-              for.
+              for. A maximum of 128 functions is supported.
 
           top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
               return at each token position, each with an associated log probability.
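The updated `tools` docs apply to the standard function-calling call shape; a sketch with an illustrative schema (the 128-entry cap is the newly documented limit):

import openai

client = openai.OpenAI()

completion = client.chat.completions.create(
    model="gpt-4-1106-preview",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=[  # up to 128 function tools are accepted
        {
            "type": "function",
            "function": {
                "name": "get_weather",  # illustrative function
                "description": "Get the current weather for a city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ],
)
print(completion.choices[0].message.tool_calls)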
src/openai/resources/completions.py
@@ -157,6 +157,8 @@ class Completions(SyncAPIResource):
 
           suffix: The suffix that comes after a completion of inserted text.
 
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
               make the output more random, while lower values like 0.2 will make it more
               focused and deterministic.
@@ -305,6 +307,8 @@ class Completions(SyncAPIResource):
 
           suffix: The suffix that comes after a completion of inserted text.
 
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
               make the output more random, while lower values like 0.2 will make it more
               focused and deterministic.
@@ -453,6 +457,8 @@ class Completions(SyncAPIResource):
 
           suffix: The suffix that comes after a completion of inserted text.
 
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
               make the output more random, while lower values like 0.2 will make it more
               focused and deterministic.
@@ -671,6 +677,8 @@ class AsyncCompletions(AsyncAPIResource):
 
           suffix: The suffix that comes after a completion of inserted text.
 
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
               make the output more random, while lower values like 0.2 will make it more
               focused and deterministic.
@@ -819,6 +827,8 @@ class AsyncCompletions(AsyncAPIResource):
 
           suffix: The suffix that comes after a completion of inserted text.
 
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
               make the output more random, while lower values like 0.2 will make it more
               focused and deterministic.
@@ -967,6 +977,8 @@ class AsyncCompletions(AsyncAPIResource):
 
           suffix: The suffix that comes after a completion of inserted text.
 
+              This parameter is only supported for `gpt-3.5-turbo-instruct`.
+
           temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
               make the output more random, while lower values like 0.2 will make it more
               focused and deterministic.
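Given the new note that `suffix` only works with `gpt-3.5-turbo-instruct`, a sketch of insert-style usage (prompt and suffix values are illustrative):

import openai

client = openai.OpenAI()

completion = client.completions.create(
    model="gpt-3.5-turbo-instruct",  # the only model documented to support `suffix`
    prompt="def fib(n):\n",
    suffix="\nprint(fib(10))",
    max_tokens=64,
)
print(completion.choices[0].text)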
src/openai/types/beta/threads/runs/__init__.py
@@ -3,9 +3,20 @@
 from __future__ import annotations
 
 from .run_step import RunStep as RunStep
-from .code_tool_call import CodeToolCall as CodeToolCall
+from .tool_call import ToolCall as ToolCall
+from .run_step_delta import RunStepDelta as RunStepDelta
+from .tool_call_delta import ToolCallDelta as ToolCallDelta
 from .step_list_params import StepListParams as StepListParams
 from .function_tool_call import FunctionToolCall as FunctionToolCall
 from .retrieval_tool_call import RetrievalToolCall as RetrievalToolCall
+from .run_step_delta_event import RunStepDeltaEvent as RunStepDeltaEvent
+from .code_interpreter_logs import CodeInterpreterLogs as CodeInterpreterLogs
+from .tool_call_delta_object import ToolCallDeltaObject as ToolCallDeltaObject
 from .tool_calls_step_details import ToolCallsStepDetails as ToolCallsStepDetails
+from .function_tool_call_delta import FunctionToolCallDelta as FunctionToolCallDelta
+from .retrieval_tool_call_delta import RetrievalToolCallDelta as RetrievalToolCallDelta
+from .code_interpreter_tool_call import CodeInterpreterToolCall as CodeInterpreterToolCall
+from .run_step_delta_message_delta import RunStepDeltaMessageDelta as RunStepDeltaMessageDelta
+from .code_interpreter_output_image import CodeInterpreterOutputImage as CodeInterpreterOutputImage
 from .message_creation_step_details import MessageCreationStepDetails as MessageCreationStepDetails
+from .code_interpreter_tool_call_delta import CodeInterpreterToolCallDelta as CodeInterpreterToolCallDelta
src/openai/types/beta/threads/runs/code_interpreter_logs.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ....._models import BaseModel
+
+__all__ = ["CodeInterpreterLogs"]
+
+
+class CodeInterpreterLogs(BaseModel):
+    index: int
+    """The index of the output in the outputs array."""
+
+    type: Literal["logs"]
+    """Always `logs`."""
+
+    logs: Optional[str] = None
+    """The text output from the Code Interpreter tool call."""
src/openai/types/beta/threads/runs/code_interpreter_output_image.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ....._models import BaseModel
+
+__all__ = ["CodeInterpreterOutputImage", "Image"]
+
+
+class Image(BaseModel):
+    file_id: Optional[str] = None
+    """
+    The [file](https://platform.openai.com/docs/api-reference/files) ID of the
+    image.
+    """
+
+
+class CodeInterpreterOutputImage(BaseModel):
+    index: int
+    """The index of the output in the outputs array."""
+
+    type: Literal["image"]
+    """Always `image`."""
+
+    image: Optional[Image] = None
src/openai/types/beta/threads/runs/code_tool_call.py → src/openai/types/beta/threads/runs/code_interpreter_tool_call.py
@@ -7,7 +7,7 @@ from ....._utils import PropertyInfo
 from ....._models import BaseModel
 
 __all__ = [
-    "CodeToolCall",
+    "CodeInterpreterToolCall",
     "CodeInterpreter",
     "CodeInterpreterOutput",
     "CodeInterpreterOutputLogs",
@@ -56,7 +56,7 @@ class CodeInterpreter(BaseModel):
     """
 
 
-class CodeToolCall(BaseModel):
+class CodeInterpreterToolCall(BaseModel):
     id: str
     """The ID of the tool call."""
 
src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py
@@ -0,0 +1,44 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated
+
+from ....._utils import PropertyInfo
+from ....._models import BaseModel
+from .code_interpreter_logs import CodeInterpreterLogs
+from .code_interpreter_output_image import CodeInterpreterOutputImage
+
+__all__ = ["CodeInterpreterToolCallDelta", "CodeInterpreter", "CodeInterpreterOutput"]
+
+CodeInterpreterOutput = Annotated[
+    Union[CodeInterpreterLogs, CodeInterpreterOutputImage], PropertyInfo(discriminator="type")
+]
+
+
+class CodeInterpreter(BaseModel):
+    input: Optional[str] = None
+    """The input to the Code Interpreter tool call."""
+
+    outputs: Optional[List[CodeInterpreterOutput]] = None
+    """The outputs from the Code Interpreter tool call.
+
+    Code Interpreter can output one or more items, including text (`logs`) or images
+    (`image`). Each of these is represented by a different object type.
+    """
+
+
+class CodeInterpreterToolCallDelta(BaseModel):
+    index: int
+    """The index of the tool call in the tool calls array."""
+
+    type: Literal["code_interpreter"]
+    """The type of tool call.
+
+    This is always going to be `code_interpreter` for this type of tool call.
+    """
+
+    id: Optional[str] = None
+    """The ID of the tool call."""
+
+    code_interpreter: Optional[CodeInterpreter] = None
+    """The Code Interpreter tool call definition."""
src/openai/types/beta/threads/runs/function_tool_call_delta.py
@@ -0,0 +1,41 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ....._models import BaseModel
+
+__all__ = ["FunctionToolCallDelta", "Function"]
+
+
+class Function(BaseModel):
+    arguments: Optional[str] = None
+    """The arguments passed to the function."""
+
+    name: Optional[str] = None
+    """The name of the function."""
+
+    output: Optional[str] = None
+    """The output of the function.
+
+    This will be `null` if the outputs have not been
+    [submitted](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs)
+    yet.
+    """
+
+
+class FunctionToolCallDelta(BaseModel):
+    index: int
+    """The index of the tool call in the tool calls array."""
+
+    type: Literal["function"]
+    """The type of tool call.
+
+    This is always going to be `function` for this type of tool call.
+    """
+
+    id: Optional[str] = None
+    """The ID of the tool call object."""
+
+    function: Optional[Function] = None
+    """The definition of the function that was called."""
src/openai/types/beta/threads/runs/retrieval_tool_call_delta.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ....._models import BaseModel
+
+__all__ = ["RetrievalToolCallDelta"]
+
+
+class RetrievalToolCallDelta(BaseModel):
+    index: int
+    """The index of the tool call in the tool calls array."""
+
+    type: Literal["retrieval"]
+    """The type of tool call.
+
+    This is always going to be `retrieval` for this type of tool call.
+    """
+
+    id: Optional[str] = None
+    """The ID of the tool call object."""
+
+    retrieval: Optional[object] = None
+    """For now, this is always going to be an empty object."""
src/openai/types/beta/threads/runs/run_step_delta.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Union, Optional
+from typing_extensions import Annotated
+
+from ....._utils import PropertyInfo
+from ....._models import BaseModel
+from .tool_call_delta_object import ToolCallDeltaObject
+from .run_step_delta_message_delta import RunStepDeltaMessageDelta
+
+__all__ = ["RunStepDelta", "StepDetails"]
+
+StepDetails = Annotated[Union[RunStepDeltaMessageDelta, ToolCallDeltaObject], PropertyInfo(discriminator="type")]
+
+
+class RunStepDelta(BaseModel):
+    step_details: Optional[StepDetails] = None
+    """The details of the run step."""
src/openai/types/beta/threads/runs/run_step_delta_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing_extensions import Literal
+
+from ....._models import BaseModel
+from .run_step_delta import RunStepDelta
+
+__all__ = ["RunStepDeltaEvent"]
+
+
+class RunStepDeltaEvent(BaseModel):
+    id: str
+    """The identifier of the run step, which can be referenced in API endpoints."""
+
+    delta: RunStepDelta
+    """The delta containing the fields that have changed on the run step."""
+
+    object: Literal["thread.run.step.delta"]
+    """The object type, which is always `thread.run.step.delta`."""
src/openai/types/beta/threads/runs/run_step_delta_message_delta.py
@@ -0,0 +1,20 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ....._models import BaseModel
+
+__all__ = ["RunStepDeltaMessageDelta", "MessageCreation"]
+
+
+class MessageCreation(BaseModel):
+    message_id: Optional[str] = None
+    """The ID of the message that was created by this run step."""
+
+
+class RunStepDeltaMessageDelta(BaseModel):
+    type: Literal["message_creation"]
+    """Always `message_creation`."""
+
+    message_creation: Optional[MessageCreation] = None
src/openai/types/beta/threads/runs/tool_call.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Union
+from typing_extensions import Annotated
+
+from ....._utils import PropertyInfo
+from .function_tool_call import FunctionToolCall
+from .retrieval_tool_call import RetrievalToolCall
+from .code_interpreter_tool_call import CodeInterpreterToolCall
+
+__all__ = ["ToolCall"]
+
+ToolCall = Annotated[
+    Union[CodeInterpreterToolCall, RetrievalToolCall, FunctionToolCall], PropertyInfo(discriminator="type")
+]
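Because each member carries a `type` discriminator `Literal`, type checkers can narrow the union with plain comparisons; a sketch:

from openai.types.beta.threads.runs import ToolCall

def describe(tool_call: ToolCall) -> str:
    # each branch narrows `tool_call` to one concrete model
    if tool_call.type == "code_interpreter":
        return f"code: {tool_call.code_interpreter.input}"
    if tool_call.type == "retrieval":
        return "retrieval call"
    return f"function: {tool_call.function.name}"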
src/openai/types/beta/threads/runs/tool_call_delta.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Union
+from typing_extensions import Annotated
+
+from ....._utils import PropertyInfo
+from .function_tool_call_delta import FunctionToolCallDelta
+from .retrieval_tool_call_delta import RetrievalToolCallDelta
+from .code_interpreter_tool_call_delta import CodeInterpreterToolCallDelta
+
+__all__ = ["ToolCallDelta"]
+
+ToolCallDelta = Annotated[
+    Union[CodeInterpreterToolCallDelta, RetrievalToolCallDelta, FunctionToolCallDelta],
+    PropertyInfo(discriminator="type"),
+]
src/openai/types/beta/threads/runs/tool_call_delta_object.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ....._models import BaseModel
+from .tool_call_delta import ToolCallDelta
+
+__all__ = ["ToolCallDeltaObject"]
+
+
+class ToolCallDeltaObject(BaseModel):
+    type: Literal["tool_calls"]
+    """Always `tool_calls`."""
+
+    tool_calls: Optional[List[ToolCallDelta]] = None
+    """An array of tool calls the run step was involved in.
+
+    These can be associated with one of three types of tools: `code_interpreter`,
+    `retrieval`, or `function`.
+    """
src/openai/types/beta/threads/runs/tool_calls_step_details.py
@@ -1,17 +1,12 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from typing import List, Union
-from typing_extensions import Literal, Annotated
+from typing import List
+from typing_extensions import Literal
 
-from ....._utils import PropertyInfo
+from .tool_call import ToolCall
 from ....._models import BaseModel
-from .code_tool_call import CodeToolCall
-from .function_tool_call import FunctionToolCall
-from .retrieval_tool_call import RetrievalToolCall
 
-__all__ = ["ToolCallsStepDetails", "ToolCall"]
-
-ToolCall = Annotated[Union[CodeToolCall, RetrievalToolCall, FunctionToolCall], PropertyInfo(discriminator="type")]
+__all__ = ["ToolCallsStepDetails"]
 
 
 class ToolCallsStepDetails(BaseModel):
src/openai/types/beta/threads/__init__.py
@@ -3,15 +3,31 @@
 from __future__ import annotations
 
 from .run import Run as Run
+from .text import Text as Text
+from .message import Message as Message
+from .annotation import Annotation as Annotation
+from .image_file import ImageFile as ImageFile
 from .run_status import RunStatus as RunStatus
-from .thread_message import ThreadMessage as ThreadMessage
+from .text_delta import TextDelta as TextDelta
+from .message_delta import MessageDelta as MessageDelta
+from .message_content import MessageContent as MessageContent
 from .run_list_params import RunListParams as RunListParams
+from .annotation_delta import AnnotationDelta as AnnotationDelta
+from .image_file_delta import ImageFileDelta as ImageFileDelta
+from .text_delta_block import TextDeltaBlock as TextDeltaBlock
 from .run_create_params import RunCreateParams as RunCreateParams
 from .run_update_params import RunUpdateParams as RunUpdateParams
+from .text_content_block import TextContentBlock as TextContentBlock
+from .message_delta_event import MessageDeltaEvent as MessageDeltaEvent
 from .message_list_params import MessageListParams as MessageListParams
-from .message_content_text import MessageContentText as MessageContentText
+from .file_path_annotation import FilePathAnnotation as FilePathAnnotation
+from .message_content_delta import MessageContentDelta as MessageContentDelta
 from .message_create_params import MessageCreateParams as MessageCreateParams
 from .message_update_params import MessageUpdateParams as MessageUpdateParams
-from .message_content_image_file import MessageContentImageFile as MessageContentImageFile
+from .image_file_delta_block import ImageFileDeltaBlock as ImageFileDeltaBlock
+from .file_citation_annotation import FileCitationAnnotation as FileCitationAnnotation
+from .image_file_content_block import ImageFileContentBlock as ImageFileContentBlock
+from .file_path_delta_annotation import FilePathDeltaAnnotation as FilePathDeltaAnnotation
+from .file_citation_delta_annotation import FileCitationDeltaAnnotation as FileCitationDeltaAnnotation
 from .run_submit_tool_outputs_params import RunSubmitToolOutputsParams as RunSubmitToolOutputsParams
 from .required_action_function_tool_call import RequiredActionFunctionToolCall as RequiredActionFunctionToolCall
src/openai/types/beta/threads/annotation.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Union
+from typing_extensions import Annotated
+
+from ...._utils import PropertyInfo
+from .file_path_annotation import FilePathAnnotation
+from .file_citation_annotation import FileCitationAnnotation
+
+__all__ = ["Annotation"]
+
+Annotation = Annotated[Union[FileCitationAnnotation, FilePathAnnotation], PropertyInfo(discriminator="type")]
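A sketch of how the annotation union might be consumed, e.g. collecting the files a text block cites:

from openai.types.beta.threads import Text

def cited_file_ids(text: Text) -> list[str]:
    # only `file_citation` annotations carry a source file ID
    return [a.file_citation.file_id for a in text.annotations if a.type == "file_citation"]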
src/openai/types/beta/threads/annotation_delta.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Union
+from typing_extensions import Annotated
+
+from ...._utils import PropertyInfo
+from .file_path_delta_annotation import FilePathDeltaAnnotation
+from .file_citation_delta_annotation import FileCitationDeltaAnnotation
+
+__all__ = ["AnnotationDelta"]
+
+AnnotationDelta = Annotated[
+    Union[FileCitationDeltaAnnotation, FilePathDeltaAnnotation], PropertyInfo(discriminator="type")
+]
src/openai/types/beta/threads/file_citation_annotation.py
@@ -0,0 +1,29 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["FileCitationAnnotation", "FileCitation"]
+
+
+class FileCitation(BaseModel):
+    file_id: str
+    """The ID of the specific File the citation is from."""
+
+    quote: str
+    """The specific quote in the file."""
+
+
+class FileCitationAnnotation(BaseModel):
+    end_index: int
+
+    file_citation: FileCitation
+
+    start_index: int
+
+    text: str
+    """The text in the message content that needs to be replaced."""
+
+    type: Literal["file_citation"]
+    """Always `file_citation`."""
src/openai/types/beta/threads/file_citation_delta_annotation.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["FileCitationDeltaAnnotation", "FileCitation"]
+
+
+class FileCitation(BaseModel):
+    file_id: Optional[str] = None
+    """The ID of the specific File the citation is from."""
+
+    quote: Optional[str] = None
+    """The specific quote in the file."""
+
+
+class FileCitationDeltaAnnotation(BaseModel):
+    index: int
+    """The index of the annotation in the text content part."""
+
+    type: Literal["file_citation"]
+    """Always `file_citation`."""
+
+    end_index: Optional[int] = None
+
+    file_citation: Optional[FileCitation] = None
+
+    start_index: Optional[int] = None
+
+    text: Optional[str] = None
+    """The text in the message content that needs to be replaced."""
src/openai/types/beta/threads/file_path_annotation.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["FilePathAnnotation", "FilePath"]
+
+
+class FilePath(BaseModel):
+    file_id: str
+    """The ID of the file that was generated."""
+
+
+class FilePathAnnotation(BaseModel):
+    end_index: int
+
+    file_path: FilePath
+
+    start_index: int
+
+    text: str
+    """The text in the message content that needs to be replaced."""
+
+    type: Literal["file_path"]
+    """Always `file_path`."""
src/openai/types/beta/threads/file_path_delta_annotation.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["FilePathDeltaAnnotation", "FilePath"]
+
+
+class FilePath(BaseModel):
+    file_id: Optional[str] = None
+    """The ID of the file that was generated."""
+
+
+class FilePathDeltaAnnotation(BaseModel):
+    index: int
+    """The index of the annotation in the text content part."""
+
+    type: Literal["file_path"]
+    """Always `file_path`."""
+
+    end_index: Optional[int] = None
+
+    file_path: Optional[FilePath] = None
+
+    start_index: Optional[int] = None
+
+    text: Optional[str] = None
+    """The text in the message content that needs to be replaced."""
src/openai/types/beta/threads/message_content_image_file.py → src/openai/types/beta/threads/image_file.py
@@ -1,10 +1,8 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from typing_extensions import Literal
-
 from ...._models import BaseModel
 
-__all__ = ["MessageContentImageFile", "ImageFile"]
+__all__ = ["ImageFile"]
 
 
 class ImageFile(BaseModel):
@@ -13,10 +11,3 @@ class ImageFile(BaseModel):
     The [File](https://platform.openai.com/docs/api-reference/files) ID of the image
     in the message content.
     """
-
-
-class MessageContentImageFile(BaseModel):
-    image_file: ImageFile
-
-    type: Literal["image_file"]
-    """Always `image_file`."""
src/openai/types/beta/threads/image_file_content_block.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .image_file import ImageFile
+
+__all__ = ["ImageFileContentBlock"]
+
+
+class ImageFileContentBlock(BaseModel):
+    image_file: ImageFile
+
+    type: Literal["image_file"]
+    """Always `image_file`."""
src/openai/types/beta/threads/image_file_delta.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Optional
+
+from ...._models import BaseModel
+
+__all__ = ["ImageFileDelta"]
+
+
+class ImageFileDelta(BaseModel):
+    file_id: Optional[str] = None
+    """
+    The [File](https://platform.openai.com/docs/api-reference/files) ID of the image
+    in the message content.
+    """
src/openai/types/beta/threads/image_file_delta_block.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .image_file_delta import ImageFileDelta
+
+__all__ = ["ImageFileDeltaBlock"]
+
+
+class ImageFileDeltaBlock(BaseModel):
+    index: int
+    """The index of the content part in the message."""
+
+    type: Literal["image_file"]
+    """Always `image_file`."""
+
+    image_file: Optional[ImageFileDelta] = None
src/openai/types/beta/threads/thread_message.py → src/openai/types/beta/threads/message.py
@@ -1,19 +1,20 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from typing import List, Union, Optional
-from typing_extensions import Literal, Annotated
+from typing import List, Optional
+from typing_extensions import Literal
 
-from ...._utils import PropertyInfo
 from ...._models import BaseModel
-from .message_content_text import MessageContentText
-from .message_content_image_file import MessageContentImageFile
+from .message_content import MessageContent
 
-__all__ = ["ThreadMessage", "Content"]
+__all__ = ["Message", "IncompleteDetails"]
 
-Content = Annotated[Union[MessageContentImageFile, MessageContentText], PropertyInfo(discriminator="type")]
 
+class IncompleteDetails(BaseModel):
+    reason: Literal["content_filter", "max_tokens", "run_cancelled", "run_expired", "run_failed"]
+    """The reason the message is incomplete."""
 
-class ThreadMessage(BaseModel):
+
+class Message(BaseModel):
     id: str
     """The identifier, which can be referenced in API endpoints."""
 
@@ -24,7 +25,10 @@ class ThreadMessage(BaseModel):
     authored this message.
     """
 
-    content: List[Content]
+    completed_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the message was completed."""
+
+    content: List[MessageContent]
     """The content of the message in array of text and/or images."""
 
     created_at: int
@@ -37,6 +41,12 @@ class ThreadMessage(BaseModel):
     that can access files. A maximum of 10 files can be attached to a message.
     """
 
+    incomplete_at: Optional[int] = None
+    """The Unix timestamp (in seconds) for when the message was marked as incomplete."""
+
+    incomplete_details: Optional[IncompleteDetails] = None
+    """On an incomplete message, details about why the message is incomplete."""
+
     metadata: Optional[object] = None
     """Set of 16 key-value pairs that can be attached to an object.
 
@@ -58,6 +68,12 @@ class ThreadMessage(BaseModel):
     authoring of this message.
     """
 
+    status: Literal["in_progress", "incomplete", "completed"]
+    """
+    The status of the message, which can be either `in_progress`, `incomplete`, or
+    `completed`.
+    """
+
     thread_id: str
     """
     The [thread](https://platform.openai.com/docs/api-reference/threads) ID that
src/openai/types/beta/threads/message_content.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Union
+from typing_extensions import Annotated
+
+from ...._utils import PropertyInfo
+from .text_content_block import TextContentBlock
+from .image_file_content_block import ImageFileContentBlock
+
+__all__ = ["MessageContent"]
+
+MessageContent = Annotated[Union[ImageFileContentBlock, TextContentBlock], PropertyInfo(discriminator="type")]
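
The new `MessageContent` alias replaces the old `Content` union from `thread_message.py` and is discriminated on `type`, so a plain string comparison narrows each block to `TextContentBlock` or `ImageFileContentBlock`. A minimal sketch, not part of this commit (the thread ID is a placeholder):

    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    for message in client.beta.threads.messages.list(thread_id="thread_abc123"):
        for block in message.content:
            if block.type == "text":
                print(block.text.value)          # narrowed to TextContentBlock
            elif block.type == "image_file":
                print(block.image_file.file_id)  # narrowed to ImageFileContentBlock
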
src/openai/types/beta/threads/message_content_delta.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Union
+from typing_extensions import Annotated
+
+from ...._utils import PropertyInfo
+from .text_delta_block import TextDeltaBlock
+from .image_file_delta_block import ImageFileDeltaBlock
+
+__all__ = ["MessageContentDelta"]
+
+MessageContentDelta = Annotated[Union[ImageFileDeltaBlock, TextDeltaBlock], PropertyInfo(discriminator="type")]
src/openai/types/beta/threads/message_content_text.py
@@ -1,77 +0,0 @@
-# File generated from our OpenAPI spec by Stainless.
-
-from typing import List, Union
-from typing_extensions import Literal, Annotated
-
-from ...._utils import PropertyInfo
-from ...._models import BaseModel
-
-__all__ = [
-    "MessageContentText",
-    "Text",
-    "TextAnnotation",
-    "TextAnnotationFileCitation",
-    "TextAnnotationFileCitationFileCitation",
-    "TextAnnotationFilePath",
-    "TextAnnotationFilePathFilePath",
-]
-
-
-class TextAnnotationFileCitationFileCitation(BaseModel):
-    file_id: str
-    """The ID of the specific File the citation is from."""
-
-    quote: str
-    """The specific quote in the file."""
-
-
-class TextAnnotationFileCitation(BaseModel):
-    end_index: int
-
-    file_citation: TextAnnotationFileCitationFileCitation
-
-    start_index: int
-
-    text: str
-    """The text in the message content that needs to be replaced."""
-
-    type: Literal["file_citation"]
-    """Always `file_citation`."""
-
-
-class TextAnnotationFilePathFilePath(BaseModel):
-    file_id: str
-    """The ID of the file that was generated."""
-
-
-class TextAnnotationFilePath(BaseModel):
-    end_index: int
-
-    file_path: TextAnnotationFilePathFilePath
-
-    start_index: int
-
-    text: str
-    """The text in the message content that needs to be replaced."""
-
-    type: Literal["file_path"]
-    """Always `file_path`."""
-
-
-TextAnnotation = Annotated[
-    Union[TextAnnotationFileCitation, TextAnnotationFilePath], PropertyInfo(discriminator="type")
-]
-
-
-class Text(BaseModel):
-    annotations: List[TextAnnotation]
-
-    value: str
-    """The data that makes up the text."""
-
-
-class MessageContentText(BaseModel):
-    text: Text
-
-    type: Literal["text"]
-    """Always `text`."""
src/openai/types/beta/threads/message_delta.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .message_content_delta import MessageContentDelta
+
+__all__ = ["MessageDelta"]
+
+
+class MessageDelta(BaseModel):
+    content: Optional[List[MessageContentDelta]] = None
+    """The content of the message in array of text and/or images."""
+
+    file_ids: Optional[List[str]] = None
+    """
+    A list of [file](https://platform.openai.com/docs/api-reference/files) IDs that
+    the assistant should use. Useful for tools like retrieval and code_interpreter
+    that can access files. A maximum of 10 files can be attached to a message.
+    """
+
+    role: Optional[Literal["user", "assistant"]] = None
+    """The entity that produced the message. One of `user` or `assistant`."""
src/openai/types/beta/threads/message_delta_event.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .message_delta import MessageDelta
+
+__all__ = ["MessageDeltaEvent"]
+
+
+class MessageDeltaEvent(BaseModel):
+    id: str
+    """The identifier of the message, which can be referenced in API endpoints."""
+
+    delta: MessageDelta
+    """The delta containing the fields that have changed on the Message."""
+
+    object: Literal["thread.message.delta"]
+    """The object type, which is always `thread.message.delta`."""
src/openai/types/beta/threads/run.py
@@ -1,24 +1,14 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from typing import List, Union, Optional
+from typing import List, Optional
 from typing_extensions import Literal
 
-from ...shared import FunctionDefinition
 from ...._models import BaseModel
 from .run_status import RunStatus
+from ..assistant_tool import AssistantTool
 from .required_action_function_tool_call import RequiredActionFunctionToolCall
 
-__all__ = [
-    "Run",
-    "LastError",
-    "RequiredAction",
-    "RequiredActionSubmitToolOutputs",
-    "Tool",
-    "ToolAssistantToolsCode",
-    "ToolAssistantToolsRetrieval",
-    "ToolAssistantToolsFunction",
-    "Usage",
-]
+__all__ = ["Run", "LastError", "RequiredAction", "RequiredActionSubmitToolOutputs", "Usage"]
 
 
 class LastError(BaseModel):
@@ -42,26 +32,6 @@ class RequiredAction(BaseModel):
     """For now, this is always `submit_tool_outputs`."""
 
 
-class ToolAssistantToolsCode(BaseModel):
-    type: Literal["code_interpreter"]
-    """The type of tool being defined: `code_interpreter`"""
-
-
-class ToolAssistantToolsRetrieval(BaseModel):
-    type: Literal["retrieval"]
-    """The type of tool being defined: `retrieval`"""
-
-
-class ToolAssistantToolsFunction(BaseModel):
-    function: FunctionDefinition
-
-    type: Literal["function"]
-    """The type of tool being defined: `function`"""
-
-
-Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction]
-
-
 class Usage(BaseModel):
     completion_tokens: int
     """Number of completion tokens used over the course of the run."""
@@ -93,7 +63,7 @@ class Run(BaseModel):
     created_at: int
     """The Unix timestamp (in seconds) for when the run was created."""
 
-    expires_at: int
+    expires_at: Optional[int] = None
     """The Unix timestamp (in seconds) for when the run will expire."""
 
     failed_at: Optional[int] = None
@@ -156,7 +126,7 @@ class Run(BaseModel):
     that was executed on as a part of this run.
     """
 
-    tools: List[Tool]
+    tools: List[AssistantTool]
     """
     The list of tools that the
     [assistant](https://platform.openai.com/docs/api-reference/assistants) used for
src/openai/types/beta/threads/run_create_params.py
@@ -5,18 +5,12 @@ from __future__ import annotations
 from typing import Union, Iterable, Optional
 from typing_extensions import Literal, Required, TypedDict
 
-from ....types import shared_params
+from ..assistant_tool_param import AssistantToolParam
 
-__all__ = [
-    "RunCreateParams",
-    "Tool",
-    "ToolAssistantToolsCode",
-    "ToolAssistantToolsRetrieval",
-    "ToolAssistantToolsFunction",
-]
+__all__ = ["RunCreateParamsBase", "RunCreateParamsNonStreaming", "RunCreateParamsStreaming"]
 
 
-class RunCreateParams(TypedDict, total=False):
+class RunCreateParamsBase(TypedDict, total=False):
     assistant_id: Required[str]
     """
     The ID of the
@@ -54,28 +48,29 @@ class RunCreateParams(TypedDict, total=False):
     assistant will be used.
     """
 
-    tools: Optional[Iterable[Tool]]
+    tools: Optional[Iterable[AssistantToolParam]]
     """Override the tools the assistant can use for this run.
 
     This is useful for modifying the behavior on a per-run basis.
     """
 
 
-class ToolAssistantToolsCode(TypedDict, total=False):
-    type: Required[Literal["code_interpreter"]]
-    """The type of tool being defined: `code_interpreter`"""
-
-
-class ToolAssistantToolsRetrieval(TypedDict, total=False):
-    type: Required[Literal["retrieval"]]
-    """The type of tool being defined: `retrieval`"""
-
+class RunCreateParamsNonStreaming(RunCreateParamsBase):
+    stream: Optional[Literal[False]]
+    """
+    If `true`, returns a stream of events that happen during the Run as server-sent
+    events, terminating when the Run enters a terminal state with a `data: [DONE]`
+    message.
+    """
 
-class ToolAssistantToolsFunction(TypedDict, total=False):
-    function: Required[shared_params.FunctionDefinition]
 
-    type: Required[Literal["function"]]
-    """The type of tool being defined: `function`"""
+class RunCreateParamsStreaming(RunCreateParamsBase):
+    stream: Required[Literal[True]]
+    """
+    If `true`, returns a stream of events that happen during the Run as server-sent
+    events, terminating when the Run enters a terminal state with a `data: [DONE]`
+    message.
+    """
 
 
-Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction]
+RunCreateParams = Union[RunCreateParamsNonStreaming, RunCreateParamsStreaming]
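
Splitting `RunCreateParams` into `...NonStreaming` and `...Streaming` variants lets the resource method be overloaded on the `stream` literal, so the static return type follows the argument. A sketch of what a type checker should infer (placeholder IDs; the exact return annotations are the library's overloads, not defined in this file):

    from openai import OpenAI

    client = OpenAI()

    run = client.beta.threads.runs.create(
        thread_id="thread_abc123",
        assistant_id="asst_abc123",
    )  # inferred as Run

    events = client.beta.threads.runs.create(
        thread_id="thread_abc123",
        assistant_id="asst_abc123",
        stream=True,
    )  # inferred as Stream[AssistantStreamEvent]
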
src/openai/types/beta/threads/run_submit_tool_outputs_params.py
@@ -2,13 +2,18 @@
 
 from __future__ import annotations
 
-from typing import Iterable
-from typing_extensions import Required, TypedDict
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypedDict
 
-__all__ = ["RunSubmitToolOutputsParams", "ToolOutput"]
+__all__ = [
+    "RunSubmitToolOutputsParamsBase",
+    "ToolOutput",
+    "RunSubmitToolOutputsParamsNonStreaming",
+    "RunSubmitToolOutputsParamsStreaming",
+]
 
 
-class RunSubmitToolOutputsParams(TypedDict, total=False):
+class RunSubmitToolOutputsParamsBase(TypedDict, total=False):
     thread_id: Required[str]
 
     tool_outputs: Required[Iterable[ToolOutput]]
@@ -24,3 +29,24 @@ class ToolOutput(TypedDict, total=False):
     The ID of the tool call in the `required_action` object within the run object
     the output is being submitted for.
     """
+
+
+class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase):
+    stream: Optional[Literal[False]]
+    """
+    If `true`, returns a stream of events that happen during the Run as server-sent
+    events, terminating when the Run enters a terminal state with a `data: [DONE]`
+    message.
+    """
+
+
+class RunSubmitToolOutputsParamsStreaming(RunSubmitToolOutputsParamsBase):
+    stream: Required[Literal[True]]
+    """
+    If `true`, returns a stream of events that happen during the Run as server-sent
+    events, terminating when the Run enters a terminal state with a `data: [DONE]`
+    message.
+    """
+
+
+RunSubmitToolOutputsParams = Union[RunSubmitToolOutputsParamsNonStreaming, RunSubmitToolOutputsParamsStreaming]
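
The same base/non-streaming/streaming split applies to tool-output submission, matching the `overload_2` cases exercised in the tests further down. A sketch with placeholder IDs and output value:

    from openai import OpenAI

    client = OpenAI()
    stream = client.beta.threads.runs.submit_tool_outputs(
        "run_abc123",               # run ID, passed positionally as in the tests
        thread_id="thread_abc123",
        tool_outputs=[{"tool_call_id": "call_abc123", "output": "42"}],
        stream=True,
    )
    for event in stream:
        print(event.event)
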
src/openai/types/beta/threads/text.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import List
+
+from ...._models import BaseModel
+from .annotation import Annotation
+
+__all__ = ["Text"]
+
+
+class Text(BaseModel):
+    annotations: List[Annotation]
+
+    value: str
+    """The data that makes up the text."""
src/openai/types/beta/threads/text_content_block.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing_extensions import Literal
+
+from .text import Text
+from ...._models import BaseModel
+
+__all__ = ["TextContentBlock"]
+
+
+class TextContentBlock(BaseModel):
+    text: Text
+
+    type: Literal["text"]
+    """Always `text`."""
src/openai/types/beta/threads/text_delta.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import List, Optional
+
+from ...._models import BaseModel
+from .annotation_delta import AnnotationDelta
+
+__all__ = ["TextDelta"]
+
+
+class TextDelta(BaseModel):
+    annotations: Optional[List[AnnotationDelta]] = None
+
+    value: Optional[str] = None
+    """The data that makes up the text."""
src/openai/types/beta/threads/text_delta_block.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .text_delta import TextDelta
+
+__all__ = ["TextDeltaBlock"]
+
+
+class TextDeltaBlock(BaseModel):
+    index: int
+    """The index of the content part in the message."""
+
+    type: Literal["text"]
+    """Always `text`."""
+
+    text: Optional[TextDelta] = None
src/openai/types/beta/__init__.py
@@ -4,11 +4,20 @@ from __future__ import annotations
 
 from .thread import Thread as Thread
 from .assistant import Assistant as Assistant
+from .function_tool import FunctionTool as FunctionTool
+from .assistant_tool import AssistantTool as AssistantTool
+from .retrieval_tool import RetrievalTool as RetrievalTool
 from .thread_deleted import ThreadDeleted as ThreadDeleted
 from .assistant_deleted import AssistantDeleted as AssistantDeleted
+from .function_tool_param import FunctionToolParam as FunctionToolParam
+from .assistant_tool_param import AssistantToolParam as AssistantToolParam
+from .retrieval_tool_param import RetrievalToolParam as RetrievalToolParam
 from .thread_create_params import ThreadCreateParams as ThreadCreateParams
 from .thread_update_params import ThreadUpdateParams as ThreadUpdateParams
 from .assistant_list_params import AssistantListParams as AssistantListParams
+from .code_interpreter_tool import CodeInterpreterTool as CodeInterpreterTool
+from .assistant_stream_event import AssistantStreamEvent as AssistantStreamEvent
 from .assistant_create_params import AssistantCreateParams as AssistantCreateParams
 from .assistant_update_params import AssistantUpdateParams as AssistantUpdateParams
+from .code_interpreter_tool_param import CodeInterpreterToolParam as CodeInterpreterToolParam
 from .thread_create_and_run_params import ThreadCreateAndRunParams as ThreadCreateAndRunParams
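
For callers, the net effect of these re-exports is that the renamed and newly added types import from the same `beta` packages as before. For example (old names shown commented, for comparison only):

    # up to 1.13.x:
    # from openai.types.beta.threads import ThreadMessage, MessageContentText

    # from 1.14.0:
    from openai.types.beta.threads import Message, TextContentBlock, ImageFileContentBlock
    from openai.types.beta import AssistantTool, AssistantStreamEvent
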
src/openai/types/beta/assistant.py
@@ -1,33 +1,12 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from typing import List, Union, Optional
-from typing_extensions import Literal, Annotated
+from typing import List, Optional
+from typing_extensions import Literal
 
-from ..shared import FunctionDefinition
-from ..._utils import PropertyInfo
 from ..._models import BaseModel
+from .assistant_tool import AssistantTool
 
-__all__ = ["Assistant", "Tool", "ToolCodeInterpreter", "ToolRetrieval", "ToolFunction"]
-
-
-class ToolCodeInterpreter(BaseModel):
-    type: Literal["code_interpreter"]
-    """The type of tool being defined: `code_interpreter`"""
-
-
-class ToolRetrieval(BaseModel):
-    type: Literal["retrieval"]
-    """The type of tool being defined: `retrieval`"""
-
-
-class ToolFunction(BaseModel):
-    function: FunctionDefinition
-
-    type: Literal["function"]
-    """The type of tool being defined: `function`"""
-
-
-Tool = Annotated[Union[ToolCodeInterpreter, ToolRetrieval, ToolFunction], PropertyInfo(discriminator="type")]
+__all__ = ["Assistant"]
 
 
 class Assistant(BaseModel):
@@ -77,7 +56,7 @@ class Assistant(BaseModel):
     object: Literal["assistant"]
     """The object type, which is always `assistant`."""
 
-    tools: List[Tool]
+    tools: List[AssistantTool]
     """A list of tool enabled on the assistant.
 
     There can be a maximum of 128 tools per assistant. Tools can be of types
src/openai/types/beta/assistant_create_params.py
@@ -2,18 +2,12 @@
 
 from __future__ import annotations
 
-from typing import List, Union, Iterable, Optional
-from typing_extensions import Literal, Required, TypedDict
+from typing import List, Iterable, Optional
+from typing_extensions import Required, TypedDict
 
-from ...types import shared_params
+from .assistant_tool_param import AssistantToolParam
 
-__all__ = [
-    "AssistantCreateParams",
-    "Tool",
-    "ToolAssistantToolsCode",
-    "ToolAssistantToolsRetrieval",
-    "ToolAssistantToolsFunction",
-]
+__all__ = ["AssistantCreateParams"]
 
 
 class AssistantCreateParams(TypedDict, total=False):
@@ -54,29 +48,9 @@ class AssistantCreateParams(TypedDict, total=False):
     name: Optional[str]
     """The name of the assistant. The maximum length is 256 characters."""
 
-    tools: Iterable[Tool]
+    tools: Iterable[AssistantToolParam]
     """A list of tool enabled on the assistant.
 
     There can be a maximum of 128 tools per assistant. Tools can be of types
     `code_interpreter`, `retrieval`, or `function`.
     """
-
-
-class ToolAssistantToolsCode(TypedDict, total=False):
-    type: Required[Literal["code_interpreter"]]
-    """The type of tool being defined: `code_interpreter`"""
-
-
-class ToolAssistantToolsRetrieval(TypedDict, total=False):
-    type: Required[Literal["retrieval"]]
-    """The type of tool being defined: `retrieval`"""
-
-
-class ToolAssistantToolsFunction(TypedDict, total=False):
-    function: Required[shared_params.FunctionDefinition]
-
-    type: Required[Literal["function"]]
-    """The type of tool being defined: `function`"""
-
-
-Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction]
src/openai/types/beta/assistant_stream_event.py
@@ -0,0 +1,276 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Union
+from typing_extensions import Literal, Annotated
+
+from .thread import Thread
+from ..shared import ErrorObject
+from .threads import Run, Message, MessageDeltaEvent
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .threads.runs import RunStep, RunStepDeltaEvent
+
+__all__ = [
+    "AssistantStreamEvent",
+    "ThreadCreated",
+    "ThreadRunCreated",
+    "ThreadRunQueued",
+    "ThreadRunInProgress",
+    "ThreadRunRequiresAction",
+    "ThreadRunCompleted",
+    "ThreadRunFailed",
+    "ThreadRunCancelling",
+    "ThreadRunCancelled",
+    "ThreadRunExpired",
+    "ThreadRunStepCreated",
+    "ThreadRunStepInProgress",
+    "ThreadRunStepDelta",
+    "ThreadRunStepCompleted",
+    "ThreadRunStepFailed",
+    "ThreadRunStepCancelled",
+    "ThreadRunStepExpired",
+    "ThreadMessageCreated",
+    "ThreadMessageInProgress",
+    "ThreadMessageDelta",
+    "ThreadMessageCompleted",
+    "ThreadMessageIncomplete",
+    "ErrorEvent",
+]
+
+
+class ThreadCreated(BaseModel):
+    data: Thread
+    """
+    Represents a thread that contains
+    [messages](https://platform.openai.com/docs/api-reference/messages).
+    """
+
+    event: Literal["thread.created"]
+
+
+class ThreadRunCreated(BaseModel):
+    data: Run
+    """
+    Represents an execution run on a
+    [thread](https://platform.openai.com/docs/api-reference/threads).
+    """
+
+    event: Literal["thread.run.created"]
+
+
+class ThreadRunQueued(BaseModel):
+    data: Run
+    """
+    Represents an execution run on a
+    [thread](https://platform.openai.com/docs/api-reference/threads).
+    """
+
+    event: Literal["thread.run.queued"]
+
+
+class ThreadRunInProgress(BaseModel):
+    data: Run
+    """
+    Represents an execution run on a
+    [thread](https://platform.openai.com/docs/api-reference/threads).
+    """
+
+    event: Literal["thread.run.in_progress"]
+
+
+class ThreadRunRequiresAction(BaseModel):
+    data: Run
+    """
+    Represents an execution run on a
+    [thread](https://platform.openai.com/docs/api-reference/threads).
+    """
+
+    event: Literal["thread.run.requires_action"]
+
+
+class ThreadRunCompleted(BaseModel):
+    data: Run
+    """
+    Represents an execution run on a
+    [thread](https://platform.openai.com/docs/api-reference/threads).
+    """
+
+    event: Literal["thread.run.completed"]
+
+
+class ThreadRunFailed(BaseModel):
+    data: Run
+    """
+    Represents an execution run on a
+    [thread](https://platform.openai.com/docs/api-reference/threads).
+    """
+
+    event: Literal["thread.run.failed"]
+
+
+class ThreadRunCancelling(BaseModel):
+    data: Run
+    """
+    Represents an execution run on a
+    [thread](https://platform.openai.com/docs/api-reference/threads).
+    """
+
+    event: Literal["thread.run.cancelling"]
+
+
+class ThreadRunCancelled(BaseModel):
+    data: Run
+    """
+    Represents an execution run on a
+    [thread](https://platform.openai.com/docs/api-reference/threads).
+    """
+
+    event: Literal["thread.run.cancelled"]
+
+
+class ThreadRunExpired(BaseModel):
+    data: Run
+    """
+    Represents an execution run on a
+    [thread](https://platform.openai.com/docs/api-reference/threads).
+    """
+
+    event: Literal["thread.run.expired"]
+
+
+class ThreadRunStepCreated(BaseModel):
+    data: RunStep
+    """Represents a step in execution of a run."""
+
+    event: Literal["thread.run.step.created"]
+
+
+class ThreadRunStepInProgress(BaseModel):
+    data: RunStep
+    """Represents a step in execution of a run."""
+
+    event: Literal["thread.run.step.in_progress"]
+
+
+class ThreadRunStepDelta(BaseModel):
+    data: RunStepDeltaEvent
+    """Represents a run step delta i.e.
+
+    any changed fields on a run step during streaming.
+    """
+
+    event: Literal["thread.run.step.delta"]
+
+
+class ThreadRunStepCompleted(BaseModel):
+    data: RunStep
+    """Represents a step in execution of a run."""
+
+    event: Literal["thread.run.step.completed"]
+
+
+class ThreadRunStepFailed(BaseModel):
+    data: RunStep
+    """Represents a step in execution of a run."""
+
+    event: Literal["thread.run.step.failed"]
+
+
+class ThreadRunStepCancelled(BaseModel):
+    data: RunStep
+    """Represents a step in execution of a run."""
+
+    event: Literal["thread.run.step.cancelled"]
+
+
+class ThreadRunStepExpired(BaseModel):
+    data: RunStep
+    """Represents a step in execution of a run."""
+
+    event: Literal["thread.run.step.expired"]
+
+
+class ThreadMessageCreated(BaseModel):
+    data: Message
+    """
+    Represents a message within a
+    [thread](https://platform.openai.com/docs/api-reference/threads).
+    """
+
+    event: Literal["thread.message.created"]
+
+
+class ThreadMessageInProgress(BaseModel):
+    data: Message
+    """
+    Represents a message within a
+    [thread](https://platform.openai.com/docs/api-reference/threads).
+    """
+
+    event: Literal["thread.message.in_progress"]
+
+
+class ThreadMessageDelta(BaseModel):
+    data: MessageDeltaEvent
+    """Represents a message delta i.e.
+
+    any changed fields on a message during streaming.
+    """
+
+    event: Literal["thread.message.delta"]
+
+
+class ThreadMessageCompleted(BaseModel):
+    data: Message
+    """
+    Represents a message within a
+    [thread](https://platform.openai.com/docs/api-reference/threads).
+    """
+
+    event: Literal["thread.message.completed"]
+
+
+class ThreadMessageIncomplete(BaseModel):
+    data: Message
+    """
+    Represents a message within a
+    [thread](https://platform.openai.com/docs/api-reference/threads).
+    """
+
+    event: Literal["thread.message.incomplete"]
+
+
+class ErrorEvent(BaseModel):
+    data: ErrorObject
+
+    event: Literal["error"]
+
+
+AssistantStreamEvent = Annotated[
+    Union[
+        ThreadCreated,
+        ThreadRunCreated,
+        ThreadRunQueued,
+        ThreadRunInProgress,
+        ThreadRunRequiresAction,
+        ThreadRunCompleted,
+        ThreadRunFailed,
+        ThreadRunCancelling,
+        ThreadRunCancelled,
+        ThreadRunExpired,
+        ThreadRunStepCreated,
+        ThreadRunStepInProgress,
+        ThreadRunStepDelta,
+        ThreadRunStepCompleted,
+        ThreadRunStepFailed,
+        ThreadRunStepCancelled,
+        ThreadRunStepExpired,
+        ThreadMessageCreated,
+        ThreadMessageInProgress,
+        ThreadMessageDelta,
+        ThreadMessageCompleted,
+        ThreadMessageIncomplete,
+        ErrorEvent,
+    ],
+    PropertyInfo(discriminator="event"),
+]
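
Because `AssistantStreamEvent` carries `PropertyInfo(discriminator="event")`, each SSE payload parses straight to the matching member, and comparing `event.event` narrows it without `isinstance` checks. A short dispatch sketch (placeholder IDs):

    from openai import OpenAI

    client = OpenAI()
    stream = client.beta.threads.runs.create(
        thread_id="thread_abc123",
        assistant_id="asst_abc123",
        stream=True,
    )
    for event in stream:
        if event.event == "thread.run.created":
            print("run started:", event.data.id)       # event.data is a Run
        elif event.event == "thread.run.step.delta":
            print("step delta on:", event.data.id)     # event.data is a RunStepDeltaEvent
        elif event.event == "thread.run.completed":
            print("run finished:", event.data.status)  # event.data is a Run
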
src/openai/types/beta/assistant_tool.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Union
+from typing_extensions import Annotated
+
+from ..._utils import PropertyInfo
+from .function_tool import FunctionTool
+from .retrieval_tool import RetrievalTool
+from .code_interpreter_tool import CodeInterpreterTool
+
+__all__ = ["AssistantTool"]
+
+AssistantTool = Annotated[Union[CodeInterpreterTool, RetrievalTool, FunctionTool], PropertyInfo(discriminator="type")]
src/openai/types/beta/assistant_tool_param.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from __future__ import annotations
+
+from typing import Union
+
+from .function_tool_param import FunctionToolParam
+from .retrieval_tool_param import RetrievalToolParam
+from .code_interpreter_tool_param import CodeInterpreterToolParam
+
+__all__ = ["AssistantToolParam"]
+
+AssistantToolParam = Union[CodeInterpreterToolParam, RetrievalToolParam, FunctionToolParam]
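
`AssistantToolParam` is the request-side mirror of `AssistantTool`: plain dicts matching the `*ToolParam` TypedDicts below type-check on the way in, and the response comes back as the discriminated models. A sketch (the function definition is a minimal placeholder):

    from openai import OpenAI

    client = OpenAI()
    assistant = client.beta.assistants.create(
        model="gpt-4-1106-preview",
        tools=[
            {"type": "code_interpreter"},
            {"type": "retrieval"},
            {"type": "function", "function": {"name": "get_weather"}},
        ],
    )
    for tool in assistant.tools:  # List[AssistantTool]
        print(tool.function.name if tool.type == "function" else tool.type)
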
src/openai/types/beta/assistant_update_params.py
@@ -2,18 +2,12 @@
 
 from __future__ import annotations
 
-from typing import List, Union, Iterable, Optional
-from typing_extensions import Literal, Required, TypedDict
+from typing import List, Iterable, Optional
+from typing_extensions import TypedDict
 
-from ...types import shared_params
+from .assistant_tool_param import AssistantToolParam
 
-__all__ = [
-    "AssistantUpdateParams",
-    "Tool",
-    "ToolAssistantToolsCode",
-    "ToolAssistantToolsRetrieval",
-    "ToolAssistantToolsFunction",
-]
+__all__ = ["AssistantUpdateParams"]
 
 
 class AssistantUpdateParams(TypedDict, total=False):
@@ -56,29 +50,9 @@ class AssistantUpdateParams(TypedDict, total=False):
     name: Optional[str]
     """The name of the assistant. The maximum length is 256 characters."""
 
-    tools: Iterable[Tool]
+    tools: Iterable[AssistantToolParam]
     """A list of tool enabled on the assistant.
 
     There can be a maximum of 128 tools per assistant. Tools can be of types
     `code_interpreter`, `retrieval`, or `function`.
     """
-
-
-class ToolAssistantToolsCode(TypedDict, total=False):
-    type: Required[Literal["code_interpreter"]]
-    """The type of tool being defined: `code_interpreter`"""
-
-
-class ToolAssistantToolsRetrieval(TypedDict, total=False):
-    type: Required[Literal["retrieval"]]
-    """The type of tool being defined: `retrieval`"""
-
-
-class ToolAssistantToolsFunction(TypedDict, total=False):
-    function: Required[shared_params.FunctionDefinition]
-
-    type: Required[Literal["function"]]
-    """The type of tool being defined: `function`"""
-
-
-Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction]
src/openai/types/beta/code_interpreter_tool.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["CodeInterpreterTool"]
+
+
+class CodeInterpreterTool(BaseModel):
+    type: Literal["code_interpreter"]
+    """The type of tool being defined: `code_interpreter`"""
src/openai/types/beta/code_interpreter_tool_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["CodeInterpreterToolParam"]
+
+
+class CodeInterpreterToolParam(TypedDict, total=False):
+    type: Required[Literal["code_interpreter"]]
+    """The type of tool being defined: `code_interpreter`"""
src/openai/types/beta/function_tool.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing_extensions import Literal
+
+from ..shared import FunctionDefinition
+from ..._models import BaseModel
+
+__all__ = ["FunctionTool"]
+
+
+class FunctionTool(BaseModel):
+    function: FunctionDefinition
+
+    type: Literal["function"]
+    """The type of tool being defined: `function`"""
src/openai/types/beta/function_tool_param.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+from ...types import shared_params
+
+__all__ = ["FunctionToolParam"]
+
+
+class FunctionToolParam(TypedDict, total=False):
+    function: Required[shared_params.FunctionDefinition]
+
+    type: Required[Literal["function"]]
+    """The type of tool being defined: `function`"""
src/openai/types/beta/retrieval_tool.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RetrievalTool"]
+
+
+class RetrievalTool(BaseModel):
+    type: Literal["retrieval"]
+    """The type of tool being defined: `retrieval`"""
src/openai/types/beta/retrieval_tool_param.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["RetrievalToolParam"]
+
+
+class RetrievalToolParam(TypedDict, total=False):
+    type: Required[Literal["retrieval"]]
+    """The type of tool being defined: `retrieval`"""
src/openai/types/beta/thread_create_and_run_params.py
@@ -5,20 +5,21 @@ from __future__ import annotations
 from typing import List, Union, Iterable, Optional
 from typing_extensions import Literal, Required, TypedDict
 
-from ...types import shared_params
+from .function_tool_param import FunctionToolParam
+from .retrieval_tool_param import RetrievalToolParam
+from .code_interpreter_tool_param import CodeInterpreterToolParam
 
 __all__ = [
-    "ThreadCreateAndRunParams",
+    "ThreadCreateAndRunParamsBase",
     "Thread",
     "ThreadMessage",
     "Tool",
-    "ToolAssistantToolsCode",
-    "ToolAssistantToolsRetrieval",
-    "ToolAssistantToolsFunction",
+    "ThreadCreateAndRunParamsNonStreaming",
+    "ThreadCreateAndRunParamsStreaming",
 ]
 
 
-class ThreadCreateAndRunParams(TypedDict, total=False):
+class ThreadCreateAndRunParamsBase(TypedDict, total=False):
     assistant_id: Required[str]
     """
     The ID of the
@@ -101,21 +102,25 @@ class Thread(TypedDict, total=False):
     """
 
 
-class ToolAssistantToolsCode(TypedDict, total=False):
-    type: Required[Literal["code_interpreter"]]
-    """The type of tool being defined: `code_interpreter`"""
+Tool = Union[CodeInterpreterToolParam, RetrievalToolParam, FunctionToolParam]
 
 
-class ToolAssistantToolsRetrieval(TypedDict, total=False):
-    type: Required[Literal["retrieval"]]
-    """The type of tool being defined: `retrieval`"""
-
+class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase):
+    stream: Optional[Literal[False]]
+    """
+    If `true`, returns a stream of events that happen during the Run as server-sent
+    events, terminating when the Run enters a terminal state with a `data: [DONE]`
+    message.
+    """
 
-class ToolAssistantToolsFunction(TypedDict, total=False):
-    function: Required[shared_params.FunctionDefinition]
 
-    type: Required[Literal["function"]]
-    """The type of tool being defined: `function`"""
+class ThreadCreateAndRunParamsStreaming(ThreadCreateAndRunParamsBase):
+    stream: Required[Literal[True]]
+    """
+    If `true`, returns a stream of events that happen during the Run as server-sent
+    events, terminating when the Run enters a terminal state with a `data: [DONE]`
+    message.
+    """
 
 
-Tool = Union[ToolAssistantToolsCode, ToolAssistantToolsRetrieval, ToolAssistantToolsFunction]
+ThreadCreateAndRunParams = Union[ThreadCreateAndRunParamsNonStreaming, ThreadCreateAndRunParamsStreaming]
src/openai/types/chat/completion_create_params.py
@@ -190,7 +190,8 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     """A list of tools the model may call.
 
     Currently, only functions are supported as a tool. Use this to provide a list of
-    functions the model may generate JSON inputs for.
+    functions the model may generate JSON inputs for. A maximum of 128 functions
+    is supported.
     """
 
     top_logprobs: Optional[int]
src/openai/types/shared/__init__.py
@@ -1,4 +1,5 @@
 # File generated from our OpenAPI spec by Stainless.
 
+from .error_object import ErrorObject as ErrorObject
 from .function_definition import FunctionDefinition as FunctionDefinition
 from .function_parameters import FunctionParameters as FunctionParameters
src/openai/types/shared/error_object.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless.
+
+from typing import Optional
+
+from ..._models import BaseModel
+
+__all__ = ["ErrorObject"]
+
+
+class ErrorObject(BaseModel):
+    code: Optional[str] = None
+
+    message: str
+
+    param: Optional[str] = None
+
+    type: str
src/openai/types/__init__.py
@@ -4,7 +4,11 @@ from __future__ import annotations
 
 from .image import Image as Image
 from .model import Model as Model
-from .shared import FunctionDefinition as FunctionDefinition, FunctionParameters as FunctionParameters
+from .shared import (
+    ErrorObject as ErrorObject,
+    FunctionDefinition as FunctionDefinition,
+    FunctionParameters as FunctionParameters,
+)
 from .embedding import Embedding as Embedding
 from .completion import Completion as Completion
 from .moderation import Moderation as Moderation
src/openai/types/completion_create_params.py
@@ -124,7 +124,10 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     """
 
     suffix: Optional[str]
-    """The suffix that comes after a completion of inserted text."""
+    """The suffix that comes after a completion of inserted text.
+
+    This parameter is only supported for `gpt-3.5-turbo-instruct`.
+    """
 
     temperature: Optional[float]
     """What sampling temperature to use, between 0 and 2.
src/openai/__init__.py
@@ -69,6 +69,10 @@ from .lib import azure as _azure
 from .version import VERSION as VERSION
 from .lib.azure import AzureOpenAI as AzureOpenAI, AsyncAzureOpenAI as AsyncAzureOpenAI
 from .lib._old_api import *
+from .lib.streaming import (
+    AssistantEventHandler as AssistantEventHandler,
+    AsyncAssistantEventHandler as AsyncAssistantEventHandler,
+)
 
 _setup_logging()
 
src/openai/_streaming.py
@@ -80,6 +80,25 @@ class Stream(Generic[_T]):
 
                 yield process_data(data=data, cast_to=cast_to, response=response)
 
+            else:
+                data = sse.json()
+
+                if sse.event == "error" and is_mapping(data) and data.get("error"):
+                    message = None
+                    error = data.get("error")
+                    if is_mapping(error):
+                        message = error.get("message")
+                    if not message or not isinstance(message, str):
+                        message = "An error occurred during streaming"
+
+                    raise APIError(
+                        message=message,
+                        request=self.response.request,
+                        body=data["error"],
+                    )
+
+                yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response)
+
         # Ensure the entire stream is consumed
         for _sse in iterator:
             ...
@@ -167,6 +186,25 @@ class AsyncStream(Generic[_T]):
 
                 yield process_data(data=data, cast_to=cast_to, response=response)
 
+            else:
+                data = sse.json()
+
+                if sse.event == "error" and is_mapping(data) and data.get("error"):
+                    message = None
+                    error = data.get("error")
+                    if is_mapping(error):
+                        message = error.get("message")
+                    if not message or not isinstance(message, str):
+                        message = "An error occurred during streaming"
+
+                    raise APIError(
+                        message=message,
+                        request=self.response.request,
+                        body=data["error"],
+                    )
+
+                yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response)
+
         # Ensure the entire stream is consumed
         async for _sse in iterator:
             ...
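
With this branch in place, an `error` SSE event surfaces as an `APIError` raised from the iterator itself, so consumers should wrap iteration, not just stream creation. A sketch with placeholder IDs:

    import openai

    client = openai.OpenAI()
    try:
        stream = client.beta.threads.runs.create(
            thread_id="thread_abc123",
            assistant_id="asst_abc123",
            stream=True,
        )
        for event in stream:
            pass  # handle events
    except openai.APIError as err:
        # raised mid-iteration when the server emits an `error` event
        print("stream failed:", err)
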
src/openai/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless.
 
 __title__ = "openai"
-__version__ = "1.13.4"  # x-release-please-version
+__version__ = "1.14.0"  # x-release-please-version
tests/api_resources/beta/threads/test_messages.py
@@ -10,7 +10,7 @@ import pytest
 from openai import OpenAI, AsyncOpenAI
 from tests.utils import assert_matches_type
 from openai.pagination import SyncCursorPage, AsyncCursorPage
-from openai.types.beta.threads import ThreadMessage
+from openai.types.beta.threads import Message
 
 base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
 
@@ -25,7 +25,7 @@ class TestMessages:
             content="x",
             role="user",
         )
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     def test_method_create_with_all_params(self, client: OpenAI) -> None:
@@ -36,7 +36,7 @@ class TestMessages:
             file_ids=["string"],
             metadata={},
         )
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     def test_raw_response_create(self, client: OpenAI) -> None:
@@ -49,7 +49,7 @@ class TestMessages:
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     def test_streaming_response_create(self, client: OpenAI) -> None:
@@ -62,7 +62,7 @@ class TestMessages:
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             message = response.parse()
-            assert_matches_type(ThreadMessage, message, path=["response"])
+            assert_matches_type(Message, message, path=["response"])
 
         assert cast(Any, response.is_closed) is True
 
@@ -81,7 +81,7 @@ class TestMessages:
             "string",
             thread_id="string",
         )
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     def test_raw_response_retrieve(self, client: OpenAI) -> None:
@@ -93,7 +93,7 @@ class TestMessages:
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     def test_streaming_response_retrieve(self, client: OpenAI) -> None:
@@ -105,7 +105,7 @@ class TestMessages:
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             message = response.parse()
-            assert_matches_type(ThreadMessage, message, path=["response"])
+            assert_matches_type(Message, message, path=["response"])
 
         assert cast(Any, response.is_closed) is True
 
@@ -129,7 +129,7 @@ class TestMessages:
             "string",
             thread_id="string",
         )
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     def test_method_update_with_all_params(self, client: OpenAI) -> None:
@@ -138,7 +138,7 @@ class TestMessages:
             thread_id="string",
             metadata={},
         )
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     def test_raw_response_update(self, client: OpenAI) -> None:
@@ -150,7 +150,7 @@ class TestMessages:
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     def test_streaming_response_update(self, client: OpenAI) -> None:
@@ -162,7 +162,7 @@ class TestMessages:
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             message = response.parse()
-            assert_matches_type(ThreadMessage, message, path=["response"])
+            assert_matches_type(Message, message, path=["response"])
 
         assert cast(Any, response.is_closed) is True
 
@@ -185,7 +185,7 @@ class TestMessages:
         message = client.beta.threads.messages.list(
             "string",
         )
-        assert_matches_type(SyncCursorPage[ThreadMessage], message, path=["response"])
+        assert_matches_type(SyncCursorPage[Message], message, path=["response"])
 
     @parametrize
     def test_method_list_with_all_params(self, client: OpenAI) -> None:
@@ -196,7 +196,7 @@ class TestMessages:
             limit=0,
             order="asc",
         )
-        assert_matches_type(SyncCursorPage[ThreadMessage], message, path=["response"])
+        assert_matches_type(SyncCursorPage[Message], message, path=["response"])
 
     @parametrize
     def test_raw_response_list(self, client: OpenAI) -> None:
@@ -207,7 +207,7 @@ class TestMessages:
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
-        assert_matches_type(SyncCursorPage[ThreadMessage], message, path=["response"])
+        assert_matches_type(SyncCursorPage[Message], message, path=["response"])
 
     @parametrize
     def test_streaming_response_list(self, client: OpenAI) -> None:
@@ -218,7 +218,7 @@ class TestMessages:
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             message = response.parse()
-            assert_matches_type(SyncCursorPage[ThreadMessage], message, path=["response"])
+            assert_matches_type(SyncCursorPage[Message], message, path=["response"])
 
         assert cast(Any, response.is_closed) is True
 
@@ -240,7 +240,7 @@ class TestAsyncMessages:
             content="x",
             role="user",
         )
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
@@ -251,7 +251,7 @@ class TestAsyncMessages:
             file_ids=["string"],
             metadata={},
         )
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
@@ -264,7 +264,7 @@ class TestAsyncMessages:
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
@@ -277,7 +277,7 @@ class TestAsyncMessages:
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             message = await response.parse()
-            assert_matches_type(ThreadMessage, message, path=["response"])
+            assert_matches_type(Message, message, path=["response"])
 
         assert cast(Any, response.is_closed) is True
 
@@ -296,7 +296,7 @@ class TestAsyncMessages:
             "string",
             thread_id="string",
         )
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None:
@@ -308,7 +308,7 @@ class TestAsyncMessages:
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None:
@@ -320,7 +320,7 @@ class TestAsyncMessages:
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             message = await response.parse()
-            assert_matches_type(ThreadMessage, message, path=["response"])
+            assert_matches_type(Message, message, path=["response"])
 
         assert cast(Any, response.is_closed) is True
 
@@ -344,7 +344,7 @@ class TestAsyncMessages:
             "string",
             thread_id="string",
         )
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None:
@@ -353,7 +353,7 @@ class TestAsyncMessages:
             thread_id="string",
             metadata={},
         )
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None:
@@ -365,7 +365,7 @@ class TestAsyncMessages:
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
-        assert_matches_type(ThreadMessage, message, path=["response"])
+        assert_matches_type(Message, message, path=["response"])
 
     @parametrize
     async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None:
@@ -377,7 +377,7 @@ class TestAsyncMessages:
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             message = await response.parse()
-            assert_matches_type(ThreadMessage, message, path=["response"])
+            assert_matches_type(Message, message, path=["response"])
 
         assert cast(Any, response.is_closed) is True
 
@@ -400,7 +400,7 @@ class TestAsyncMessages:
         message = await async_client.beta.threads.messages.list(
             "string",
         )
-        assert_matches_type(AsyncCursorPage[ThreadMessage], message, path=["response"])
+        assert_matches_type(AsyncCursorPage[Message], message, path=["response"])
 
     @parametrize
     async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None:
@@ -411,7 +411,7 @@ class TestAsyncMessages:
             limit=0,
             order="asc",
         )
-        assert_matches_type(AsyncCursorPage[ThreadMessage], message, path=["response"])
+        assert_matches_type(AsyncCursorPage[Message], message, path=["response"])
 
     @parametrize
     async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None:
@@ -422,7 +422,7 @@ class TestAsyncMessages:
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
-        assert_matches_type(AsyncCursorPage[ThreadMessage], message, path=["response"])
+        assert_matches_type(AsyncCursorPage[Message], message, path=["response"])
 
     @parametrize
     async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None:
@@ -433,7 +433,7 @@ class TestAsyncMessages:
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             message = await response.parse()
-            assert_matches_type(AsyncCursorPage[ThreadMessage], message, path=["response"])
+            assert_matches_type(AsyncCursorPage[Message], message, path=["response"])
 
         assert cast(Any, response.is_closed) is True
 
tests/api_resources/beta/threads/test_runs.py
@@ -21,7 +21,7 @@ class TestRuns:
     parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
 
     @parametrize
-    def test_method_create(self, client: OpenAI) -> None:
+    def test_method_create_overload_1(self, client: OpenAI) -> None:
         run = client.beta.threads.runs.create(
             "string",
             assistant_id="string",
@@ -29,7 +29,7 @@ class TestRuns:
         assert_matches_type(Run, run, path=["response"])
 
     @parametrize
-    def test_method_create_with_all_params(self, client: OpenAI) -> None:
+    def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
         run = client.beta.threads.runs.create(
             "string",
             assistant_id="string",
@@ -37,12 +37,13 @@ class TestRuns:
             instructions="string",
             metadata={},
             model="string",
+            stream=False,
             tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
         )
         assert_matches_type(Run, run, path=["response"])
 
     @parametrize
-    def test_raw_response_create(self, client: OpenAI) -> None:
+    def test_raw_response_create_overload_1(self, client: OpenAI) -> None:
         response = client.beta.threads.runs.with_raw_response.create(
             "string",
             assistant_id="string",
@@ -54,7 +55,7 @@ class TestRuns:
         assert_matches_type(Run, run, path=["response"])
 
     @parametrize
-    def test_streaming_response_create(self, client: OpenAI) -> None:
+    def test_streaming_response_create_overload_1(self, client: OpenAI) -> None:
         with client.beta.threads.runs.with_streaming_response.create(
             "string",
             assistant_id="string",
@@ -68,13 +69,72 @@ class TestRuns:
         assert cast(Any, response.is_closed) is True
 
     @parametrize
-    def test_path_params_create(self, client: OpenAI) -> None:
+    def test_path_params_create_overload_1(self, client: OpenAI) -> None:
         with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"):
             client.beta.threads.runs.with_raw_response.create(
                 "",
                 assistant_id="string",
             )
 
+    @parametrize
+    def test_method_create_overload_2(self, client: OpenAI) -> None:
+        run_stream = client.beta.threads.runs.create(
+            "string",
+            assistant_id="string",
+            stream=True,
+        )
+        run_stream.response.close()
+
+    @parametrize
+    def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
+        run_stream = client.beta.threads.runs.create(
+            "string",
+            assistant_id="string",
+            stream=True,
+            additional_instructions="string",
+            instructions="string",
+            metadata={},
+            model="string",
+            tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+        )
+        run_stream.response.close()
+
+    @parametrize
+    def test_raw_response_create_overload_2(self, client: OpenAI) -> None:
+        response = client.beta.threads.runs.with_raw_response.create(
+            "string",
+            assistant_id="string",
+            stream=True,
+        )
+
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        stream = response.parse()
+        stream.close()
+
+    @parametrize
+    def test_streaming_response_create_overload_2(self, client: OpenAI) -> None:
+        with client.beta.threads.runs.with_streaming_response.create(
+            "string",
+            assistant_id="string",
+            stream=True,
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = response.parse()
+            stream.close()
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_path_params_create_overload_2(self, client: OpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"):
+            client.beta.threads.runs.with_raw_response.create(
+                "",
+                assistant_id="string",
+                stream=True,
+            )
+
     @parametrize
     def test_method_retrieve(self, client: OpenAI) -> None:
         run = client.beta.threads.runs.retrieve(
@@ -278,7 +338,7 @@ class TestRuns:
             )
 
     @parametrize
-    def test_method_submit_tool_outputs(self, client: OpenAI) -> None:
+    def test_method_submit_tool_outputs_overload_1(self, client: OpenAI) -> None:
         run = client.beta.threads.runs.submit_tool_outputs(
             "string",
             thread_id="string",
@@ -287,7 +347,30 @@ class TestRuns:
         assert_matches_type(Run, run, path=["response"])
 
     @parametrize
-    def test_raw_response_submit_tool_outputs(self, client: OpenAI) -> None:
+    def test_method_submit_tool_outputs_with_all_params_overload_1(self, client: OpenAI) -> None:
+        run = client.beta.threads.runs.submit_tool_outputs(
+            "string",
+            thread_id="string",
+            tool_outputs=[
+                {
+                    "tool_call_id": "string",
+                    "output": "string",
+                },
+                {
+                    "tool_call_id": "string",
+                    "output": "string",
+                },
+                {
+                    "tool_call_id": "string",
+                    "output": "string",
+                },
+            ],
+            stream=False,
+        )
+        assert_matches_type(Run, run, path=["response"])
+
+    @parametrize
+    def test_raw_response_submit_tool_outputs_overload_1(self, client: OpenAI) -> None:
         response = client.beta.threads.runs.with_raw_response.submit_tool_outputs(
             "string",
             thread_id="string",
@@ -300,7 +383,7 @@ class TestRuns:
         assert_matches_type(Run, run, path=["response"])
 
     @parametrize
-    def test_streaming_response_submit_tool_outputs(self, client: OpenAI) -> None:
+    def test_streaming_response_submit_tool_outputs_overload_1(self, client: OpenAI) -> None:
         with client.beta.threads.runs.with_streaming_response.submit_tool_outputs(
             "string",
             thread_id="string",
@@ -315,11 +398,67 @@ class TestRuns:
         assert cast(Any, response.is_closed) is True
 
     @parametrize
-    def test_path_params_submit_tool_outputs(self, client: OpenAI) -> None:
+    def test_path_params_submit_tool_outputs_overload_1(self, client: OpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"):
+            client.beta.threads.runs.with_raw_response.submit_tool_outputs(
+                "string",
+                thread_id="",
+                tool_outputs=[{}, {}, {}],
+            )
+
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+            client.beta.threads.runs.with_raw_response.submit_tool_outputs(
+                "",
+                thread_id="string",
+                tool_outputs=[{}, {}, {}],
+            )
+
+    @parametrize
+    def test_method_submit_tool_outputs_overload_2(self, client: OpenAI) -> None:
+        run_stream = client.beta.threads.runs.submit_tool_outputs(
+            "string",
+            thread_id="string",
+            stream=True,
+            tool_outputs=[{}, {}, {}],
+        )
+        run_stream.response.close()
+
+    @parametrize
+    def test_raw_response_submit_tool_outputs_overload_2(self, client: OpenAI) -> None:
+        response = client.beta.threads.runs.with_raw_response.submit_tool_outputs(
+            "string",
+            thread_id="string",
+            stream=True,
+            tool_outputs=[{}, {}, {}],
+        )
+
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        stream = response.parse()
+        stream.close()
+
+    @parametrize
+    def test_streaming_response_submit_tool_outputs_overload_2(self, client: OpenAI) -> None:
+        with client.beta.threads.runs.with_streaming_response.submit_tool_outputs(
+            "string",
+            thread_id="string",
+            stream=True,
+            tool_outputs=[{}, {}, {}],
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = response.parse()
+            stream.close()
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_path_params_submit_tool_outputs_overload_2(self, client: OpenAI) -> None:
         with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"):
             client.beta.threads.runs.with_raw_response.submit_tool_outputs(
                 "string",
                 thread_id="",
+                stream=True,
                 tool_outputs=[{}, {}, {}],
             )
 
@@ -327,6 +466,7 @@ class TestRuns:
             client.beta.threads.runs.with_raw_response.submit_tool_outputs(
                 "",
                 thread_id="string",
+                stream=True,
                 tool_outputs=[{}, {}, {}],
             )
 
@@ -335,7 +475,7 @@ class TestAsyncRuns:
     parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
 
     @parametrize
-    async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+    async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None:
         run = await async_client.beta.threads.runs.create(
             "string",
             assistant_id="string",
@@ -343,7 +483,7 @@ class TestAsyncRuns:
         assert_matches_type(Run, run, path=["response"])
 
     @parametrize
-    async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
+    async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None:
         run = await async_client.beta.threads.runs.create(
             "string",
             assistant_id="string",
@@ -351,12 +491,13 @@ class TestAsyncRuns:
             instructions="string",
             metadata={},
             model="string",
+            stream=False,
             tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
         )
         assert_matches_type(Run, run, path=["response"])
 
     @parametrize
-    async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+    async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -> None:
         response = await async_client.beta.threads.runs.with_raw_response.create(
             "string",
             assistant_id="string",
@@ -368,7 +509,7 @@ class TestAsyncRuns:
         assert_matches_type(Run, run, path=["response"])
 
     @parametrize
-    async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+    async def test_streaming_response_create_overload_1(self, async_client: AsyncOpenAI) -> None:
         async with async_client.beta.threads.runs.with_streaming_response.create(
             "string",
             assistant_id="string",
@@ -382,13 +523,72 @@ class TestAsyncRuns:
         assert cast(Any, response.is_closed) is True
 
     @parametrize
-    async def test_path_params_create(self, async_client: AsyncOpenAI) -> None:
+    async def test_path_params_create_overload_1(self, async_client: AsyncOpenAI) -> None:
         with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"):
             await async_client.beta.threads.runs.with_raw_response.create(
                 "",
                 assistant_id="string",
             )
 
+    @parametrize
+    async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None:
+        run_stream = await async_client.beta.threads.runs.create(
+            "string",
+            assistant_id="string",
+            stream=True,
+        )
+        await run_stream.response.aclose()
+
+    @parametrize
+    async def test_method_create_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None:
+        run_stream = await async_client.beta.threads.runs.create(
+            "string",
+            assistant_id="string",
+            stream=True,
+            additional_instructions="string",
+            instructions="string",
+            metadata={},
+            model="string",
+            tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+        )
+        await run_stream.response.aclose()
+
+    @parametrize
+    async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.beta.threads.runs.with_raw_response.create(
+            "string",
+            assistant_id="string",
+            stream=True,
+        )
+
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        stream = response.parse()
+        await stream.close()
+
+    @parametrize
+    async def test_streaming_response_create_overload_2(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.beta.threads.runs.with_streaming_response.create(
+            "string",
+            assistant_id="string",
+            stream=True,
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = await response.parse()
+            await stream.close()
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_path_params_create_overload_2(self, async_client: AsyncOpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"):
+            await async_client.beta.threads.runs.with_raw_response.create(
+                "",
+                assistant_id="string",
+                stream=True,
+            )
+
     @parametrize
     async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None:
         run = await async_client.beta.threads.runs.retrieve(
@@ -592,7 +792,7 @@ class TestAsyncRuns:
             )
 
     @parametrize
-    async def test_method_submit_tool_outputs(self, async_client: AsyncOpenAI) -> None:
+    async def test_method_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None:
         run = await async_client.beta.threads.runs.submit_tool_outputs(
             "string",
             thread_id="string",
@@ -601,7 +801,30 @@ class TestAsyncRuns:
         assert_matches_type(Run, run, path=["response"])
 
     @parametrize
-    async def test_raw_response_submit_tool_outputs(self, async_client: AsyncOpenAI) -> None:
+    async def test_method_submit_tool_outputs_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None:
+        run = await async_client.beta.threads.runs.submit_tool_outputs(
+            "string",
+            thread_id="string",
+            tool_outputs=[
+                {
+                    "tool_call_id": "string",
+                    "output": "string",
+                },
+                {
+                    "tool_call_id": "string",
+                    "output": "string",
+                },
+                {
+                    "tool_call_id": "string",
+                    "output": "string",
+                },
+            ],
+            stream=False,
+        )
+        assert_matches_type(Run, run, path=["response"])
+
+    @parametrize
+    async def test_raw_response_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None:
         response = await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs(
             "string",
             thread_id="string",
@@ -614,7 +837,7 @@ class TestAsyncRuns:
         assert_matches_type(Run, run, path=["response"])
 
     @parametrize
-    async def test_streaming_response_submit_tool_outputs(self, async_client: AsyncOpenAI) -> None:
+    async def test_streaming_response_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None:
         async with async_client.beta.threads.runs.with_streaming_response.submit_tool_outputs(
             "string",
             thread_id="string",
@@ -629,11 +852,67 @@ class TestAsyncRuns:
         assert cast(Any, response.is_closed) is True
 
     @parametrize
-    async def test_path_params_submit_tool_outputs(self, async_client: AsyncOpenAI) -> None:
+    async def test_path_params_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"):
+            await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs(
+                "string",
+                thread_id="",
+                tool_outputs=[{}, {}, {}],
+            )
+
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+            await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs(
+                "",
+                thread_id="string",
+                tool_outputs=[{}, {}, {}],
+            )
+
+    @parametrize
+    async def test_method_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None:
+        run_stream = await async_client.beta.threads.runs.submit_tool_outputs(
+            "string",
+            thread_id="string",
+            stream=True,
+            tool_outputs=[{}, {}, {}],
+        )
+        await run_stream.response.aclose()
+
+    @parametrize
+    async def test_raw_response_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs(
+            "string",
+            thread_id="string",
+            stream=True,
+            tool_outputs=[{}, {}, {}],
+        )
+
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        stream = response.parse()
+        await stream.close()
+
+    @parametrize
+    async def test_streaming_response_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.beta.threads.runs.with_streaming_response.submit_tool_outputs(
+            "string",
+            thread_id="string",
+            stream=True,
+            tool_outputs=[{}, {}, {}],
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = await response.parse()
+            await stream.close()
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_path_params_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None:
         with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"):
             await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs(
                 "string",
                 thread_id="",
+                stream=True,
                 tool_outputs=[{}, {}, {}],
             )
 
@@ -641,5 +920,6 @@ class TestAsyncRuns:
             await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs(
                 "",
                 thread_id="string",
+                stream=True,
                 tool_outputs=[{}, {}, {}],
             )
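
The `_overload_1`/`_overload_2` suffixes added throughout these tests track the two typed overloads that the new `stream` parameter introduces: with `stream=False` (or omitted) the call returns a `Run`, while `stream=True` returns a stream of `AssistantStreamEvent`s that must be consumed or closed. A minimal sketch of the pattern the tests exercise, with hypothetical IDs in place of the `"string"` fixtures:

```python
import openai

client = openai.OpenAI()

# Overload 1: blocks until the run is created and returns a `Run`.
run = client.beta.threads.runs.create(
    "thread_abc123",  # hypothetical thread ID
    assistant_id="asst_abc123",  # hypothetical assistant ID
)

# Overload 2: returns a Stream[AssistantStreamEvent]; iterating to the end
# (or closing explicitly) releases the underlying HTTP connection.
stream = client.beta.threads.runs.create(
    "thread_abc123",
    assistant_id="asst_abc123",
    stream=True,
)
for event in stream:
    print(event.event)
```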
tests/api_resources/beta/test_threads.py
@@ -196,19 +196,20 @@ class TestThreads:
             )
 
     @parametrize
-    def test_method_create_and_run(self, client: OpenAI) -> None:
+    def test_method_create_and_run_overload_1(self, client: OpenAI) -> None:
         thread = client.beta.threads.create_and_run(
             assistant_id="string",
         )
         assert_matches_type(Run, thread, path=["response"])
 
     @parametrize
-    def test_method_create_and_run_with_all_params(self, client: OpenAI) -> None:
+    def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI) -> None:
         thread = client.beta.threads.create_and_run(
             assistant_id="string",
             instructions="string",
             metadata={},
             model="string",
+            stream=False,
             thread={
                 "messages": [
                     {
@@ -237,7 +238,7 @@ class TestThreads:
         assert_matches_type(Run, thread, path=["response"])
 
     @parametrize
-    def test_raw_response_create_and_run(self, client: OpenAI) -> None:
+    def test_raw_response_create_and_run_overload_1(self, client: OpenAI) -> None:
         response = client.beta.threads.with_raw_response.create_and_run(
             assistant_id="string",
         )
@@ -248,7 +249,7 @@ class TestThreads:
         assert_matches_type(Run, thread, path=["response"])
 
     @parametrize
-    def test_streaming_response_create_and_run(self, client: OpenAI) -> None:
+    def test_streaming_response_create_and_run_overload_1(self, client: OpenAI) -> None:
         with client.beta.threads.with_streaming_response.create_and_run(
             assistant_id="string",
         ) as response:
@@ -260,6 +261,74 @@ class TestThreads:
 
         assert cast(Any, response.is_closed) is True
 
+    @parametrize
+    def test_method_create_and_run_overload_2(self, client: OpenAI) -> None:
+        thread_stream = client.beta.threads.create_and_run(
+            assistant_id="string",
+            stream=True,
+        )
+        thread_stream.response.close()
+
+    @parametrize
+    def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI) -> None:
+        thread_stream = client.beta.threads.create_and_run(
+            assistant_id="string",
+            stream=True,
+            instructions="string",
+            metadata={},
+            model="string",
+            thread={
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": "x",
+                        "file_ids": ["string"],
+                        "metadata": {},
+                    },
+                    {
+                        "role": "user",
+                        "content": "x",
+                        "file_ids": ["string"],
+                        "metadata": {},
+                    },
+                    {
+                        "role": "user",
+                        "content": "x",
+                        "file_ids": ["string"],
+                        "metadata": {},
+                    },
+                ],
+                "metadata": {},
+            },
+            tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+        )
+        thread_stream.response.close()
+
+    @parametrize
+    def test_raw_response_create_and_run_overload_2(self, client: OpenAI) -> None:
+        response = client.beta.threads.with_raw_response.create_and_run(
+            assistant_id="string",
+            stream=True,
+        )
+
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        stream = response.parse()
+        stream.close()
+
+    @parametrize
+    def test_streaming_response_create_and_run_overload_2(self, client: OpenAI) -> None:
+        with client.beta.threads.with_streaming_response.create_and_run(
+            assistant_id="string",
+            stream=True,
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = response.parse()
+            stream.close()
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncThreads:
     parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
@@ -439,19 +508,20 @@ class TestAsyncThreads:
             )
 
     @parametrize
-    async def test_method_create_and_run(self, async_client: AsyncOpenAI) -> None:
+    async def test_method_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None:
         thread = await async_client.beta.threads.create_and_run(
             assistant_id="string",
         )
         assert_matches_type(Run, thread, path=["response"])
 
     @parametrize
-    async def test_method_create_and_run_with_all_params(self, async_client: AsyncOpenAI) -> None:
+    async def test_method_create_and_run_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None:
         thread = await async_client.beta.threads.create_and_run(
             assistant_id="string",
             instructions="string",
             metadata={},
             model="string",
+            stream=False,
             thread={
                 "messages": [
                     {
@@ -480,7 +550,7 @@ class TestAsyncThreads:
         assert_matches_type(Run, thread, path=["response"])
 
     @parametrize
-    async def test_raw_response_create_and_run(self, async_client: AsyncOpenAI) -> None:
+    async def test_raw_response_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None:
         response = await async_client.beta.threads.with_raw_response.create_and_run(
             assistant_id="string",
         )
@@ -491,7 +561,7 @@ class TestAsyncThreads:
         assert_matches_type(Run, thread, path=["response"])
 
     @parametrize
-    async def test_streaming_response_create_and_run(self, async_client: AsyncOpenAI) -> None:
+    async def test_streaming_response_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None:
         async with async_client.beta.threads.with_streaming_response.create_and_run(
             assistant_id="string",
         ) as response:
@@ -502,3 +572,71 @@ class TestAsyncThreads:
             assert_matches_type(Run, thread, path=["response"])
 
         assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_method_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None:
+        thread_stream = await async_client.beta.threads.create_and_run(
+            assistant_id="string",
+            stream=True,
+        )
+        await thread_stream.response.aclose()
+
+    @parametrize
+    async def test_method_create_and_run_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None:
+        thread_stream = await async_client.beta.threads.create_and_run(
+            assistant_id="string",
+            stream=True,
+            instructions="string",
+            metadata={},
+            model="string",
+            thread={
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": "x",
+                        "file_ids": ["string"],
+                        "metadata": {},
+                    },
+                    {
+                        "role": "user",
+                        "content": "x",
+                        "file_ids": ["string"],
+                        "metadata": {},
+                    },
+                    {
+                        "role": "user",
+                        "content": "x",
+                        "file_ids": ["string"],
+                        "metadata": {},
+                    },
+                ],
+                "metadata": {},
+            },
+            tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}],
+        )
+        await thread_stream.response.aclose()
+
+    @parametrize
+    async def test_raw_response_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.beta.threads.with_raw_response.create_and_run(
+            assistant_id="string",
+            stream=True,
+        )
+
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        stream = response.parse()
+        await stream.close()
+
+    @parametrize
+    async def test_streaming_response_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.beta.threads.with_streaming_response.create_and_run(
+            assistant_id="string",
+            stream=True,
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = await response.parse()
+            await stream.close()
+
+        assert cast(Any, response.is_closed) is True
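
Note the close conventions these streaming tests pin down: the synchronous stream is released with `stream.response.close()`, the async variant with `await stream.response.aclose()`. A rough async sketch (assistant ID hypothetical):

```python
import asyncio

import openai


async def main() -> None:
    client = openai.AsyncOpenAI()
    stream = await client.beta.threads.create_and_run(
        assistant_id="asst_abc123",  # hypothetical ID
        stream=True,
    )
    try:
        async for event in stream:
            print(event.event)
    finally:
        # Counterpart of thread_stream.response.close() in the sync tests.
        await stream.response.aclose()


asyncio.run(main())
```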
.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.13.4"
+  ".": "1.14.0"
 }
\ No newline at end of file
api.md
@@ -1,7 +1,7 @@
 # Shared Types
 
 ```python
-from openai.types import FunctionDefinition, FunctionParameters
+from openai.types import ErrorObject, FunctionDefinition, FunctionParameters
 ```
 
 # Completions
@@ -177,7 +177,19 @@ Methods:
 Types:
 
 ```python
-from openai.types.beta import Assistant, AssistantDeleted
+from openai.types.beta import (
+    Assistant,
+    AssistantDeleted,
+    AssistantStreamEvent,
+    AssistantTool,
+    CodeInterpreterTool,
+    FunctionTool,
+    MessageStreamEvent,
+    RetrievalTool,
+    RunStepStreamEvent,
+    RunStreamEvent,
+    ThreadStreamEvent,
+)
 ```
 
 Methods:
@@ -218,6 +230,7 @@ Methods:
 - <code title="post /threads/{thread_id}">client.beta.threads.<a href="./src/openai/resources/beta/threads/threads.py">update</a>(thread_id, \*\*<a href="src/openai/types/beta/thread_update_params.py">params</a>) -> <a href="./src/openai/types/beta/thread.py">Thread</a></code>
 - <code title="delete /threads/{thread_id}">client.beta.threads.<a href="./src/openai/resources/beta/threads/threads.py">delete</a>(thread_id) -> <a href="./src/openai/types/beta/thread_deleted.py">ThreadDeleted</a></code>
 - <code title="post /threads/runs">client.beta.threads.<a href="./src/openai/resources/beta/threads/threads.py">create_and_run</a>(\*\*<a href="src/openai/types/beta/thread_create_and_run_params.py">params</a>) -> <a href="./src/openai/types/beta/threads/run.py">Run</a></code>
+- <code>client.beta.threads.<a href="./src/openai/resources/beta/threads/threads.py">create_and_run_stream</a>(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]</code>
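
`create_and_run_stream` is the higher-level companion to the `stream=True` overload: it returns an `AssistantStreamManager` that opens the stream on `__enter__`, dispatches events to an `AssistantEventHandler`, and closes the stream on `__exit__`. A sketch with the default handler and a hypothetical assistant ID:

```python
import openai

client = openai.OpenAI()

# until_done() blocks until the terminal stream event arrives.
with client.beta.threads.create_and_run_stream(
    assistant_id="asst_abc123",  # hypothetical ID
) as stream:
    stream.until_done()
```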
 
 ### Runs
 
@@ -235,6 +248,8 @@ Methods:
 - <code title="get /threads/{thread_id}/runs">client.beta.threads.runs.<a href="./src/openai/resources/beta/threads/runs/runs.py">list</a>(thread_id, \*\*<a href="src/openai/types/beta/threads/run_list_params.py">params</a>) -> <a href="./src/openai/types/beta/threads/run.py">SyncCursorPage[Run]</a></code>
 - <code title="post /threads/{thread_id}/runs/{run_id}/cancel">client.beta.threads.runs.<a href="./src/openai/resources/beta/threads/runs/runs.py">cancel</a>(run_id, \*, thread_id) -> <a href="./src/openai/types/beta/threads/run.py">Run</a></code>
 - <code title="post /threads/{thread_id}/runs/{run_id}/submit_tool_outputs">client.beta.threads.runs.<a href="./src/openai/resources/beta/threads/runs/runs.py">submit_tool_outputs</a>(run_id, \*, thread_id, \*\*<a href="src/openai/types/beta/threads/run_submit_tool_outputs_params.py">params</a>) -> <a href="./src/openai/types/beta/threads/run.py">Run</a></code>
+- <code>client.beta.threads.runs.<a href="./src/openai/resources/beta/threads/runs/runs.py">create_and_stream</a>(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]</code>
+- <code>client.beta.threads.runs.<a href="./src/openai/resources/beta/threads/runs/runs.py">submit_tool_outputs_stream</a>(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]</code>
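
`submit_tool_outputs_stream` follows the same manager pattern, resuming a run that is blocked on tool output and streaming the remainder of the run; the IDs and output below are hypothetical:

```python
import openai

client = openai.OpenAI()

with client.beta.threads.runs.submit_tool_outputs_stream(
    run_id="run_abc123",  # hypothetical run ID
    thread_id="thread_abc123",  # hypothetical thread ID
    tool_outputs=[{"tool_call_id": "call_abc123", "output": "42"}],
) as stream:
    stream.until_done()
```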
 
 #### Steps
 
@@ -242,11 +257,22 @@ Types:
 
 ```python
 from openai.types.beta.threads.runs import (
-    CodeToolCall,
+    CodeInterpreterLogs,
+    CodeInterpreterOutputImage,
+    CodeInterpreterToolCall,
+    CodeInterpreterToolCallDelta,
     FunctionToolCall,
+    FunctionToolCallDelta,
     MessageCreationStepDetails,
     RetrievalToolCall,
+    RetrievalToolCallDelta,
     RunStep,
+    RunStepDelta,
+    RunStepDeltaEvent,
+    RunStepDeltaMessageDelta,
+    ToolCall,
+    ToolCallDelta,
+    ToolCallDeltaObject,
     ToolCallsStepDetails,
 )
 ```
@@ -262,19 +288,35 @@ Types:
 
 ```python
 from openai.types.beta.threads import (
-    MessageContentImageFile,
-    MessageContentText,
-    ThreadMessage,
-    ThreadMessageDeleted,
+    Annotation,
+    AnnotationDelta,
+    FileCitationAnnotation,
+    FileCitationDeltaAnnotation,
+    FilePathAnnotation,
+    FilePathDeltaAnnotation,
+    ImageFile,
+    ImageFileContentBlock,
+    ImageFileDelta,
+    ImageFileDeltaBlock,
+    Message,
+    MessageContent,
+    MessageContentDelta,
+    MessageDeleted,
+    MessageDelta,
+    MessageDeltaEvent,
+    Text,
+    TextContentBlock,
+    TextDelta,
+    TextDeltaBlock,
 )
 ```
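
The new `*Delta` types model the incremental payloads carried by delta events; for example, a `thread.message.delta` event wraps a `MessageDeltaEvent` whose `MessageDelta` holds `TextDeltaBlock`s. A sketch of pulling text out of a raw event stream, assuming field access follows these type names (IDs hypothetical):

```python
import openai

client = openai.OpenAI()

stream = client.beta.threads.runs.create(
    "thread_abc123",  # hypothetical thread ID
    assistant_id="asst_abc123",
    stream=True,
)
for event in stream:
    if event.event == "thread.message.delta":
        for block in event.data.delta.content or []:
            # TextDeltaBlock -> TextDelta -> value
            if block.type == "text" and block.text and block.text.value:
                print(block.text.value, end="", flush=True)
```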
 
 Methods:
 
-- <code title="post /threads/{thread_id}/messages">client.beta.threads.messages.<a href="./src/openai/resources/beta/threads/messages/messages.py">create</a>(thread_id, \*\*<a href="src/openai/types/beta/threads/message_create_params.py">params</a>) -> <a href="./src/openai/types/beta/threads/thread_message.py">ThreadMessage</a></code>
-- <code title="get /threads/{thread_id}/messages/{message_id}">client.beta.threads.messages.<a href="./src/openai/resources/beta/threads/messages/messages.py">retrieve</a>(message_id, \*, thread_id) -> <a href="./src/openai/types/beta/threads/thread_message.py">ThreadMessage</a></code>
-- <code title="post /threads/{thread_id}/messages/{message_id}">client.beta.threads.messages.<a href="./src/openai/resources/beta/threads/messages/messages.py">update</a>(message_id, \*, thread_id, \*\*<a href="src/openai/types/beta/threads/message_update_params.py">params</a>) -> <a href="./src/openai/types/beta/threads/thread_message.py">ThreadMessage</a></code>
-- <code title="get /threads/{thread_id}/messages">client.beta.threads.messages.<a href="./src/openai/resources/beta/threads/messages/messages.py">list</a>(thread_id, \*\*<a href="src/openai/types/beta/threads/message_list_params.py">params</a>) -> <a href="./src/openai/types/beta/threads/thread_message.py">SyncCursorPage[ThreadMessage]</a></code>
+- <code title="post /threads/{thread_id}/messages">client.beta.threads.messages.<a href="./src/openai/resources/beta/threads/messages/messages.py">create</a>(thread_id, \*\*<a href="src/openai/types/beta/threads/message_create_params.py">params</a>) -> <a href="./src/openai/types/beta/threads/message.py">Message</a></code>
+- <code title="get /threads/{thread_id}/messages/{message_id}">client.beta.threads.messages.<a href="./src/openai/resources/beta/threads/messages/messages.py">retrieve</a>(message_id, \*, thread_id) -> <a href="./src/openai/types/beta/threads/message.py">Message</a></code>
+- <code title="post /threads/{thread_id}/messages/{message_id}">client.beta.threads.messages.<a href="./src/openai/resources/beta/threads/messages/messages.py">update</a>(message_id, \*, thread_id, \*\*<a href="src/openai/types/beta/threads/message_update_params.py">params</a>) -> <a href="./src/openai/types/beta/threads/message.py">Message</a></code>
+- <code title="get /threads/{thread_id}/messages">client.beta.threads.messages.<a href="./src/openai/resources/beta/threads/messages/messages.py">list</a>(thread_id, \*\*<a href="src/openai/types/beta/threads/message_list_params.py">params</a>) -> <a href="./src/openai/types/beta/threads/message.py">SyncCursorPage[Message]</a></code>
 
 #### Files
 
CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## 1.14.0 (2024-03-13)
+
+Full Changelog: [v1.13.4...v1.14.0](https://github.com/openai/openai-python/compare/v1.13.4...v1.14.0)
+
+### Features
+
+* **assistants:** add support for streaming ([#1233](https://github.com/openai/openai-python/issues/1233)) ([17635dc](https://github.com/openai/openai-python/commit/17635dccbeddf153f8201dbca18b44e16a1799b2))
+
 ## 1.13.4 (2024-03-13)
 
 Full Changelog: [v1.13.3...v1.13.4](https://github.com/openai/openai-python/compare/v1.13.3...v1.13.4)
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openai"
-version = "1.13.4"
+version = "1.14.0"
 description = "The official Python library for the openai API"
 readme = "README.md"
 license = "Apache-2.0"
@@ -60,6 +60,7 @@ dev-dependencies = [
     "nox",
     "dirty-equals>=0.6.0",
     "importlib-metadata>=6.7.0",
+    "inline-snapshot >=0.7.0",
     "azure-identity >=1.14.1",
     "types-tqdm > 4",
     "types-pyaudio > 0"
requirements-dev.lock
@@ -15,11 +15,15 @@ anyio==4.1.0
     # via openai
 argcomplete==3.1.2
     # via nox
+asttokens==2.4.1
+    # via inline-snapshot
 attrs==23.1.0
     # via pytest
 azure-core==1.30.1
     # via azure-identity
 azure-identity==1.15.0
+black==24.2.0
+    # via inline-snapshot
 certifi==2023.7.22
     # via httpcore
     # via httpx
@@ -28,6 +32,9 @@ cffi==1.16.0
     # via cryptography
 charset-normalizer==3.3.2
     # via requests
+click==8.1.7
+    # via black
+    # via inline-snapshot
 colorlog==6.7.0
     # via nox
 cryptography==42.0.5
@@ -41,6 +48,8 @@ distro==1.8.0
     # via openai
 exceptiongroup==1.1.3
     # via anyio
+executing==2.0.1
+    # via inline-snapshot
 filelock==3.12.4
     # via virtualenv
 h11==0.14.0
@@ -57,6 +66,7 @@ idna==3.4
 importlib-metadata==7.0.0
 iniconfig==2.0.0
     # via pytest
+inline-snapshot==0.7.0
 msal==1.27.0
     # via azure-identity
     # via msal-extensions
@@ -64,6 +74,7 @@ msal-extensions==1.1.0
     # via azure-identity
 mypy==1.7.1
 mypy-extensions==1.0.0
+    # via black
     # via mypy
 nodeenv==1.8.0
     # via pyright
@@ -73,6 +84,7 @@ numpy==1.26.3
     # via pandas
     # via pandas-stubs
 packaging==23.2
+    # via black
     # via msal-extensions
     # via nox
     # via pytest
@@ -80,7 +92,10 @@ pandas==2.1.4
     # via openai
 pandas-stubs==2.1.4.231227
     # via openai
+pathspec==0.12.1
+    # via black
 platformdirs==3.11.0
+    # via black
     # via virtualenv
 pluggy==1.3.0
     # via pytest
@@ -114,6 +129,7 @@ ruff==0.1.9
 setuptools==68.2.2
     # via nodeenv
 six==1.16.0
+    # via asttokens
     # via azure-core
     # via python-dateutil
 sniffio==1.3.0
@@ -121,7 +137,10 @@ sniffio==1.3.0
     # via httpx
     # via openai
 time-machine==2.9.0
+toml==0.10.2
+    # via inline-snapshot
 tomli==2.0.1
+    # via black
     # via mypy
     # via pytest
 tqdm==4.66.1
@@ -129,9 +148,12 @@ tqdm==4.66.1
 types-pyaudio==0.2.16.20240106
 types-pytz==2024.1.0.20240203
     # via pandas-stubs
+types-toml==0.10.8.20240310
+    # via inline-snapshot
 types-tqdm==4.66.0.2
 typing-extensions==4.8.0
     # via azure-core
+    # via black
     # via mypy
     # via openai
     # via pydantic