Commit 86379b44

Stainless Bot <107565488+stainless-bot@users.noreply.github.com>
2024-01-15 22:11:27
feat(client): add support for streaming raw responses (#1072)
As an alternative to `with_raw_response`, we now also provide `with_streaming_response`. These methods must be used as a context manager so that the response is always cleaned up.
1 parent ac33853
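
For reference, a minimal sketch of the new pattern (assuming a configured `OpenAI` client; the model and prompt are illustrative, not from this commit). The request is sent when the context is entered, and leaving the block guarantees the underlying HTTP response is closed even if the body was never read:

```python
from openai import OpenAI

client = OpenAI()

with client.chat.completions.with_streaming_response.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Say this is a test"}],
) as response:
    # Headers are available before the body has been consumed.
    print(response.headers.get("x-request-id"))
    # `.parse()` reads the body and returns the usual `ChatCompletion` model.
    completion = response.parse()
    print(completion.choices[0].message.content)
```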
examples/audio.py
@@ -12,14 +12,18 @@ speech_file_path = Path(__file__).parent / "speech.mp3"
 
 def main() -> None:
     # Create text-to-speech audio file
-    response = openai.audio.speech.create(
-        model="tts-1", voice="alloy", input="the quick brown fox jumped over the lazy dogs"
-    )
-
-    response.stream_to_file(speech_file_path)
+    with openai.audio.speech.with_streaming_response.create(
+        model="tts-1",
+        voice="alloy",
+        input="the quick brown fox jumped over the lazy dogs",
+    ) as response:
+        response.stream_to_file(speech_file_path)
 
     # Create transcription from audio file
-    transcription = openai.audio.transcriptions.create(model="whisper-1", file=speech_file_path)
+    transcription = openai.audio.transcriptions.create(
+        model="whisper-1",
+        file=speech_file_path,
+    )
     print(transcription.text)
 
     # Create translation from audio file
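
The updated example covers the sync client; an async sketch of the same flow (illustrative, not part of this commit, assuming `AsyncOpenAI`) uses `async with`, and `stream_to_file` becomes awaitable:

```python
import asyncio
from pathlib import Path

from openai import AsyncOpenAI

client = AsyncOpenAI()
speech_file_path = Path(__file__).parent / "speech.mp3"


async def main() -> None:
    # `async with` ensures the streamed response is closed on exit.
    async with client.audio.speech.with_streaming_response.create(
        model="tts-1",
        voice="alloy",
        input="the quick brown fox jumped over the lazy dogs",
    ) as response:
        await response.stream_to_file(speech_file_path)


asyncio.run(main())
```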
src/openai/resources/audio/__init__.py
@@ -1,13 +1,36 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from .audio import Audio, AsyncAudio, AudioWithRawResponse, AsyncAudioWithRawResponse
-from .speech import Speech, AsyncSpeech, SpeechWithRawResponse, AsyncSpeechWithRawResponse
-from .translations import Translations, AsyncTranslations, TranslationsWithRawResponse, AsyncTranslationsWithRawResponse
+from .audio import (
+    Audio,
+    AsyncAudio,
+    AudioWithRawResponse,
+    AsyncAudioWithRawResponse,
+    AudioWithStreamingResponse,
+    AsyncAudioWithStreamingResponse,
+)
+from .speech import (
+    Speech,
+    AsyncSpeech,
+    SpeechWithRawResponse,
+    AsyncSpeechWithRawResponse,
+    SpeechWithStreamingResponse,
+    AsyncSpeechWithStreamingResponse,
+)
+from .translations import (
+    Translations,
+    AsyncTranslations,
+    TranslationsWithRawResponse,
+    AsyncTranslationsWithRawResponse,
+    TranslationsWithStreamingResponse,
+    AsyncTranslationsWithStreamingResponse,
+)
 from .transcriptions import (
     Transcriptions,
     AsyncTranscriptions,
     TranscriptionsWithRawResponse,
     AsyncTranscriptionsWithRawResponse,
+    TranscriptionsWithStreamingResponse,
+    AsyncTranscriptionsWithStreamingResponse,
 )
 
 __all__ = [
@@ -15,16 +38,24 @@ __all__ = [
     "AsyncTranscriptions",
     "TranscriptionsWithRawResponse",
     "AsyncTranscriptionsWithRawResponse",
+    "TranscriptionsWithStreamingResponse",
+    "AsyncTranscriptionsWithStreamingResponse",
     "Translations",
     "AsyncTranslations",
     "TranslationsWithRawResponse",
     "AsyncTranslationsWithRawResponse",
+    "TranslationsWithStreamingResponse",
+    "AsyncTranslationsWithStreamingResponse",
     "Speech",
     "AsyncSpeech",
     "SpeechWithRawResponse",
     "AsyncSpeechWithRawResponse",
+    "SpeechWithStreamingResponse",
+    "AsyncSpeechWithStreamingResponse",
     "Audio",
     "AsyncAudio",
     "AudioWithRawResponse",
     "AsyncAudioWithRawResponse",
+    "AudioWithStreamingResponse",
+    "AsyncAudioWithStreamingResponse",
 ]
src/openai/resources/audio/audio.py
@@ -2,15 +2,31 @@
 
 from __future__ import annotations
 
-from .speech import Speech, AsyncSpeech, SpeechWithRawResponse, AsyncSpeechWithRawResponse
+from .speech import (
+    Speech,
+    AsyncSpeech,
+    SpeechWithRawResponse,
+    AsyncSpeechWithRawResponse,
+    SpeechWithStreamingResponse,
+    AsyncSpeechWithStreamingResponse,
+)
 from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
-from .translations import Translations, AsyncTranslations, TranslationsWithRawResponse, AsyncTranslationsWithRawResponse
+from .translations import (
+    Translations,
+    AsyncTranslations,
+    TranslationsWithRawResponse,
+    AsyncTranslationsWithRawResponse,
+    TranslationsWithStreamingResponse,
+    AsyncTranslationsWithStreamingResponse,
+)
 from .transcriptions import (
     Transcriptions,
     AsyncTranscriptions,
     TranscriptionsWithRawResponse,
     AsyncTranscriptionsWithRawResponse,
+    TranscriptionsWithStreamingResponse,
+    AsyncTranscriptionsWithStreamingResponse,
 )
 
 __all__ = ["Audio", "AsyncAudio"]
@@ -33,6 +49,10 @@ class Audio(SyncAPIResource):
     def with_raw_response(self) -> AudioWithRawResponse:
         return AudioWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AudioWithStreamingResponse:
+        return AudioWithStreamingResponse(self)
+
 
 class AsyncAudio(AsyncAPIResource):
     @cached_property
@@ -51,6 +71,10 @@ class AsyncAudio(AsyncAPIResource):
     def with_raw_response(self) -> AsyncAudioWithRawResponse:
         return AsyncAudioWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncAudioWithStreamingResponse:
+        return AsyncAudioWithStreamingResponse(self)
+
 
 class AudioWithRawResponse:
     def __init__(self, audio: Audio) -> None:
@@ -64,3 +88,17 @@ class AsyncAudioWithRawResponse:
         self.transcriptions = AsyncTranscriptionsWithRawResponse(audio.transcriptions)
         self.translations = AsyncTranslationsWithRawResponse(audio.translations)
         self.speech = AsyncSpeechWithRawResponse(audio.speech)
+
+
+class AudioWithStreamingResponse:
+    def __init__(self, audio: Audio) -> None:
+        self.transcriptions = TranscriptionsWithStreamingResponse(audio.transcriptions)
+        self.translations = TranslationsWithStreamingResponse(audio.translations)
+        self.speech = SpeechWithStreamingResponse(audio.speech)
+
+
+class AsyncAudioWithStreamingResponse:
+    def __init__(self, audio: AsyncAudio) -> None:
+        self.transcriptions = AsyncTranscriptionsWithStreamingResponse(audio.transcriptions)
+        self.translations = AsyncTranslationsWithStreamingResponse(audio.translations)
+        self.speech = AsyncSpeechWithStreamingResponse(audio.speech)
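
As with the raw-response proxies, the streaming proxies simply re-wrap each child resource, so the entry point can sit at any level of the resource tree. A small sketch (hypothetical client; both expressions are equivalent in effect):

```python
from openai import OpenAI

client = OpenAI()

# Both resolve to an equivalently wrapped `create`: `AudioWithStreamingResponse`
# builds a `SpeechWithStreamingResponse` around the same leaf resource.
create_a = client.audio.with_streaming_response.speech.create
create_b = client.audio.speech.with_streaming_response.create
```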
src/openai/resources/audio/speech.py
@@ -7,14 +7,19 @@ from typing_extensions import Literal
 
 import httpx
 
+from ... import _legacy_response
 from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ..._utils import maybe_transform
 from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from ..._response import (
+    StreamedBinaryAPIResponse,
+    AsyncStreamedBinaryAPIResponse,
+    to_custom_streamed_response_wrapper,
+    async_to_custom_streamed_response_wrapper,
+)
 from ...types.audio import speech_create_params
 from ..._base_client import (
-    HttpxBinaryResponseContent,
     make_request_options,
 )
 
@@ -26,6 +31,10 @@ class Speech(SyncAPIResource):
     def with_raw_response(self) -> SpeechWithRawResponse:
         return SpeechWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> SpeechWithStreamingResponse:
+        return SpeechWithStreamingResponse(self)
+
     def create(
         self,
         *,
@@ -40,7 +49,7 @@ class Speech(SyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> HttpxBinaryResponseContent:
+    ) -> _legacy_response.HttpxBinaryResponseContent:
         """
         Generates audio from the input text.
 
@@ -84,7 +93,7 @@ class Speech(SyncAPIResource):
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=HttpxBinaryResponseContent,
+            cast_to=_legacy_response.HttpxBinaryResponseContent,
         )
 
 
@@ -93,6 +102,10 @@ class AsyncSpeech(AsyncAPIResource):
     def with_raw_response(self) -> AsyncSpeechWithRawResponse:
         return AsyncSpeechWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncSpeechWithStreamingResponse:
+        return AsyncSpeechWithStreamingResponse(self)
+
     async def create(
         self,
         *,
@@ -107,7 +120,7 @@ class AsyncSpeech(AsyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> HttpxBinaryResponseContent:
+    ) -> _legacy_response.HttpxBinaryResponseContent:
         """
         Generates audio from the input text.
 
@@ -151,19 +164,35 @@ class AsyncSpeech(AsyncAPIResource):
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=HttpxBinaryResponseContent,
+            cast_to=_legacy_response.HttpxBinaryResponseContent,
         )
 
 
 class SpeechWithRawResponse:
     def __init__(self, speech: Speech) -> None:
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             speech.create,
         )
 
 
 class AsyncSpeechWithRawResponse:
     def __init__(self, speech: AsyncSpeech) -> None:
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            speech.create,
+        )
+
+
+class SpeechWithStreamingResponse:
+    def __init__(self, speech: Speech) -> None:
+        self.create = to_custom_streamed_response_wrapper(
+            speech.create,
+            StreamedBinaryAPIResponse,
+        )
+
+
+class AsyncSpeechWithStreamingResponse:
+    def __init__(self, speech: AsyncSpeech) -> None:
+        self.create = async_to_custom_streamed_response_wrapper(
             speech.create,
+            AsyncStreamedBinaryAPIResponse,
         )
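
Speech is the one binary endpoint touched here, so it gets the custom wrappers built around `StreamedBinaryAPIResponse` rather than the plain streamed wrapper; note also that the raw-response wrappers now come from `_legacy_response`, leaving `_response` to the new streaming machinery. A sketch of what the binary wrapper enables (illustrative, not from the commit): the body can be consumed chunk by chunk instead of buffered in memory:

```python
from openai import OpenAI

client = OpenAI()

with client.audio.speech.with_streaming_response.create(
    model="tts-1",
    voice="alloy",
    input="the quick brown fox jumped over the lazy dogs",
) as response:
    # Chunks are read from the network as they arrive.
    with open("speech.mp3", "wb") as f:
        for chunk in response.iter_bytes():
            f.write(chunk)
```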
src/openai/resources/audio/transcriptions.py
@@ -7,11 +7,12 @@ from typing_extensions import Literal
 
 import httpx
 
+from ... import _legacy_response
 from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
 from ..._utils import extract_files, maybe_transform, deepcopy_minimal
 from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from ...types.audio import Transcription, transcription_create_params
 from ..._base_client import (
     make_request_options,
@@ -25,6 +26,10 @@ class Transcriptions(SyncAPIResource):
     def with_raw_response(self) -> TranscriptionsWithRawResponse:
         return TranscriptionsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> TranscriptionsWithStreamingResponse:
+        return TranscriptionsWithStreamingResponse(self)
+
     def create(
         self,
         *,
@@ -110,6 +115,10 @@ class AsyncTranscriptions(AsyncAPIResource):
     def with_raw_response(self) -> AsyncTranscriptionsWithRawResponse:
         return AsyncTranscriptionsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncTranscriptionsWithStreamingResponse:
+        return AsyncTranscriptionsWithStreamingResponse(self)
+
     async def create(
         self,
         *,
@@ -192,13 +201,27 @@ class AsyncTranscriptions(AsyncAPIResource):
 
 class TranscriptionsWithRawResponse:
     def __init__(self, transcriptions: Transcriptions) -> None:
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             transcriptions.create,
         )
 
 
 class AsyncTranscriptionsWithRawResponse:
     def __init__(self, transcriptions: AsyncTranscriptions) -> None:
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            transcriptions.create,
+        )
+
+
+class TranscriptionsWithStreamingResponse:
+    def __init__(self, transcriptions: Transcriptions) -> None:
+        self.create = to_streamed_response_wrapper(
+            transcriptions.create,
+        )
+
+
+class AsyncTranscriptionsWithStreamingResponse:
+    def __init__(self, transcriptions: AsyncTranscriptions) -> None:
+        self.create = async_to_streamed_response_wrapper(
             transcriptions.create,
         )
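
Transcriptions, like most JSON-returning endpoints in this commit, use the generic `to_streamed_response_wrapper`, which yields a response object whose body can be inspected before being parsed into the usual model. A minimal sketch (assuming a configured client and an existing audio file):

```python
from pathlib import Path

from openai import OpenAI

client = OpenAI()

with client.audio.transcriptions.with_streaming_response.create(
    model="whisper-1",
    file=Path("speech.mp3"),
) as response:
    # `.parse()` consumes the body and returns a `Transcription` model.
    transcription = response.parse()
    print(transcription.text)
```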
src/openai/resources/audio/translations.py
@@ -7,11 +7,12 @@ from typing_extensions import Literal
 
 import httpx
 
+from ... import _legacy_response
 from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
 from ..._utils import extract_files, maybe_transform, deepcopy_minimal
 from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from ...types.audio import Translation, translation_create_params
 from ..._base_client import (
     make_request_options,
@@ -25,6 +26,10 @@ class Translations(SyncAPIResource):
     def with_raw_response(self) -> TranslationsWithRawResponse:
         return TranslationsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> TranslationsWithStreamingResponse:
+        return TranslationsWithStreamingResponse(self)
+
     def create(
         self,
         *,
@@ -103,6 +108,10 @@ class AsyncTranslations(AsyncAPIResource):
     def with_raw_response(self) -> AsyncTranslationsWithRawResponse:
         return AsyncTranslationsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncTranslationsWithStreamingResponse:
+        return AsyncTranslationsWithStreamingResponse(self)
+
     async def create(
         self,
         *,
@@ -178,13 +187,27 @@ class AsyncTranslations(AsyncAPIResource):
 
 class TranslationsWithRawResponse:
     def __init__(self, translations: Translations) -> None:
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             translations.create,
         )
 
 
 class AsyncTranslationsWithRawResponse:
     def __init__(self, translations: AsyncTranslations) -> None:
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            translations.create,
+        )
+
+
+class TranslationsWithStreamingResponse:
+    def __init__(self, translations: Translations) -> None:
+        self.create = to_streamed_response_wrapper(
+            translations.create,
+        )
+
+
+class AsyncTranslationsWithStreamingResponse:
+    def __init__(self, translations: AsyncTranslations) -> None:
+        self.create = async_to_streamed_response_wrapper(
             translations.create,
         )
src/openai/resources/beta/assistants/__init__.py
@@ -1,15 +1,33 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from .files import Files, AsyncFiles, FilesWithRawResponse, AsyncFilesWithRawResponse
-from .assistants import Assistants, AsyncAssistants, AssistantsWithRawResponse, AsyncAssistantsWithRawResponse
+from .files import (
+    Files,
+    AsyncFiles,
+    FilesWithRawResponse,
+    AsyncFilesWithRawResponse,
+    FilesWithStreamingResponse,
+    AsyncFilesWithStreamingResponse,
+)
+from .assistants import (
+    Assistants,
+    AsyncAssistants,
+    AssistantsWithRawResponse,
+    AsyncAssistantsWithRawResponse,
+    AssistantsWithStreamingResponse,
+    AsyncAssistantsWithStreamingResponse,
+)
 
 __all__ = [
     "Files",
     "AsyncFiles",
     "FilesWithRawResponse",
     "AsyncFilesWithRawResponse",
+    "FilesWithStreamingResponse",
+    "AsyncFilesWithStreamingResponse",
     "Assistants",
     "AsyncAssistants",
     "AssistantsWithRawResponse",
     "AsyncAssistantsWithRawResponse",
+    "AssistantsWithStreamingResponse",
+    "AsyncAssistantsWithStreamingResponse",
 ]
src/openai/resources/beta/assistants/assistants.py
@@ -7,12 +7,20 @@ from typing_extensions import Literal
 
 import httpx
 
-from .files import Files, AsyncFiles, FilesWithRawResponse, AsyncFilesWithRawResponse
+from .... import _legacy_response
+from .files import (
+    Files,
+    AsyncFiles,
+    FilesWithRawResponse,
+    AsyncFilesWithRawResponse,
+    FilesWithStreamingResponse,
+    AsyncFilesWithStreamingResponse,
+)
 from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ...._utils import maybe_transform
 from ...._compat import cached_property
 from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from ....pagination import SyncCursorPage, AsyncCursorPage
 from ....types.beta import (
     Assistant,
@@ -38,6 +46,10 @@ class Assistants(SyncAPIResource):
     def with_raw_response(self) -> AssistantsWithRawResponse:
         return AssistantsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AssistantsWithStreamingResponse:
+        return AssistantsWithStreamingResponse(self)
+
     def create(
         self,
         *,
@@ -331,6 +343,10 @@ class AsyncAssistants(AsyncAPIResource):
     def with_raw_response(self) -> AsyncAssistantsWithRawResponse:
         return AsyncAssistantsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
+        return AsyncAssistantsWithStreamingResponse(self)
+
     async def create(
         self,
         *,
@@ -619,19 +635,19 @@ class AssistantsWithRawResponse:
     def __init__(self, assistants: Assistants) -> None:
         self.files = FilesWithRawResponse(assistants.files)
 
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             assistants.create,
         )
-        self.retrieve = to_raw_response_wrapper(
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
             assistants.retrieve,
         )
-        self.update = to_raw_response_wrapper(
+        self.update = _legacy_response.to_raw_response_wrapper(
             assistants.update,
         )
-        self.list = to_raw_response_wrapper(
+        self.list = _legacy_response.to_raw_response_wrapper(
             assistants.list,
         )
-        self.delete = to_raw_response_wrapper(
+        self.delete = _legacy_response.to_raw_response_wrapper(
             assistants.delete,
         )
 
@@ -640,18 +656,60 @@ class AsyncAssistantsWithRawResponse:
     def __init__(self, assistants: AsyncAssistants) -> None:
         self.files = AsyncFilesWithRawResponse(assistants.files)
 
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            assistants.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            assistants.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            assistants.update,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            assistants.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            assistants.delete,
+        )
+
+
+class AssistantsWithStreamingResponse:
+    def __init__(self, assistants: Assistants) -> None:
+        self.files = FilesWithStreamingResponse(assistants.files)
+
+        self.create = to_streamed_response_wrapper(
+            assistants.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            assistants.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            assistants.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            assistants.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            assistants.delete,
+        )
+
+
+class AsyncAssistantsWithStreamingResponse:
+    def __init__(self, assistants: AsyncAssistants) -> None:
+        self.files = AsyncFilesWithStreamingResponse(assistants.files)
+
+        self.create = async_to_streamed_response_wrapper(
             assistants.create,
         )
-        self.retrieve = async_to_raw_response_wrapper(
+        self.retrieve = async_to_streamed_response_wrapper(
             assistants.retrieve,
         )
-        self.update = async_to_raw_response_wrapper(
+        self.update = async_to_streamed_response_wrapper(
             assistants.update,
         )
-        self.list = async_to_raw_response_wrapper(
+        self.list = async_to_streamed_response_wrapper(
             assistants.list,
         )
-        self.delete = async_to_raw_response_wrapper(
+        self.delete = async_to_streamed_response_wrapper(
             assistants.delete,
         )
src/openai/resources/beta/assistants/files.py
@@ -6,11 +6,12 @@ from typing_extensions import Literal
 
 import httpx
 
+from .... import _legacy_response
 from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ...._utils import maybe_transform
 from ...._compat import cached_property
 from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from ....pagination import SyncCursorPage, AsyncCursorPage
 from ...._base_client import (
     AsyncPaginator,
@@ -26,6 +27,10 @@ class Files(SyncAPIResource):
     def with_raw_response(self) -> FilesWithRawResponse:
         return FilesWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> FilesWithStreamingResponse:
+        return FilesWithStreamingResponse(self)
+
     def create(
         self,
         assistant_id: str,
@@ -203,6 +208,10 @@ class AsyncFiles(AsyncAPIResource):
     def with_raw_response(self) -> AsyncFilesWithRawResponse:
         return AsyncFilesWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
+        return AsyncFilesWithStreamingResponse(self)
+
     async def create(
         self,
         assistant_id: str,
@@ -377,31 +386,63 @@ class AsyncFiles(AsyncAPIResource):
 
 class FilesWithRawResponse:
     def __init__(self, files: Files) -> None:
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             files.create,
         )
-        self.retrieve = to_raw_response_wrapper(
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
             files.retrieve,
         )
-        self.list = to_raw_response_wrapper(
+        self.list = _legacy_response.to_raw_response_wrapper(
             files.list,
         )
-        self.delete = to_raw_response_wrapper(
+        self.delete = _legacy_response.to_raw_response_wrapper(
             files.delete,
         )
 
 
 class AsyncFilesWithRawResponse:
     def __init__(self, files: AsyncFiles) -> None:
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            files.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            files.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            files.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            files.delete,
+        )
+
+
+class FilesWithStreamingResponse:
+    def __init__(self, files: Files) -> None:
+        self.create = to_streamed_response_wrapper(
+            files.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            files.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            files.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            files.delete,
+        )
+
+
+class AsyncFilesWithStreamingResponse:
+    def __init__(self, files: AsyncFiles) -> None:
+        self.create = async_to_streamed_response_wrapper(
             files.create,
         )
-        self.retrieve = async_to_raw_response_wrapper(
+        self.retrieve = async_to_streamed_response_wrapper(
             files.retrieve,
         )
-        self.list = async_to_raw_response_wrapper(
+        self.list = async_to_streamed_response_wrapper(
             files.list,
         )
-        self.delete = async_to_raw_response_wrapper(
+        self.delete = async_to_streamed_response_wrapper(
             files.delete,
         )
src/openai/resources/beta/threads/messages/__init__.py
@@ -1,15 +1,33 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from .files import Files, AsyncFiles, FilesWithRawResponse, AsyncFilesWithRawResponse
-from .messages import Messages, AsyncMessages, MessagesWithRawResponse, AsyncMessagesWithRawResponse
+from .files import (
+    Files,
+    AsyncFiles,
+    FilesWithRawResponse,
+    AsyncFilesWithRawResponse,
+    FilesWithStreamingResponse,
+    AsyncFilesWithStreamingResponse,
+)
+from .messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
 
 __all__ = [
     "Files",
     "AsyncFiles",
     "FilesWithRawResponse",
     "AsyncFilesWithRawResponse",
+    "FilesWithStreamingResponse",
+    "AsyncFilesWithStreamingResponse",
     "Messages",
     "AsyncMessages",
     "MessagesWithRawResponse",
     "AsyncMessagesWithRawResponse",
+    "MessagesWithStreamingResponse",
+    "AsyncMessagesWithStreamingResponse",
 ]
src/openai/resources/beta/threads/messages/files.py
@@ -6,11 +6,12 @@ from typing_extensions import Literal
 
 import httpx
 
+from ..... import _legacy_response
 from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ....._utils import maybe_transform
 from ....._compat import cached_property
 from ....._resource import SyncAPIResource, AsyncAPIResource
-from ....._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from .....pagination import SyncCursorPage, AsyncCursorPage
 from ....._base_client import (
     AsyncPaginator,
@@ -26,6 +27,10 @@ class Files(SyncAPIResource):
     def with_raw_response(self) -> FilesWithRawResponse:
         return FilesWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> FilesWithStreamingResponse:
+        return FilesWithStreamingResponse(self)
+
     def retrieve(
         self,
         file_id: str,
@@ -133,6 +138,10 @@ class AsyncFiles(AsyncAPIResource):
     def with_raw_response(self) -> AsyncFilesWithRawResponse:
         return AsyncFilesWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
+        return AsyncFilesWithStreamingResponse(self)
+
     async def retrieve(
         self,
         file_id: str,
@@ -237,19 +246,39 @@ class AsyncFiles(AsyncAPIResource):
 
 class FilesWithRawResponse:
     def __init__(self, files: Files) -> None:
-        self.retrieve = to_raw_response_wrapper(
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
             files.retrieve,
         )
-        self.list = to_raw_response_wrapper(
+        self.list = _legacy_response.to_raw_response_wrapper(
             files.list,
         )
 
 
 class AsyncFilesWithRawResponse:
     def __init__(self, files: AsyncFiles) -> None:
-        self.retrieve = async_to_raw_response_wrapper(
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            files.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            files.list,
+        )
+
+
+class FilesWithStreamingResponse:
+    def __init__(self, files: Files) -> None:
+        self.retrieve = to_streamed_response_wrapper(
+            files.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            files.list,
+        )
+
+
+class AsyncFilesWithStreamingResponse:
+    def __init__(self, files: AsyncFiles) -> None:
+        self.retrieve = async_to_streamed_response_wrapper(
             files.retrieve,
         )
-        self.list = async_to_raw_response_wrapper(
+        self.list = async_to_streamed_response_wrapper(
             files.list,
         )
src/openai/resources/beta/threads/messages/messages.py
@@ -7,12 +7,20 @@ from typing_extensions import Literal
 
 import httpx
 
-from .files import Files, AsyncFiles, FilesWithRawResponse, AsyncFilesWithRawResponse
+from ..... import _legacy_response
+from .files import (
+    Files,
+    AsyncFiles,
+    FilesWithRawResponse,
+    AsyncFilesWithRawResponse,
+    FilesWithStreamingResponse,
+    AsyncFilesWithStreamingResponse,
+)
 from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ....._utils import maybe_transform
 from ....._compat import cached_property
 from ....._resource import SyncAPIResource, AsyncAPIResource
-from ....._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from .....pagination import SyncCursorPage, AsyncCursorPage
 from ....._base_client import (
     AsyncPaginator,
@@ -32,6 +40,10 @@ class Messages(SyncAPIResource):
     def with_raw_response(self) -> MessagesWithRawResponse:
         return MessagesWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> MessagesWithStreamingResponse:
+        return MessagesWithStreamingResponse(self)
+
     def create(
         self,
         thread_id: str,
@@ -240,6 +252,10 @@ class AsyncMessages(AsyncAPIResource):
     def with_raw_response(self) -> AsyncMessagesWithRawResponse:
         return AsyncMessagesWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
+        return AsyncMessagesWithStreamingResponse(self)
+
     async def create(
         self,
         thread_id: str,
@@ -443,16 +459,16 @@ class MessagesWithRawResponse:
     def __init__(self, messages: Messages) -> None:
         self.files = FilesWithRawResponse(messages.files)
 
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             messages.create,
         )
-        self.retrieve = to_raw_response_wrapper(
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
             messages.retrieve,
         )
-        self.update = to_raw_response_wrapper(
+        self.update = _legacy_response.to_raw_response_wrapper(
             messages.update,
         )
-        self.list = to_raw_response_wrapper(
+        self.list = _legacy_response.to_raw_response_wrapper(
             messages.list,
         )
 
@@ -461,15 +477,51 @@ class AsyncMessagesWithRawResponse:
     def __init__(self, messages: AsyncMessages) -> None:
         self.files = AsyncFilesWithRawResponse(messages.files)
 
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            messages.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            messages.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            messages.update,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            messages.list,
+        )
+
+
+class MessagesWithStreamingResponse:
+    def __init__(self, messages: Messages) -> None:
+        self.files = FilesWithStreamingResponse(messages.files)
+
+        self.create = to_streamed_response_wrapper(
+            messages.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            messages.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            messages.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            messages.list,
+        )
+
+
+class AsyncMessagesWithStreamingResponse:
+    def __init__(self, messages: AsyncMessages) -> None:
+        self.files = AsyncFilesWithStreamingResponse(messages.files)
+
+        self.create = async_to_streamed_response_wrapper(
             messages.create,
         )
-        self.retrieve = async_to_raw_response_wrapper(
+        self.retrieve = async_to_streamed_response_wrapper(
             messages.retrieve,
         )
-        self.update = async_to_raw_response_wrapper(
+        self.update = async_to_streamed_response_wrapper(
             messages.update,
         )
-        self.list = async_to_raw_response_wrapper(
+        self.list = async_to_streamed_response_wrapper(
             messages.list,
         )
src/openai/resources/beta/threads/runs/__init__.py
@@ -1,15 +1,33 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from .runs import Runs, AsyncRuns, RunsWithRawResponse, AsyncRunsWithRawResponse
-from .steps import Steps, AsyncSteps, StepsWithRawResponse, AsyncStepsWithRawResponse
+from .runs import (
+    Runs,
+    AsyncRuns,
+    RunsWithRawResponse,
+    AsyncRunsWithRawResponse,
+    RunsWithStreamingResponse,
+    AsyncRunsWithStreamingResponse,
+)
+from .steps import (
+    Steps,
+    AsyncSteps,
+    StepsWithRawResponse,
+    AsyncStepsWithRawResponse,
+    StepsWithStreamingResponse,
+    AsyncStepsWithStreamingResponse,
+)
 
 __all__ = [
     "Steps",
     "AsyncSteps",
     "StepsWithRawResponse",
     "AsyncStepsWithRawResponse",
+    "StepsWithStreamingResponse",
+    "AsyncStepsWithStreamingResponse",
     "Runs",
     "AsyncRuns",
     "RunsWithRawResponse",
     "AsyncRunsWithRawResponse",
+    "RunsWithStreamingResponse",
+    "AsyncRunsWithStreamingResponse",
 ]
src/openai/resources/beta/threads/runs/runs.py
@@ -7,12 +7,20 @@ from typing_extensions import Literal
 
 import httpx
 
-from .steps import Steps, AsyncSteps, StepsWithRawResponse, AsyncStepsWithRawResponse
+from ..... import _legacy_response
+from .steps import (
+    Steps,
+    AsyncSteps,
+    StepsWithRawResponse,
+    AsyncStepsWithRawResponse,
+    StepsWithStreamingResponse,
+    AsyncStepsWithStreamingResponse,
+)
 from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ....._utils import maybe_transform
 from ....._compat import cached_property
 from ....._resource import SyncAPIResource, AsyncAPIResource
-from ....._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from .....pagination import SyncCursorPage, AsyncCursorPage
 from ....._base_client import (
     AsyncPaginator,
@@ -38,6 +46,10 @@ class Runs(SyncAPIResource):
     def with_raw_response(self) -> RunsWithRawResponse:
         return RunsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> RunsWithStreamingResponse:
+        return RunsWithStreamingResponse(self)
+
     def create(
         self,
         thread_id: str,
@@ -335,6 +347,10 @@ class AsyncRuns(AsyncAPIResource):
     def with_raw_response(self) -> AsyncRunsWithRawResponse:
         return AsyncRunsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncRunsWithStreamingResponse:
+        return AsyncRunsWithStreamingResponse(self)
+
     async def create(
         self,
         thread_id: str,
@@ -627,22 +643,22 @@ class RunsWithRawResponse:
     def __init__(self, runs: Runs) -> None:
         self.steps = StepsWithRawResponse(runs.steps)
 
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             runs.create,
         )
-        self.retrieve = to_raw_response_wrapper(
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
             runs.retrieve,
         )
-        self.update = to_raw_response_wrapper(
+        self.update = _legacy_response.to_raw_response_wrapper(
             runs.update,
         )
-        self.list = to_raw_response_wrapper(
+        self.list = _legacy_response.to_raw_response_wrapper(
             runs.list,
         )
-        self.cancel = to_raw_response_wrapper(
+        self.cancel = _legacy_response.to_raw_response_wrapper(
             runs.cancel,
         )
-        self.submit_tool_outputs = to_raw_response_wrapper(
+        self.submit_tool_outputs = _legacy_response.to_raw_response_wrapper(
             runs.submit_tool_outputs,
         )
 
@@ -651,21 +667,69 @@ class AsyncRunsWithRawResponse:
     def __init__(self, runs: AsyncRuns) -> None:
         self.steps = AsyncStepsWithRawResponse(runs.steps)
 
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            runs.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            runs.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            runs.update,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            runs.list,
+        )
+        self.cancel = _legacy_response.async_to_raw_response_wrapper(
+            runs.cancel,
+        )
+        self.submit_tool_outputs = _legacy_response.async_to_raw_response_wrapper(
+            runs.submit_tool_outputs,
+        )
+
+
+class RunsWithStreamingResponse:
+    def __init__(self, runs: Runs) -> None:
+        self.steps = StepsWithStreamingResponse(runs.steps)
+
+        self.create = to_streamed_response_wrapper(
+            runs.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            runs.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            runs.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            runs.list,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            runs.cancel,
+        )
+        self.submit_tool_outputs = to_streamed_response_wrapper(
+            runs.submit_tool_outputs,
+        )
+
+
+class AsyncRunsWithStreamingResponse:
+    def __init__(self, runs: AsyncRuns) -> None:
+        self.steps = AsyncStepsWithStreamingResponse(runs.steps)
+
+        self.create = async_to_streamed_response_wrapper(
             runs.create,
         )
-        self.retrieve = async_to_raw_response_wrapper(
+        self.retrieve = async_to_streamed_response_wrapper(
             runs.retrieve,
         )
-        self.update = async_to_raw_response_wrapper(
+        self.update = async_to_streamed_response_wrapper(
             runs.update,
         )
-        self.list = async_to_raw_response_wrapper(
+        self.list = async_to_streamed_response_wrapper(
             runs.list,
         )
-        self.cancel = async_to_raw_response_wrapper(
+        self.cancel = async_to_streamed_response_wrapper(
             runs.cancel,
         )
-        self.submit_tool_outputs = async_to_raw_response_wrapper(
+        self.submit_tool_outputs = async_to_streamed_response_wrapper(
             runs.submit_tool_outputs,
         )
src/openai/resources/beta/threads/runs/steps.py
@@ -6,11 +6,12 @@ from typing_extensions import Literal
 
 import httpx
 
+from ..... import _legacy_response
 from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ....._utils import maybe_transform
 from ....._compat import cached_property
 from ....._resource import SyncAPIResource, AsyncAPIResource
-from ....._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from .....pagination import SyncCursorPage, AsyncCursorPage
 from ....._base_client import (
     AsyncPaginator,
@@ -26,6 +27,10 @@ class Steps(SyncAPIResource):
     def with_raw_response(self) -> StepsWithRawResponse:
         return StepsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> StepsWithStreamingResponse:
+        return StepsWithStreamingResponse(self)
+
     def retrieve(
         self,
         step_id: str,
@@ -132,6 +137,10 @@ class AsyncSteps(AsyncAPIResource):
     def with_raw_response(self) -> AsyncStepsWithRawResponse:
         return AsyncStepsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncStepsWithStreamingResponse:
+        return AsyncStepsWithStreamingResponse(self)
+
     async def retrieve(
         self,
         step_id: str,
@@ -235,19 +244,39 @@ class AsyncSteps(AsyncAPIResource):
 
 class StepsWithRawResponse:
     def __init__(self, steps: Steps) -> None:
-        self.retrieve = to_raw_response_wrapper(
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
             steps.retrieve,
         )
-        self.list = to_raw_response_wrapper(
+        self.list = _legacy_response.to_raw_response_wrapper(
             steps.list,
         )
 
 
 class AsyncStepsWithRawResponse:
     def __init__(self, steps: AsyncSteps) -> None:
-        self.retrieve = async_to_raw_response_wrapper(
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            steps.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            steps.list,
+        )
+
+
+class StepsWithStreamingResponse:
+    def __init__(self, steps: Steps) -> None:
+        self.retrieve = to_streamed_response_wrapper(
+            steps.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            steps.list,
+        )
+
+
+class AsyncStepsWithStreamingResponse:
+    def __init__(self, steps: AsyncSteps) -> None:
+        self.retrieve = async_to_streamed_response_wrapper(
             steps.retrieve,
         )
-        self.list = async_to_raw_response_wrapper(
+        self.list = async_to_streamed_response_wrapper(
             steps.list,
         )
src/openai/resources/beta/threads/__init__.py
@@ -1,20 +1,47 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from .runs import Runs, AsyncRuns, RunsWithRawResponse, AsyncRunsWithRawResponse
-from .threads import Threads, AsyncThreads, ThreadsWithRawResponse, AsyncThreadsWithRawResponse
-from .messages import Messages, AsyncMessages, MessagesWithRawResponse, AsyncMessagesWithRawResponse
+from .runs import (
+    Runs,
+    AsyncRuns,
+    RunsWithRawResponse,
+    AsyncRunsWithRawResponse,
+    RunsWithStreamingResponse,
+    AsyncRunsWithStreamingResponse,
+)
+from .threads import (
+    Threads,
+    AsyncThreads,
+    ThreadsWithRawResponse,
+    AsyncThreadsWithRawResponse,
+    ThreadsWithStreamingResponse,
+    AsyncThreadsWithStreamingResponse,
+)
+from .messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
 
 __all__ = [
     "Runs",
     "AsyncRuns",
     "RunsWithRawResponse",
     "AsyncRunsWithRawResponse",
+    "RunsWithStreamingResponse",
+    "AsyncRunsWithStreamingResponse",
     "Messages",
     "AsyncMessages",
     "MessagesWithRawResponse",
     "AsyncMessagesWithRawResponse",
+    "MessagesWithStreamingResponse",
+    "AsyncMessagesWithStreamingResponse",
     "Threads",
     "AsyncThreads",
     "ThreadsWithRawResponse",
     "AsyncThreadsWithRawResponse",
+    "ThreadsWithStreamingResponse",
+    "AsyncThreadsWithStreamingResponse",
 ]
src/openai/resources/beta/threads/threads.py
@@ -6,14 +6,29 @@ from typing import List, Optional
 
 import httpx
 
-from .runs import Runs, AsyncRuns, RunsWithRawResponse, AsyncRunsWithRawResponse
-from .messages import Messages, AsyncMessages, MessagesWithRawResponse, AsyncMessagesWithRawResponse
+from .... import _legacy_response
+from .runs import (
+    Runs,
+    AsyncRuns,
+    RunsWithRawResponse,
+    AsyncRunsWithRawResponse,
+    RunsWithStreamingResponse,
+    AsyncRunsWithStreamingResponse,
+)
+from .messages import (
+    Messages,
+    AsyncMessages,
+    MessagesWithRawResponse,
+    AsyncMessagesWithRawResponse,
+    MessagesWithStreamingResponse,
+    AsyncMessagesWithStreamingResponse,
+)
 from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ...._utils import maybe_transform
 from .runs.runs import Runs, AsyncRuns
 from ...._compat import cached_property
 from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from ....types.beta import (
     Thread,
     ThreadDeleted,
@@ -43,6 +58,10 @@ class Threads(SyncAPIResource):
     def with_raw_response(self) -> ThreadsWithRawResponse:
         return ThreadsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> ThreadsWithStreamingResponse:
+        return ThreadsWithStreamingResponse(self)
+
     def create(
         self,
         *,
@@ -278,6 +297,10 @@ class AsyncThreads(AsyncAPIResource):
     def with_raw_response(self) -> AsyncThreadsWithRawResponse:
         return AsyncThreadsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse:
+        return AsyncThreadsWithStreamingResponse(self)
+
     async def create(
         self,
         *,
@@ -505,19 +528,19 @@ class ThreadsWithRawResponse:
         self.runs = RunsWithRawResponse(threads.runs)
         self.messages = MessagesWithRawResponse(threads.messages)
 
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             threads.create,
         )
-        self.retrieve = to_raw_response_wrapper(
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
             threads.retrieve,
         )
-        self.update = to_raw_response_wrapper(
+        self.update = _legacy_response.to_raw_response_wrapper(
             threads.update,
         )
-        self.delete = to_raw_response_wrapper(
+        self.delete = _legacy_response.to_raw_response_wrapper(
             threads.delete,
         )
-        self.create_and_run = to_raw_response_wrapper(
+        self.create_and_run = _legacy_response.to_raw_response_wrapper(
             threads.create_and_run,
         )
 
@@ -527,18 +550,62 @@ class AsyncThreadsWithRawResponse:
         self.runs = AsyncRunsWithRawResponse(threads.runs)
         self.messages = AsyncMessagesWithRawResponse(threads.messages)
 
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            threads.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            threads.retrieve,
+        )
+        self.update = _legacy_response.async_to_raw_response_wrapper(
+            threads.update,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            threads.delete,
+        )
+        self.create_and_run = _legacy_response.async_to_raw_response_wrapper(
+            threads.create_and_run,
+        )
+
+
+class ThreadsWithStreamingResponse:
+    def __init__(self, threads: Threads) -> None:
+        self.runs = RunsWithStreamingResponse(threads.runs)
+        self.messages = MessagesWithStreamingResponse(threads.messages)
+
+        self.create = to_streamed_response_wrapper(
+            threads.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            threads.retrieve,
+        )
+        self.update = to_streamed_response_wrapper(
+            threads.update,
+        )
+        self.delete = to_streamed_response_wrapper(
+            threads.delete,
+        )
+        self.create_and_run = to_streamed_response_wrapper(
+            threads.create_and_run,
+        )
+
+
+class AsyncThreadsWithStreamingResponse:
+    def __init__(self, threads: AsyncThreads) -> None:
+        self.runs = AsyncRunsWithStreamingResponse(threads.runs)
+        self.messages = AsyncMessagesWithStreamingResponse(threads.messages)
+
+        self.create = async_to_streamed_response_wrapper(
             threads.create,
         )
-        self.retrieve = async_to_raw_response_wrapper(
+        self.retrieve = async_to_streamed_response_wrapper(
             threads.retrieve,
         )
-        self.update = async_to_raw_response_wrapper(
+        self.update = async_to_streamed_response_wrapper(
             threads.update,
         )
-        self.delete = async_to_raw_response_wrapper(
+        self.delete = async_to_streamed_response_wrapper(
             threads.delete,
         )
-        self.create_and_run = async_to_raw_response_wrapper(
+        self.create_and_run = async_to_streamed_response_wrapper(
             threads.create_and_run,
         )
src/openai/resources/beta/__init__.py
@@ -1,20 +1,47 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from .beta import Beta, AsyncBeta, BetaWithRawResponse, AsyncBetaWithRawResponse
-from .threads import Threads, AsyncThreads, ThreadsWithRawResponse, AsyncThreadsWithRawResponse
-from .assistants import Assistants, AsyncAssistants, AssistantsWithRawResponse, AsyncAssistantsWithRawResponse
+from .beta import (
+    Beta,
+    AsyncBeta,
+    BetaWithRawResponse,
+    AsyncBetaWithRawResponse,
+    BetaWithStreamingResponse,
+    AsyncBetaWithStreamingResponse,
+)
+from .threads import (
+    Threads,
+    AsyncThreads,
+    ThreadsWithRawResponse,
+    AsyncThreadsWithRawResponse,
+    ThreadsWithStreamingResponse,
+    AsyncThreadsWithStreamingResponse,
+)
+from .assistants import (
+    Assistants,
+    AsyncAssistants,
+    AssistantsWithRawResponse,
+    AsyncAssistantsWithRawResponse,
+    AssistantsWithStreamingResponse,
+    AsyncAssistantsWithStreamingResponse,
+)
 
 __all__ = [
     "Assistants",
     "AsyncAssistants",
     "AssistantsWithRawResponse",
     "AsyncAssistantsWithRawResponse",
+    "AssistantsWithStreamingResponse",
+    "AsyncAssistantsWithStreamingResponse",
     "Threads",
     "AsyncThreads",
     "ThreadsWithRawResponse",
     "AsyncThreadsWithRawResponse",
+    "ThreadsWithStreamingResponse",
+    "AsyncThreadsWithStreamingResponse",
     "Beta",
     "AsyncBeta",
     "BetaWithRawResponse",
     "AsyncBetaWithRawResponse",
+    "BetaWithStreamingResponse",
+    "AsyncBetaWithStreamingResponse",
 ]
src/openai/resources/beta/beta.py
@@ -2,9 +2,23 @@
 
 from __future__ import annotations
 
-from .threads import Threads, AsyncThreads, ThreadsWithRawResponse, AsyncThreadsWithRawResponse
+from .threads import (
+    Threads,
+    AsyncThreads,
+    ThreadsWithRawResponse,
+    AsyncThreadsWithRawResponse,
+    ThreadsWithStreamingResponse,
+    AsyncThreadsWithStreamingResponse,
+)
 from ..._compat import cached_property
-from .assistants import Assistants, AsyncAssistants, AssistantsWithRawResponse, AsyncAssistantsWithRawResponse
+from .assistants import (
+    Assistants,
+    AsyncAssistants,
+    AssistantsWithRawResponse,
+    AsyncAssistantsWithRawResponse,
+    AssistantsWithStreamingResponse,
+    AsyncAssistantsWithStreamingResponse,
+)
 from ..._resource import SyncAPIResource, AsyncAPIResource
 from .threads.threads import Threads, AsyncThreads
 from .assistants.assistants import Assistants, AsyncAssistants
@@ -25,6 +39,10 @@ class Beta(SyncAPIResource):
     def with_raw_response(self) -> BetaWithRawResponse:
         return BetaWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> BetaWithStreamingResponse:
+        return BetaWithStreamingResponse(self)
+
 
 class AsyncBeta(AsyncAPIResource):
     @cached_property
@@ -39,6 +57,10 @@ class AsyncBeta(AsyncAPIResource):
     def with_raw_response(self) -> AsyncBetaWithRawResponse:
         return AsyncBetaWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncBetaWithStreamingResponse:
+        return AsyncBetaWithStreamingResponse(self)
+
 
 class BetaWithRawResponse:
     def __init__(self, beta: Beta) -> None:
@@ -50,3 +72,15 @@ class AsyncBetaWithRawResponse:
     def __init__(self, beta: AsyncBeta) -> None:
         self.assistants = AsyncAssistantsWithRawResponse(beta.assistants)
         self.threads = AsyncThreadsWithRawResponse(beta.threads)
+
+
+class BetaWithStreamingResponse:
+    def __init__(self, beta: Beta) -> None:
+        self.assistants = AssistantsWithStreamingResponse(beta.assistants)
+        self.threads = ThreadsWithStreamingResponse(beta.threads)
+
+
+class AsyncBetaWithStreamingResponse:
+    def __init__(self, beta: AsyncBeta) -> None:
+        self.assistants = AsyncAssistantsWithStreamingResponse(beta.assistants)
+        self.threads = AsyncThreadsWithStreamingResponse(beta.threads)
src/openai/resources/chat/__init__.py
@@ -1,15 +1,33 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from .chat import Chat, AsyncChat, ChatWithRawResponse, AsyncChatWithRawResponse
-from .completions import Completions, AsyncCompletions, CompletionsWithRawResponse, AsyncCompletionsWithRawResponse
+from .chat import (
+    Chat,
+    AsyncChat,
+    ChatWithRawResponse,
+    AsyncChatWithRawResponse,
+    ChatWithStreamingResponse,
+    AsyncChatWithStreamingResponse,
+)
+from .completions import (
+    Completions,
+    AsyncCompletions,
+    CompletionsWithRawResponse,
+    AsyncCompletionsWithRawResponse,
+    CompletionsWithStreamingResponse,
+    AsyncCompletionsWithStreamingResponse,
+)
 
 __all__ = [
     "Completions",
     "AsyncCompletions",
     "CompletionsWithRawResponse",
     "AsyncCompletionsWithRawResponse",
+    "CompletionsWithStreamingResponse",
+    "AsyncCompletionsWithStreamingResponse",
     "Chat",
     "AsyncChat",
     "ChatWithRawResponse",
     "AsyncChatWithRawResponse",
+    "ChatWithStreamingResponse",
+    "AsyncChatWithStreamingResponse",
 ]
src/openai/resources/chat/chat.py
@@ -4,7 +4,14 @@ from __future__ import annotations
 
 from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
-from .completions import Completions, AsyncCompletions, CompletionsWithRawResponse, AsyncCompletionsWithRawResponse
+from .completions import (
+    Completions,
+    AsyncCompletions,
+    CompletionsWithRawResponse,
+    AsyncCompletionsWithRawResponse,
+    CompletionsWithStreamingResponse,
+    AsyncCompletionsWithStreamingResponse,
+)
 
 __all__ = ["Chat", "AsyncChat"]
 
@@ -18,6 +25,10 @@ class Chat(SyncAPIResource):
     def with_raw_response(self) -> ChatWithRawResponse:
         return ChatWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> ChatWithStreamingResponse:
+        return ChatWithStreamingResponse(self)
+
 
 class AsyncChat(AsyncAPIResource):
     @cached_property
@@ -28,6 +39,10 @@ class AsyncChat(AsyncAPIResource):
     def with_raw_response(self) -> AsyncChatWithRawResponse:
         return AsyncChatWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncChatWithStreamingResponse:
+        return AsyncChatWithStreamingResponse(self)
+
 
 class ChatWithRawResponse:
     def __init__(self, chat: Chat) -> None:
@@ -37,3 +52,13 @@ class ChatWithRawResponse:
 class AsyncChatWithRawResponse:
     def __init__(self, chat: AsyncChat) -> None:
         self.completions = AsyncCompletionsWithRawResponse(chat.completions)
+
+
+class ChatWithStreamingResponse:
+    def __init__(self, chat: Chat) -> None:
+        self.completions = CompletionsWithStreamingResponse(chat.completions)
+
+
+class AsyncChatWithStreamingResponse:
+    def __init__(self, chat: AsyncChat) -> None:
+        self.completions = AsyncCompletionsWithStreamingResponse(chat.completions)
src/openai/resources/chat/completions.py
@@ -7,11 +7,12 @@ from typing_extensions import Literal
 
 import httpx
 
+from ... import _legacy_response
 from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ..._utils import required_args, maybe_transform
 from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from ..._streaming import Stream, AsyncStream
 from ...types.chat import (
     ChatCompletion,
@@ -33,6 +34,10 @@ class Completions(SyncAPIResource):
     def with_raw_response(self) -> CompletionsWithRawResponse:
         return CompletionsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+        return CompletionsWithStreamingResponse(self)
+
     @overload
     def create(
         self,
@@ -681,6 +686,10 @@ class AsyncCompletions(AsyncAPIResource):
     def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
         return AsyncCompletionsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+        return AsyncCompletionsWithStreamingResponse(self)
+
     @overload
     async def create(
         self,
@@ -1326,13 +1335,27 @@ class AsyncCompletions(AsyncAPIResource):
 
 class CompletionsWithRawResponse:
     def __init__(self, completions: Completions) -> None:
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             completions.create,
         )
 
 
 class AsyncCompletionsWithRawResponse:
     def __init__(self, completions: AsyncCompletions) -> None:
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            completions.create,
+        )
+
+
+class CompletionsWithStreamingResponse:
+    def __init__(self, completions: Completions) -> None:
+        self.create = to_streamed_response_wrapper(
+            completions.create,
+        )
+
+
+class AsyncCompletionsWithStreamingResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self.create = async_to_streamed_response_wrapper(
             completions.create,
         )
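
With the wrappers above in place, `chat.completions` gains the new streaming-response surface. A minimal usage sketch, assuming a configured `OpenAI()` client and network access (the model name is illustrative):

    from openai import OpenAI

    client = OpenAI()

    # The context manager guarantees the underlying HTTP response is
    # closed, even if we exit early or an exception is raised.
    with client.chat.completions.with_streaming_response.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Say hello"}],
    ) as response:
        print(response.headers.get("x-request-id"))
        # .parse() reads the body and returns the typed ChatCompletion.
        completion = response.parse()
        print(completion.choices[0].message.content)
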
src/openai/resources/fine_tuning/__init__.py
@@ -1,15 +1,33 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from .jobs import Jobs, AsyncJobs, JobsWithRawResponse, AsyncJobsWithRawResponse
-from .fine_tuning import FineTuning, AsyncFineTuning, FineTuningWithRawResponse, AsyncFineTuningWithRawResponse
+from .jobs import (
+    Jobs,
+    AsyncJobs,
+    JobsWithRawResponse,
+    AsyncJobsWithRawResponse,
+    JobsWithStreamingResponse,
+    AsyncJobsWithStreamingResponse,
+)
+from .fine_tuning import (
+    FineTuning,
+    AsyncFineTuning,
+    FineTuningWithRawResponse,
+    AsyncFineTuningWithRawResponse,
+    FineTuningWithStreamingResponse,
+    AsyncFineTuningWithStreamingResponse,
+)
 
 __all__ = [
     "Jobs",
     "AsyncJobs",
     "JobsWithRawResponse",
     "AsyncJobsWithRawResponse",
+    "JobsWithStreamingResponse",
+    "AsyncJobsWithStreamingResponse",
     "FineTuning",
     "AsyncFineTuning",
     "FineTuningWithRawResponse",
     "AsyncFineTuningWithRawResponse",
+    "FineTuningWithStreamingResponse",
+    "AsyncFineTuningWithStreamingResponse",
 ]
src/openai/resources/fine_tuning/fine_tuning.py
@@ -2,7 +2,14 @@
 
 from __future__ import annotations
 
-from .jobs import Jobs, AsyncJobs, JobsWithRawResponse, AsyncJobsWithRawResponse
+from .jobs import (
+    Jobs,
+    AsyncJobs,
+    JobsWithRawResponse,
+    AsyncJobsWithRawResponse,
+    JobsWithStreamingResponse,
+    AsyncJobsWithStreamingResponse,
+)
 from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
 
@@ -18,6 +25,10 @@ class FineTuning(SyncAPIResource):
     def with_raw_response(self) -> FineTuningWithRawResponse:
         return FineTuningWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> FineTuningWithStreamingResponse:
+        return FineTuningWithStreamingResponse(self)
+
 
 class AsyncFineTuning(AsyncAPIResource):
     @cached_property
@@ -28,6 +39,10 @@ class AsyncFineTuning(AsyncAPIResource):
     def with_raw_response(self) -> AsyncFineTuningWithRawResponse:
         return AsyncFineTuningWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncFineTuningWithStreamingResponse:
+        return AsyncFineTuningWithStreamingResponse(self)
+
 
 class FineTuningWithRawResponse:
     def __init__(self, fine_tuning: FineTuning) -> None:
@@ -37,3 +52,13 @@ class FineTuningWithRawResponse:
 class AsyncFineTuningWithRawResponse:
     def __init__(self, fine_tuning: AsyncFineTuning) -> None:
         self.jobs = AsyncJobsWithRawResponse(fine_tuning.jobs)
+
+
+class FineTuningWithStreamingResponse:
+    def __init__(self, fine_tuning: FineTuning) -> None:
+        self.jobs = JobsWithStreamingResponse(fine_tuning.jobs)
+
+
+class AsyncFineTuningWithStreamingResponse:
+    def __init__(self, fine_tuning: AsyncFineTuning) -> None:
+        self.jobs = AsyncJobsWithStreamingResponse(fine_tuning.jobs)
src/openai/resources/fine_tuning/jobs.py
@@ -7,11 +7,12 @@ from typing_extensions import Literal
 
 import httpx
 
+from ... import _legacy_response
 from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from ..._utils import maybe_transform
 from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from ...pagination import SyncCursorPage, AsyncCursorPage
 from ..._base_client import (
     AsyncPaginator,
@@ -33,6 +34,10 @@ class Jobs(SyncAPIResource):
     def with_raw_response(self) -> JobsWithRawResponse:
         return JobsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> JobsWithStreamingResponse:
+        return JobsWithStreamingResponse(self)
+
     def create(
         self,
         *,
@@ -284,6 +289,10 @@ class AsyncJobs(AsyncAPIResource):
     def with_raw_response(self) -> AsyncJobsWithRawResponse:
         return AsyncJobsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncJobsWithStreamingResponse:
+        return AsyncJobsWithStreamingResponse(self)
+
     async def create(
         self,
         *,
@@ -532,37 +541,75 @@ class AsyncJobs(AsyncAPIResource):
 
 class JobsWithRawResponse:
     def __init__(self, jobs: Jobs) -> None:
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             jobs.create,
         )
-        self.retrieve = to_raw_response_wrapper(
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
             jobs.retrieve,
         )
-        self.list = to_raw_response_wrapper(
+        self.list = _legacy_response.to_raw_response_wrapper(
             jobs.list,
         )
-        self.cancel = to_raw_response_wrapper(
+        self.cancel = _legacy_response.to_raw_response_wrapper(
             jobs.cancel,
         )
-        self.list_events = to_raw_response_wrapper(
+        self.list_events = _legacy_response.to_raw_response_wrapper(
             jobs.list_events,
         )
 
 
 class AsyncJobsWithRawResponse:
     def __init__(self, jobs: AsyncJobs) -> None:
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            jobs.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            jobs.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            jobs.list,
+        )
+        self.cancel = _legacy_response.async_to_raw_response_wrapper(
+            jobs.cancel,
+        )
+        self.list_events = _legacy_response.async_to_raw_response_wrapper(
+            jobs.list_events,
+        )
+
+
+class JobsWithStreamingResponse:
+    def __init__(self, jobs: Jobs) -> None:
+        self.create = to_streamed_response_wrapper(
+            jobs.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            jobs.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            jobs.list,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            jobs.cancel,
+        )
+        self.list_events = to_streamed_response_wrapper(
+            jobs.list_events,
+        )
+
+
+class AsyncJobsWithStreamingResponse:
+    def __init__(self, jobs: AsyncJobs) -> None:
+        self.create = async_to_streamed_response_wrapper(
             jobs.create,
         )
-        self.retrieve = async_to_raw_response_wrapper(
+        self.retrieve = async_to_streamed_response_wrapper(
             jobs.retrieve,
         )
-        self.list = async_to_raw_response_wrapper(
+        self.list = async_to_streamed_response_wrapper(
             jobs.list,
         )
-        self.cancel = async_to_raw_response_wrapper(
+        self.cancel = async_to_streamed_response_wrapper(
             jobs.cancel,
         )
-        self.list_events = async_to_raw_response_wrapper(
+        self.list_events = async_to_streamed_response_wrapper(
             jobs.list_events,
         )
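
The same pattern applies to endpoints whose bodies are ordinary JSON, such as fine-tuning jobs; there the context manager is mainly about deterministic cleanup and access to the raw response before parsing. A hedged sketch, assuming a configured client:

    from openai import OpenAI

    client = OpenAI()

    with client.fine_tuning.jobs.with_streaming_response.list(limit=10) as response:
        # Nothing has been read yet; status and headers are available immediately.
        print(response.status_code)
        jobs_page = response.parse()  # reads and deserializes the body
        for job in jobs_page.data:
            print(job.id, job.status)
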
src/openai/resources/__init__.py
@@ -1,55 +1,145 @@
 # File generated from our OpenAPI spec by Stainless.
 
-from .beta import Beta, AsyncBeta, BetaWithRawResponse, AsyncBetaWithRawResponse
-from .chat import Chat, AsyncChat, ChatWithRawResponse, AsyncChatWithRawResponse
-from .audio import Audio, AsyncAudio, AudioWithRawResponse, AsyncAudioWithRawResponse
-from .files import Files, AsyncFiles, FilesWithRawResponse, AsyncFilesWithRawResponse
-from .images import Images, AsyncImages, ImagesWithRawResponse, AsyncImagesWithRawResponse
-from .models import Models, AsyncModels, ModelsWithRawResponse, AsyncModelsWithRawResponse
-from .embeddings import Embeddings, AsyncEmbeddings, EmbeddingsWithRawResponse, AsyncEmbeddingsWithRawResponse
-from .completions import Completions, AsyncCompletions, CompletionsWithRawResponse, AsyncCompletionsWithRawResponse
-from .fine_tuning import FineTuning, AsyncFineTuning, FineTuningWithRawResponse, AsyncFineTuningWithRawResponse
-from .moderations import Moderations, AsyncModerations, ModerationsWithRawResponse, AsyncModerationsWithRawResponse
+from .beta import (
+    Beta,
+    AsyncBeta,
+    BetaWithRawResponse,
+    AsyncBetaWithRawResponse,
+    BetaWithStreamingResponse,
+    AsyncBetaWithStreamingResponse,
+)
+from .chat import (
+    Chat,
+    AsyncChat,
+    ChatWithRawResponse,
+    AsyncChatWithRawResponse,
+    ChatWithStreamingResponse,
+    AsyncChatWithStreamingResponse,
+)
+from .audio import (
+    Audio,
+    AsyncAudio,
+    AudioWithRawResponse,
+    AsyncAudioWithRawResponse,
+    AudioWithStreamingResponse,
+    AsyncAudioWithStreamingResponse,
+)
+from .files import (
+    Files,
+    AsyncFiles,
+    FilesWithRawResponse,
+    AsyncFilesWithRawResponse,
+    FilesWithStreamingResponse,
+    AsyncFilesWithStreamingResponse,
+)
+from .images import (
+    Images,
+    AsyncImages,
+    ImagesWithRawResponse,
+    AsyncImagesWithRawResponse,
+    ImagesWithStreamingResponse,
+    AsyncImagesWithStreamingResponse,
+)
+from .models import (
+    Models,
+    AsyncModels,
+    ModelsWithRawResponse,
+    AsyncModelsWithRawResponse,
+    ModelsWithStreamingResponse,
+    AsyncModelsWithStreamingResponse,
+)
+from .embeddings import (
+    Embeddings,
+    AsyncEmbeddings,
+    EmbeddingsWithRawResponse,
+    AsyncEmbeddingsWithRawResponse,
+    EmbeddingsWithStreamingResponse,
+    AsyncEmbeddingsWithStreamingResponse,
+)
+from .completions import (
+    Completions,
+    AsyncCompletions,
+    CompletionsWithRawResponse,
+    AsyncCompletionsWithRawResponse,
+    CompletionsWithStreamingResponse,
+    AsyncCompletionsWithStreamingResponse,
+)
+from .fine_tuning import (
+    FineTuning,
+    AsyncFineTuning,
+    FineTuningWithRawResponse,
+    AsyncFineTuningWithRawResponse,
+    FineTuningWithStreamingResponse,
+    AsyncFineTuningWithStreamingResponse,
+)
+from .moderations import (
+    Moderations,
+    AsyncModerations,
+    ModerationsWithRawResponse,
+    AsyncModerationsWithRawResponse,
+    ModerationsWithStreamingResponse,
+    AsyncModerationsWithStreamingResponse,
+)
 
 __all__ = [
     "Completions",
     "AsyncCompletions",
     "CompletionsWithRawResponse",
     "AsyncCompletionsWithRawResponse",
+    "CompletionsWithStreamingResponse",
+    "AsyncCompletionsWithStreamingResponse",
     "Chat",
     "AsyncChat",
     "ChatWithRawResponse",
     "AsyncChatWithRawResponse",
+    "ChatWithStreamingResponse",
+    "AsyncChatWithStreamingResponse",
     "Embeddings",
     "AsyncEmbeddings",
     "EmbeddingsWithRawResponse",
     "AsyncEmbeddingsWithRawResponse",
+    "EmbeddingsWithStreamingResponse",
+    "AsyncEmbeddingsWithStreamingResponse",
     "Files",
     "AsyncFiles",
     "FilesWithRawResponse",
     "AsyncFilesWithRawResponse",
+    "FilesWithStreamingResponse",
+    "AsyncFilesWithStreamingResponse",
     "Images",
     "AsyncImages",
     "ImagesWithRawResponse",
     "AsyncImagesWithRawResponse",
+    "ImagesWithStreamingResponse",
+    "AsyncImagesWithStreamingResponse",
     "Audio",
     "AsyncAudio",
     "AudioWithRawResponse",
     "AsyncAudioWithRawResponse",
+    "AudioWithStreamingResponse",
+    "AsyncAudioWithStreamingResponse",
     "Moderations",
     "AsyncModerations",
     "ModerationsWithRawResponse",
     "AsyncModerationsWithRawResponse",
+    "ModerationsWithStreamingResponse",
+    "AsyncModerationsWithStreamingResponse",
     "Models",
     "AsyncModels",
     "ModelsWithRawResponse",
     "AsyncModelsWithRawResponse",
+    "ModelsWithStreamingResponse",
+    "AsyncModelsWithStreamingResponse",
     "FineTuning",
     "AsyncFineTuning",
     "FineTuningWithRawResponse",
     "AsyncFineTuningWithRawResponse",
+    "FineTuningWithStreamingResponse",
+    "AsyncFineTuningWithStreamingResponse",
     "Beta",
     "AsyncBeta",
     "BetaWithRawResponse",
     "AsyncBetaWithRawResponse",
+    "BetaWithStreamingResponse",
+    "AsyncBetaWithStreamingResponse",
 ]
src/openai/resources/completions.py
@@ -7,12 +7,13 @@ from typing_extensions import Literal
 
 import httpx
 
+from .. import _legacy_response
 from ..types import Completion, completion_create_params
 from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from .._utils import required_args, maybe_transform
 from .._compat import cached_property
 from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from .._streaming import Stream, AsyncStream
 from .._base_client import (
     make_request_options,
@@ -26,6 +27,10 @@ class Completions(SyncAPIResource):
     def with_raw_response(self) -> CompletionsWithRawResponse:
         return CompletionsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+        return CompletionsWithStreamingResponse(self)
+
     @overload
     def create(
         self,
@@ -536,6 +541,10 @@ class AsyncCompletions(AsyncAPIResource):
     def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
         return AsyncCompletionsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+        return AsyncCompletionsWithStreamingResponse(self)
+
     @overload
     async def create(
         self,
@@ -1043,13 +1052,27 @@ class AsyncCompletions(AsyncAPIResource):
 
 class CompletionsWithRawResponse:
     def __init__(self, completions: Completions) -> None:
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             completions.create,
         )
 
 
 class AsyncCompletionsWithRawResponse:
     def __init__(self, completions: AsyncCompletions) -> None:
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            completions.create,
+        )
+
+
+class CompletionsWithStreamingResponse:
+    def __init__(self, completions: Completions) -> None:
+        self.create = to_streamed_response_wrapper(
+            completions.create,
+        )
+
+
+class AsyncCompletionsWithStreamingResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self.create = async_to_streamed_response_wrapper(
             completions.create,
         )
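
Note that `with_raw_response` now routes through `_legacy_response.to_raw_response_wrapper`, so its call style is unchanged: it still returns an already-read response object rather than a context manager. A sketch of the preserved behaviour, assuming a configured client:

    from openai import OpenAI

    client = OpenAI()

    # No context manager needed: the legacy raw response is fully read up front.
    response = client.completions.with_raw_response.create(
        model="gpt-3.5-turbo-instruct",
        prompt="Say hello",
    )
    print(response.headers.get("x-request-id"))
    completion = response.parse()  # returns the typed Completion object
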
src/openai/resources/embeddings.py
@@ -8,13 +8,14 @@ from typing_extensions import Literal
 
 import httpx
 
+from .. import _legacy_response
 from ..types import CreateEmbeddingResponse, embedding_create_params
 from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from .._utils import is_given, maybe_transform
 from .._compat import cached_property
 from .._extras import numpy as np, has_numpy
 from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from .._base_client import (
     make_request_options,
 )
@@ -27,6 +28,10 @@ class Embeddings(SyncAPIResource):
     def with_raw_response(self) -> EmbeddingsWithRawResponse:
         return EmbeddingsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> EmbeddingsWithStreamingResponse:
+        return EmbeddingsWithStreamingResponse(self)
+
     def create(
         self,
         *,
@@ -119,6 +124,10 @@ class AsyncEmbeddings(AsyncAPIResource):
     def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse:
         return AsyncEmbeddingsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse:
+        return AsyncEmbeddingsWithStreamingResponse(self)
+
     async def create(
         self,
         *,
@@ -208,13 +217,27 @@ class AsyncEmbeddings(AsyncAPIResource):
 
 class EmbeddingsWithRawResponse:
     def __init__(self, embeddings: Embeddings) -> None:
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             embeddings.create,
         )
 
 
 class AsyncEmbeddingsWithRawResponse:
     def __init__(self, embeddings: AsyncEmbeddings) -> None:
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            embeddings.create,
+        )
+
+
+class EmbeddingsWithStreamingResponse:
+    def __init__(self, embeddings: Embeddings) -> None:
+        self.create = to_streamed_response_wrapper(
+            embeddings.create,
+        )
+
+
+class AsyncEmbeddingsWithStreamingResponse:
+    def __init__(self, embeddings: AsyncEmbeddings) -> None:
+        self.create = async_to_streamed_response_wrapper(
             embeddings.create,
         )
src/openai/resources/files.py
@@ -9,16 +9,23 @@ from typing_extensions import Literal
 
 import httpx
 
+from .. import _legacy_response
 from ..types import FileObject, FileDeleted, file_list_params, file_create_params
 from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
 from .._utils import extract_files, maybe_transform, deepcopy_minimal
 from .._compat import cached_property
 from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from .._response import (
+    StreamedBinaryAPIResponse,
+    AsyncStreamedBinaryAPIResponse,
+    to_streamed_response_wrapper,
+    async_to_streamed_response_wrapper,
+    to_custom_streamed_response_wrapper,
+    async_to_custom_streamed_response_wrapper,
+)
 from ..pagination import SyncPage, AsyncPage
 from .._base_client import (
     AsyncPaginator,
-    HttpxBinaryResponseContent,
     make_request_options,
 )
 
@@ -30,6 +37,10 @@ class Files(SyncAPIResource):
     def with_raw_response(self) -> FilesWithRawResponse:
         return FilesWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> FilesWithStreamingResponse:
+        return FilesWithStreamingResponse(self)
+
     def create(
         self,
         *,
@@ -209,7 +220,7 @@ class Files(SyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> HttpxBinaryResponseContent:
+    ) -> _legacy_response.HttpxBinaryResponseContent:
         """
         Returns the contents of the specified file.
 
@@ -227,7 +238,7 @@ class Files(SyncAPIResource):
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=HttpxBinaryResponseContent,
+            cast_to=_legacy_response.HttpxBinaryResponseContent,
         )
 
     @typing_extensions.deprecated("The `.content()` method should be used instead")
@@ -292,6 +303,10 @@ class AsyncFiles(AsyncAPIResource):
     def with_raw_response(self) -> AsyncFilesWithRawResponse:
         return AsyncFilesWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
+        return AsyncFilesWithStreamingResponse(self)
+
     async def create(
         self,
         *,
@@ -471,7 +486,7 @@ class AsyncFiles(AsyncAPIResource):
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> HttpxBinaryResponseContent:
+    ) -> _legacy_response.HttpxBinaryResponseContent:
         """
         Returns the contents of the specified file.
 
@@ -489,7 +504,7 @@ class AsyncFiles(AsyncAPIResource):
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=HttpxBinaryResponseContent,
+            cast_to=_legacy_response.HttpxBinaryResponseContent,
         )
 
     @typing_extensions.deprecated("The `.content()` method should be used instead")
@@ -551,43 +566,97 @@ class AsyncFiles(AsyncAPIResource):
 
 class FilesWithRawResponse:
     def __init__(self, files: Files) -> None:
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             files.create,
         )
-        self.retrieve = to_raw_response_wrapper(
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
             files.retrieve,
         )
-        self.list = to_raw_response_wrapper(
+        self.list = _legacy_response.to_raw_response_wrapper(
             files.list,
         )
-        self.delete = to_raw_response_wrapper(
+        self.delete = _legacy_response.to_raw_response_wrapper(
             files.delete,
         )
-        self.content = to_raw_response_wrapper(
+        self.content = _legacy_response.to_raw_response_wrapper(
             files.content,
         )
-        self.retrieve_content = to_raw_response_wrapper(  # pyright: ignore[reportDeprecated]
-            files.retrieve_content  # pyright: ignore[reportDeprecated],
+        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
+            _legacy_response.to_raw_response_wrapper(
+                files.retrieve_content  # pyright: ignore[reportDeprecated],
+            )
         )
 
 
 class AsyncFilesWithRawResponse:
     def __init__(self, files: AsyncFiles) -> None:
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            files.create,
+        )
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            files.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            files.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            files.delete,
+        )
+        self.content = _legacy_response.async_to_raw_response_wrapper(
+            files.content,
+        )
+        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
+            _legacy_response.async_to_raw_response_wrapper(
+                files.retrieve_content  # pyright: ignore[reportDeprecated],
+            )
+        )
+
+
+class FilesWithStreamingResponse:
+    def __init__(self, files: Files) -> None:
+        self.create = to_streamed_response_wrapper(
+            files.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            files.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            files.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            files.delete,
+        )
+        self.content = to_custom_streamed_response_wrapper(
+            files.content,
+            StreamedBinaryAPIResponse,
+        )
+        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
+            to_streamed_response_wrapper(
+                files.retrieve_content  # pyright: ignore[reportDeprecated],
+            )
+        )
+
+
+class AsyncFilesWithStreamingResponse:
+    def __init__(self, files: AsyncFiles) -> None:
+        self.create = async_to_streamed_response_wrapper(
             files.create,
         )
-        self.retrieve = async_to_raw_response_wrapper(
+        self.retrieve = async_to_streamed_response_wrapper(
             files.retrieve,
         )
-        self.list = async_to_raw_response_wrapper(
+        self.list = async_to_streamed_response_wrapper(
             files.list,
         )
-        self.delete = async_to_raw_response_wrapper(
+        self.delete = async_to_streamed_response_wrapper(
             files.delete,
         )
-        self.content = async_to_raw_response_wrapper(
+        self.content = async_to_custom_streamed_response_wrapper(
             files.content,
+            AsyncStreamedBinaryAPIResponse,
         )
-        self.retrieve_content = async_to_raw_response_wrapper(  # pyright: ignore[reportDeprecated]
-            files.retrieve_content  # pyright: ignore[reportDeprecated],
+        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
+            async_to_streamed_response_wrapper(
+                files.retrieve_content  # pyright: ignore[reportDeprecated],
+            )
         )
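
`files.content` is the one endpoint here whose body is binary, which is why it is wrapped with `to_custom_streamed_response_wrapper` and `StreamedBinaryAPIResponse` rather than the plain wrapper. A hedged sketch of downloading file contents without buffering everything in memory (the file ID and output path are placeholders):

    from openai import OpenAI

    client = OpenAI()

    with client.files.with_streaming_response.content("file-abc123") as response:
        # stream_to_file() iterates the body in chunks and writes as it goes.
        response.stream_to_file("output.jsonl")
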
src/openai/resources/images.py
@@ -7,6 +7,7 @@ from typing_extensions import Literal
 
 import httpx
 
+from .. import _legacy_response
 from ..types import (
     ImagesResponse,
     image_edit_params,
@@ -17,7 +18,7 @@ from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
 from .._utils import extract_files, maybe_transform, deepcopy_minimal
 from .._compat import cached_property
 from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from .._base_client import (
     make_request_options,
 )
@@ -30,6 +31,10 @@ class Images(SyncAPIResource):
     def with_raw_response(self) -> ImagesWithRawResponse:
         return ImagesWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> ImagesWithStreamingResponse:
+        return ImagesWithStreamingResponse(self)
+
     def create_variation(
         self,
         *,
@@ -273,6 +278,10 @@ class AsyncImages(AsyncAPIResource):
     def with_raw_response(self) -> AsyncImagesWithRawResponse:
         return AsyncImagesWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncImagesWithStreamingResponse:
+        return AsyncImagesWithStreamingResponse(self)
+
     async def create_variation(
         self,
         *,
@@ -513,25 +522,51 @@ class AsyncImages(AsyncAPIResource):
 
 class ImagesWithRawResponse:
     def __init__(self, images: Images) -> None:
-        self.create_variation = to_raw_response_wrapper(
+        self.create_variation = _legacy_response.to_raw_response_wrapper(
             images.create_variation,
         )
-        self.edit = to_raw_response_wrapper(
+        self.edit = _legacy_response.to_raw_response_wrapper(
             images.edit,
         )
-        self.generate = to_raw_response_wrapper(
+        self.generate = _legacy_response.to_raw_response_wrapper(
             images.generate,
         )
 
 
 class AsyncImagesWithRawResponse:
     def __init__(self, images: AsyncImages) -> None:
-        self.create_variation = async_to_raw_response_wrapper(
+        self.create_variation = _legacy_response.async_to_raw_response_wrapper(
+            images.create_variation,
+        )
+        self.edit = _legacy_response.async_to_raw_response_wrapper(
+            images.edit,
+        )
+        self.generate = _legacy_response.async_to_raw_response_wrapper(
+            images.generate,
+        )
+
+
+class ImagesWithStreamingResponse:
+    def __init__(self, images: Images) -> None:
+        self.create_variation = to_streamed_response_wrapper(
+            images.create_variation,
+        )
+        self.edit = to_streamed_response_wrapper(
+            images.edit,
+        )
+        self.generate = to_streamed_response_wrapper(
+            images.generate,
+        )
+
+
+class AsyncImagesWithStreamingResponse:
+    def __init__(self, images: AsyncImages) -> None:
+        self.create_variation = async_to_streamed_response_wrapper(
             images.create_variation,
         )
-        self.edit = async_to_raw_response_wrapper(
+        self.edit = async_to_streamed_response_wrapper(
             images.edit,
         )
-        self.generate = async_to_raw_response_wrapper(
+        self.generate = async_to_streamed_response_wrapper(
             images.generate,
         )
src/openai/resources/models.py
@@ -4,11 +4,12 @@ from __future__ import annotations
 
 import httpx
 
+from .. import _legacy_response
 from ..types import Model, ModelDeleted
 from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from .._compat import cached_property
 from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from ..pagination import SyncPage, AsyncPage
 from .._base_client import (
     AsyncPaginator,
@@ -23,6 +24,10 @@ class Models(SyncAPIResource):
     def with_raw_response(self) -> ModelsWithRawResponse:
         return ModelsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> ModelsWithStreamingResponse:
+        return ModelsWithStreamingResponse(self)
+
     def retrieve(
         self,
         model: str,
@@ -117,6 +122,10 @@ class AsyncModels(AsyncAPIResource):
     def with_raw_response(self) -> AsyncModelsWithRawResponse:
         return AsyncModelsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncModelsWithStreamingResponse:
+        return AsyncModelsWithStreamingResponse(self)
+
     async def retrieve(
         self,
         model: str,
@@ -208,25 +217,51 @@ class AsyncModels(AsyncAPIResource):
 
 class ModelsWithRawResponse:
     def __init__(self, models: Models) -> None:
-        self.retrieve = to_raw_response_wrapper(
+        self.retrieve = _legacy_response.to_raw_response_wrapper(
             models.retrieve,
         )
-        self.list = to_raw_response_wrapper(
+        self.list = _legacy_response.to_raw_response_wrapper(
             models.list,
         )
-        self.delete = to_raw_response_wrapper(
+        self.delete = _legacy_response.to_raw_response_wrapper(
             models.delete,
         )
 
 
 class AsyncModelsWithRawResponse:
     def __init__(self, models: AsyncModels) -> None:
-        self.retrieve = async_to_raw_response_wrapper(
+        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+            models.retrieve,
+        )
+        self.list = _legacy_response.async_to_raw_response_wrapper(
+            models.list,
+        )
+        self.delete = _legacy_response.async_to_raw_response_wrapper(
+            models.delete,
+        )
+
+
+class ModelsWithStreamingResponse:
+    def __init__(self, models: Models) -> None:
+        self.retrieve = to_streamed_response_wrapper(
+            models.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            models.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            models.delete,
+        )
+
+
+class AsyncModelsWithStreamingResponse:
+    def __init__(self, models: AsyncModels) -> None:
+        self.retrieve = async_to_streamed_response_wrapper(
             models.retrieve,
         )
-        self.list = async_to_raw_response_wrapper(
+        self.list = async_to_streamed_response_wrapper(
             models.list,
         )
-        self.delete = async_to_raw_response_wrapper(
+        self.delete = async_to_streamed_response_wrapper(
             models.delete,
         )
src/openai/resources/moderations.py
@@ -7,12 +7,13 @@ from typing_extensions import Literal
 
 import httpx
 
+from .. import _legacy_response
 from ..types import ModerationCreateResponse, moderation_create_params
 from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
 from .._utils import maybe_transform
 from .._compat import cached_property
 from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import to_raw_response_wrapper, async_to_raw_response_wrapper
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 from .._base_client import (
     make_request_options,
 )
@@ -25,6 +26,10 @@ class Moderations(SyncAPIResource):
     def with_raw_response(self) -> ModerationsWithRawResponse:
         return ModerationsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> ModerationsWithStreamingResponse:
+        return ModerationsWithStreamingResponse(self)
+
     def create(
         self,
         *,
@@ -81,6 +86,10 @@ class AsyncModerations(AsyncAPIResource):
     def with_raw_response(self) -> AsyncModerationsWithRawResponse:
         return AsyncModerationsWithRawResponse(self)
 
+    @cached_property
+    def with_streaming_response(self) -> AsyncModerationsWithStreamingResponse:
+        return AsyncModerationsWithStreamingResponse(self)
+
     async def create(
         self,
         *,
@@ -134,13 +143,27 @@ class AsyncModerations(AsyncAPIResource):
 
 class ModerationsWithRawResponse:
     def __init__(self, moderations: Moderations) -> None:
-        self.create = to_raw_response_wrapper(
+        self.create = _legacy_response.to_raw_response_wrapper(
             moderations.create,
         )
 
 
 class AsyncModerationsWithRawResponse:
     def __init__(self, moderations: AsyncModerations) -> None:
-        self.create = async_to_raw_response_wrapper(
+        self.create = _legacy_response.async_to_raw_response_wrapper(
+            moderations.create,
+        )
+
+
+class ModerationsWithStreamingResponse:
+    def __init__(self, moderations: Moderations) -> None:
+        self.create = to_streamed_response_wrapper(
+            moderations.create,
+        )
+
+
+class AsyncModerationsWithStreamingResponse:
+    def __init__(self, moderations: AsyncModerations) -> None:
+        self.create = async_to_streamed_response_wrapper(
             moderations.create,
         )
src/openai/__init__.py
@@ -10,6 +10,7 @@ from ._types import NoneType, Transport, ProxiesTypes
 from ._utils import file_from_path
 from ._client import Client, OpenAI, Stream, Timeout, Transport, AsyncClient, AsyncOpenAI, AsyncStream, RequestOptions
 from ._version import __title__, __version__
+from ._response import APIResponse as APIResponse, AsyncAPIResponse as AsyncAPIResponse
 from ._exceptions import (
     APIError,
     OpenAIError,
src/openai/_base_client.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import os
 import json
 import time
 import uuid
@@ -31,7 +30,7 @@ from typing import (
     overload,
 )
 from functools import lru_cache
-from typing_extensions import Literal, override
+from typing_extensions import Literal, override, get_origin
 
 import anyio
 import httpx
@@ -61,18 +60,22 @@ from ._types import (
     AsyncTransport,
     RequestOptions,
     ModelBuilderProtocol,
-    BinaryResponseContent,
 )
 from ._utils import is_dict, is_given, is_mapping
 from ._compat import model_copy, model_dump
 from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type
-from ._response import APIResponse
+from ._response import (
+    APIResponse,
+    BaseAPIResponse,
+    AsyncAPIResponse,
+    extract_response_type,
+)
 from ._constants import (
     DEFAULT_LIMITS,
     DEFAULT_TIMEOUT,
     DEFAULT_MAX_RETRIES,
     RAW_RESPONSE_HEADER,
-    STREAMED_RAW_RESPONSE_HEADER,
+    OVERRIDE_CAST_TO_HEADER,
 )
 from ._streaming import Stream, AsyncStream
 from ._exceptions import (
@@ -81,6 +84,7 @@ from ._exceptions import (
     APIConnectionError,
     APIResponseValidationError,
 )
+from ._legacy_response import LegacyAPIResponse
 
 log: logging.Logger = logging.getLogger(__name__)
 
@@ -493,28 +497,25 @@ class BaseClient(Generic[_HttpxClientT, _DefaultStreamT]):
             serialized[key] = value
         return serialized
 
-    def _process_response(
-        self,
-        *,
-        cast_to: Type[ResponseT],
-        options: FinalRequestOptions,
-        response: httpx.Response,
-        stream: bool,
-        stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None,
-    ) -> ResponseT:
-        api_response = APIResponse(
-            raw=response,
-            client=self,
-            cast_to=cast_to,
-            stream=stream,
-            stream_cls=stream_cls,
-            options=options,
-        )
+    def _maybe_override_cast_to(self, cast_to: type[ResponseT], options: FinalRequestOptions) -> type[ResponseT]:
+        if not is_given(options.headers):
+            return cast_to
 
-        if response.request.headers.get(RAW_RESPONSE_HEADER) == "true":
-            return cast(ResponseT, api_response)
+        # make a copy of the headers so we don't mutate user input
+        headers = dict(options.headers)
 
-        return api_response.parse()
+        # we internally support defining a temporary header to override the
+        # default `cast_to` type for use with `.with_raw_response` and `.with_streaming_response`
+        # see _response.py for implementation details
+        override_cast_to = headers.pop(OVERRIDE_CAST_TO_HEADER, NOT_GIVEN)
+        if is_given(override_cast_to):
+            options.headers = headers
+            return cast(Type[ResponseT], override_cast_to)
+
+        return cast_to
+
+    def _should_stream_response_body(self, request: httpx.Request) -> bool:
+        return request.headers.get(RAW_RESPONSE_HEADER) == "stream"  # type: ignore[no-any-return]
 
     def _process_response_data(
         self,
@@ -540,12 +541,6 @@ class BaseClient(Generic[_HttpxClientT, _DefaultStreamT]):
         except pydantic.ValidationError as err:
             raise APIResponseValidationError(response=response, body=data) from err
 
-    def _should_stream_response_body(self, *, request: httpx.Request) -> bool:
-        if request.headers.get(STREAMED_RAW_RESPONSE_HEADER) == "true":
-            return True
-
-        return False
-
     @property
     def qs(self) -> Querystring:
         return Querystring()
@@ -610,6 +605,8 @@ class BaseClient(Generic[_HttpxClientT, _DefaultStreamT]):
             if response_headers is not None:
                 retry_header = response_headers.get("retry-after")
                 try:
+                    # note: the spec indicates that this should only ever be an integer
+                    # but if someone sends a float there's no reason for us not to respect it
                     retry_after = float(retry_header)
                 except Exception:
                     retry_date_tuple = email.utils.parsedate_tz(retry_header)
@@ -873,6 +870,7 @@ class SyncAPIClient(BaseClient[httpx.Client, Stream[Any]]):
         stream: bool,
         stream_cls: type[_StreamT] | None,
     ) -> ResponseT | _StreamT:
+        cast_to = self._maybe_override_cast_to(cast_to, options)
         self._prepare_options(options)
 
         retries = self._remaining_retries(remaining_retries, options)
@@ -987,6 +985,63 @@ class SyncAPIClient(BaseClient[httpx.Client, Stream[Any]]):
             stream_cls=stream_cls,
         )
 
+    def _process_response(
+        self,
+        *,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        response: httpx.Response,
+        stream: bool,
+        stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None,
+    ) -> ResponseT:
+        if response.request.headers.get(RAW_RESPONSE_HEADER) == "true":
+            return cast(
+                ResponseT,
+                LegacyAPIResponse(
+                    raw=response,
+                    client=self,
+                    cast_to=cast_to,
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    options=options,
+                ),
+            )
+
+        origin = get_origin(cast_to) or cast_to
+
+        if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse):
+            if not issubclass(origin, APIResponse):
+                raise TypeError(f"API Response types must subclass {APIResponse}; Received {origin}")
+
+            response_cls = cast("type[BaseAPIResponse[Any]]", cast_to)
+            return cast(
+                ResponseT,
+                response_cls(
+                    raw=response,
+                    client=self,
+                    cast_to=extract_response_type(response_cls),
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    options=options,
+                ),
+            )
+
+        if cast_to == httpx.Response:
+            return cast(ResponseT, response)
+
+        api_response = APIResponse(
+            raw=response,
+            client=self,
+            cast_to=cast("type[ResponseT]", cast_to),  # pyright: ignore[reportUnnecessaryCast]
+            stream=stream,
+            stream_cls=stream_cls,
+            options=options,
+        )
+        if bool(response.request.headers.get(RAW_RESPONSE_HEADER)):
+            return cast(ResponseT, api_response)
+
+        return api_response.parse()
+
     def _request_api_list(
         self,
         model: Type[object],
@@ -1353,6 +1408,7 @@ class AsyncAPIClient(BaseClient[httpx.AsyncClient, AsyncStream[Any]]):
         stream_cls: type[_AsyncStreamT] | None,
         remaining_retries: int | None,
     ) -> ResponseT | _AsyncStreamT:
+        cast_to = self._maybe_override_cast_to(cast_to, options)
         await self._prepare_options(options)
 
         retries = self._remaining_retries(remaining_retries, options)
@@ -1428,7 +1484,7 @@ class AsyncAPIClient(BaseClient[httpx.AsyncClient, AsyncStream[Any]]):
             log.debug("Re-raising status error")
             raise self._make_status_error_from_response(err.response) from None
 
-        return self._process_response(
+        return await self._process_response(
             cast_to=cast_to,
             options=options,
             response=response,
@@ -1465,6 +1521,63 @@ class AsyncAPIClient(BaseClient[httpx.AsyncClient, AsyncStream[Any]]):
             stream_cls=stream_cls,
         )
 
+    async def _process_response(
+        self,
+        *,
+        cast_to: Type[ResponseT],
+        options: FinalRequestOptions,
+        response: httpx.Response,
+        stream: bool,
+        stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None,
+    ) -> ResponseT:
+        if response.request.headers.get(RAW_RESPONSE_HEADER) == "true":
+            return cast(
+                ResponseT,
+                LegacyAPIResponse(
+                    raw=response,
+                    client=self,
+                    cast_to=cast_to,
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    options=options,
+                ),
+            )
+
+        origin = get_origin(cast_to) or cast_to
+
+        if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse):
+            if not issubclass(origin, AsyncAPIResponse):
+                raise TypeError(f"API Response types must subclass {AsyncAPIResponse}; Received {origin}")
+
+            response_cls = cast("type[BaseAPIResponse[Any]]", cast_to)
+            return cast(
+                "ResponseT",
+                response_cls(
+                    raw=response,
+                    client=self,
+                    cast_to=extract_response_type(response_cls),
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    options=options,
+                ),
+            )
+
+        if cast_to == httpx.Response:
+            return cast(ResponseT, response)
+
+        api_response = AsyncAPIResponse(
+            raw=response,
+            client=self,
+            cast_to=cast("type[ResponseT]", cast_to),  # pyright: ignore[reportUnnecessaryCast]
+            stream=stream,
+            stream_cls=stream_cls,
+            options=options,
+        )
+        if bool(response.request.headers.get(RAW_RESPONSE_HEADER)):
+            return cast(ResponseT, api_response)
+
+        return await api_response.parse()
+
     def _request_api_list(
         self,
         model: Type[_T],
@@ -1783,105 +1896,3 @@ def _merge_mappings(
     """
     merged = {**obj1, **obj2}
     return {key: value for key, value in merged.items() if not isinstance(value, Omit)}
-
-
-class HttpxBinaryResponseContent(BinaryResponseContent):
-    response: httpx.Response
-
-    def __init__(self, response: httpx.Response) -> None:
-        self.response = response
-
-    @property
-    @override
-    def content(self) -> bytes:
-        return self.response.content
-
-    @property
-    @override
-    def text(self) -> str:
-        return self.response.text
-
-    @property
-    @override
-    def encoding(self) -> Optional[str]:
-        return self.response.encoding
-
-    @property
-    @override
-    def charset_encoding(self) -> Optional[str]:
-        return self.response.charset_encoding
-
-    @override
-    def json(self, **kwargs: Any) -> Any:
-        return self.response.json(**kwargs)
-
-    @override
-    def read(self) -> bytes:
-        return self.response.read()
-
-    @override
-    def iter_bytes(self, chunk_size: Optional[int] = None) -> Iterator[bytes]:
-        return self.response.iter_bytes(chunk_size)
-
-    @override
-    def iter_text(self, chunk_size: Optional[int] = None) -> Iterator[str]:
-        return self.response.iter_text(chunk_size)
-
-    @override
-    def iter_lines(self) -> Iterator[str]:
-        return self.response.iter_lines()
-
-    @override
-    def iter_raw(self, chunk_size: Optional[int] = None) -> Iterator[bytes]:
-        return self.response.iter_raw(chunk_size)
-
-    @override
-    def stream_to_file(
-        self,
-        file: str | os.PathLike[str],
-        *,
-        chunk_size: int | None = None,
-    ) -> None:
-        with open(file, mode="wb") as f:
-            for data in self.response.iter_bytes(chunk_size):
-                f.write(data)
-
-    @override
-    def close(self) -> None:
-        return self.response.close()
-
-    @override
-    async def aread(self) -> bytes:
-        return await self.response.aread()
-
-    @override
-    async def aiter_bytes(self, chunk_size: Optional[int] = None) -> AsyncIterator[bytes]:
-        return self.response.aiter_bytes(chunk_size)
-
-    @override
-    async def aiter_text(self, chunk_size: Optional[int] = None) -> AsyncIterator[str]:
-        return self.response.aiter_text(chunk_size)
-
-    @override
-    async def aiter_lines(self) -> AsyncIterator[str]:
-        return self.response.aiter_lines()
-
-    @override
-    async def aiter_raw(self, chunk_size: Optional[int] = None) -> AsyncIterator[bytes]:
-        return self.response.aiter_raw(chunk_size)
-
-    @override
-    async def astream_to_file(
-        self,
-        file: str | os.PathLike[str],
-        *,
-        chunk_size: int | None = None,
-    ) -> None:
-        path = anyio.Path(file)
-        async with await path.open(mode="wb") as f:
-            async for data in self.response.aiter_bytes(chunk_size):
-                await f.write(data)
-
-    @override
-    async def aclose(self) -> None:
-        return await self.response.aclose()
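
Taken together, the new `_process_response` dispatches on three signals: the legacy raw-response header, a `cast_to` that is itself an `APIResponse` subclass (as injected via the override header), and a plain `httpx.Response` passthrough. A simplified, standalone restatement of that decision tree (illustrative only; the real code also unwraps generic aliases via `get_origin` and validates sync vs. async subclassing):

    import httpx

    RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response"

    class BaseAPIResponse:  # stand-in for openai._response.BaseAPIResponse
        pass

    def choose_response_path(request_headers: httpx.Headers, cast_to: type) -> str:
        """Illustrative only: mirrors the branch order in _process_response."""
        if request_headers.get(RAW_RESPONSE_HEADER) == "true":
            return "legacy"        # with_raw_response -> LegacyAPIResponse
        if isinstance(cast_to, type) and issubclass(cast_to, BaseAPIResponse):
            return "api-response"  # with_streaming_response -> cast_to(...)
        if cast_to is httpx.Response:
            return "passthrough"   # caller asked for the raw httpx object
        return "parsed"            # default: build APIResponse and .parse()
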
src/openai/_client.py
@@ -58,6 +58,7 @@ class OpenAI(SyncAPIClient):
     fine_tuning: resources.FineTuning
     beta: resources.Beta
     with_raw_response: OpenAIWithRawResponse
+    with_streaming_response: OpenAIWithStreamedResponse
 
     # client options
     api_key: str
@@ -132,6 +133,7 @@ class OpenAI(SyncAPIClient):
         self.fine_tuning = resources.FineTuning(self)
         self.beta = resources.Beta(self)
         self.with_raw_response = OpenAIWithRawResponse(self)
+        self.with_streaming_response = OpenAIWithStreamedResponse(self)
 
     @property
     @override
@@ -254,6 +256,7 @@ class AsyncOpenAI(AsyncAPIClient):
     fine_tuning: resources.AsyncFineTuning
     beta: resources.AsyncBeta
     with_raw_response: AsyncOpenAIWithRawResponse
+    with_streaming_response: AsyncOpenAIWithStreamedResponse
 
     # client options
     api_key: str
@@ -328,6 +331,7 @@ class AsyncOpenAI(AsyncAPIClient):
         self.fine_tuning = resources.AsyncFineTuning(self)
         self.beta = resources.AsyncBeta(self)
         self.with_raw_response = AsyncOpenAIWithRawResponse(self)
+        self.with_streaming_response = AsyncOpenAIWithStreamedResponse(self)
 
     @property
     @override
@@ -466,6 +470,34 @@ class AsyncOpenAIWithRawResponse:
         self.beta = resources.AsyncBetaWithRawResponse(client.beta)
 
 
+class OpenAIWithStreamedResponse:
+    def __init__(self, client: OpenAI) -> None:
+        self.completions = resources.CompletionsWithStreamingResponse(client.completions)
+        self.chat = resources.ChatWithStreamingResponse(client.chat)
+        self.embeddings = resources.EmbeddingsWithStreamingResponse(client.embeddings)
+        self.files = resources.FilesWithStreamingResponse(client.files)
+        self.images = resources.ImagesWithStreamingResponse(client.images)
+        self.audio = resources.AudioWithStreamingResponse(client.audio)
+        self.moderations = resources.ModerationsWithStreamingResponse(client.moderations)
+        self.models = resources.ModelsWithStreamingResponse(client.models)
+        self.fine_tuning = resources.FineTuningWithStreamingResponse(client.fine_tuning)
+        self.beta = resources.BetaWithStreamingResponse(client.beta)
+
+
+class AsyncOpenAIWithStreamedResponse:
+    def __init__(self, client: AsyncOpenAI) -> None:
+        self.completions = resources.AsyncCompletionsWithStreamingResponse(client.completions)
+        self.chat = resources.AsyncChatWithStreamingResponse(client.chat)
+        self.embeddings = resources.AsyncEmbeddingsWithStreamingResponse(client.embeddings)
+        self.files = resources.AsyncFilesWithStreamingResponse(client.files)
+        self.images = resources.AsyncImagesWithStreamingResponse(client.images)
+        self.audio = resources.AsyncAudioWithStreamingResponse(client.audio)
+        self.moderations = resources.AsyncModerationsWithStreamingResponse(client.moderations)
+        self.models = resources.AsyncModelsWithStreamingResponse(client.models)
+        self.fine_tuning = resources.AsyncFineTuningWithStreamingResponse(client.fine_tuning)
+        self.beta = resources.AsyncBetaWithStreamingResponse(client.beta)
+
+
 Client = OpenAI
 
 AsyncClient = AsyncOpenAI
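
Because `with_streaming_response` is also attached at the client level, every resource can be reached through it without touching the per-resource properties. A small sketch, assuming a configured client:

    from openai import OpenAI

    client = OpenAI()

    # Equivalent to client.models.with_streaming_response.list()
    with client.with_streaming_response.models.list() as response:
        models = response.parse()
        print([m.id for m in models.data])
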
src/openai/_constants.py
@@ -3,7 +3,7 @@
 import httpx
 
 RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response"
-STREAMED_RAW_RESPONSE_HEADER = "X-Stainless-Streamed-Raw-Response"
+OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to"
 
 # default timeout is 10 minutes
 DEFAULT_TIMEOUT = httpx.Timeout(timeout=600.0, connect=5.0)
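
The renamed constant reflects the new mechanism: instead of a boolean "streamed raw response" header, the wrappers smuggle the desired response class through a private header, which `_maybe_override_cast_to` pops back out before the request is sent. A toy restatement of that round trip (simplified; the real code carries the class object on internal request options, not a serialized header value):

    OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to"

    def maybe_override_cast_to(cast_to: type, headers: dict) -> tuple[type, dict]:
        """Illustrative only: pop the override header and use it as cast_to."""
        headers = dict(headers)  # copy so we don't mutate caller input
        override = headers.pop(OVERRIDE_CAST_TO_HEADER, None)
        if override is not None:
            return override, headers
        return cast_to, headers

    # A wrapper would set {OVERRIDE_CAST_TO_HEADER: <APIResponse subclass>}
    # on the outgoing options; the base client then restores it here.
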
src/openai/_legacy_response.py
@@ -0,0 +1,385 @@
+from __future__ import annotations
+
+import os
+import inspect
+import logging
+import datetime
+import functools
+from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, Iterator, AsyncIterator, cast
+from typing_extensions import Awaitable, ParamSpec, get_args, override, deprecated, get_origin
+
+import anyio
+import httpx
+
+from ._types import NoneType
+from ._utils import is_given
+from ._models import BaseModel, is_basemodel
+from ._constants import RAW_RESPONSE_HEADER
+from ._exceptions import APIResponseValidationError
+
+if TYPE_CHECKING:
+    from ._models import FinalRequestOptions
+    from ._base_client import Stream, BaseClient, AsyncStream
+
+
+P = ParamSpec("P")
+R = TypeVar("R")
+
+log: logging.Logger = logging.getLogger(__name__)
+
+
+class LegacyAPIResponse(Generic[R]):
+    """This is a legacy class as it will be replaced by `APIResponse`
+    and `AsyncAPIResponse` in the `_response.py` file in the next major
+    release.
+
+    For the sync client this will mostly be the same, with the exception
+    that `content` & `text` will be methods instead of properties. In the
+    async client, all methods will be async.
+
+    A migration script will be provided & the migration in general should
+    be smooth.
+    """
+
+    _cast_to: type[R]
+    _client: BaseClient[Any, Any]
+    _parsed: R | None
+    _stream: bool
+    _stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None
+    _options: FinalRequestOptions
+
+    http_response: httpx.Response
+
+    def __init__(
+        self,
+        *,
+        raw: httpx.Response,
+        cast_to: type[R],
+        client: BaseClient[Any, Any],
+        stream: bool,
+        stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None,
+        options: FinalRequestOptions,
+    ) -> None:
+        self._cast_to = cast_to
+        self._client = client
+        self._parsed = None
+        self._stream = stream
+        self._stream_cls = stream_cls
+        self._options = options
+        self.http_response = raw
+
+    def parse(self) -> R:
+        """Returns the rich python representation of this response's data.
+
+        For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
+
+        NOTE: For the async client: this will become a coroutine in the next major version.
+        """
+        if self._parsed is not None:
+            return self._parsed
+
+        parsed = self._parse()
+        if is_given(self._options.post_parser):
+            parsed = self._options.post_parser(parsed)
+
+        self._parsed = parsed
+        return parsed
+
+    @property
+    def headers(self) -> httpx.Headers:
+        return self.http_response.headers
+
+    @property
+    def http_request(self) -> httpx.Request:
+        return self.http_response.request
+
+    @property
+    def status_code(self) -> int:
+        return self.http_response.status_code
+
+    @property
+    def url(self) -> httpx.URL:
+        return self.http_response.url
+
+    @property
+    def method(self) -> str:
+        return self.http_request.method
+
+    @property
+    def content(self) -> bytes:
+        """Return the binary response content.
+
+        NOTE: this will be removed in favour of `.read()` in the
+        next major version.
+        """
+        return self.http_response.content
+
+    @property
+    def text(self) -> str:
+        """Return the decoded response content.
+
+        NOTE: this will be turned into a method in the next major version.
+        """
+        return self.http_response.text
+
+    @property
+    def http_version(self) -> str:
+        return self.http_response.http_version
+
+    @property
+    def is_closed(self) -> bool:
+        return self.http_response.is_closed
+
+    @property
+    def elapsed(self) -> datetime.timedelta:
+        """The time taken for the complete request/response cycle to complete."""
+        return self.http_response.elapsed
+
+    def _parse(self) -> R:
+        if self._stream:
+            if self._stream_cls:
+                return cast(
+                    R,
+                    self._stream_cls(
+                        cast_to=_extract_stream_chunk_type(self._stream_cls),
+                        response=self.http_response,
+                        client=cast(Any, self._client),
+                    ),
+                )
+
+            stream_cls = cast("type[Stream[Any]] | type[AsyncStream[Any]] | None", self._client._default_stream_cls)
+            if stream_cls is None:
+                raise MissingStreamClassError()
+
+            return cast(
+                R,
+                stream_cls(
+                    cast_to=self._cast_to,
+                    response=self.http_response,
+                    client=cast(Any, self._client),
+                ),
+            )
+
+        cast_to = self._cast_to
+        if cast_to is NoneType:
+            return cast(R, None)
+
+        response = self.http_response
+        if cast_to == str:
+            return cast(R, response.text)
+
+        origin = get_origin(cast_to) or cast_to
+
+        if inspect.isclass(origin) and issubclass(origin, HttpxBinaryResponseContent):
+            return cast(R, cast_to(response))  # type: ignore
+
+        if origin == LegacyAPIResponse:
+            raise RuntimeError("Unexpected state - cast_to is `APIResponse`")
+
+        if inspect.isclass(origin) and issubclass(origin, httpx.Response):
+            # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response
+            # and pass that class to our request functions. We cannot change the variance to be either
+            # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct
+            # the response class ourselves but that is something that should be supported directly in httpx
+            # as it would be easy to incorrectly construct the Response object due to the multitude of arguments.
+            if cast_to != httpx.Response:
+                raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`")
+            return cast(R, response)
+
+        # The check here is necessary as we are subverting the type system
+        # with casts, and the relationship between TypeVars and types is very
+        # strict, which means we must return *exactly* what was input or
+        # transform it in a way that retains the TypeVar state. As we cannot
+        # do that in this function we have to resort to using `cast`. At the
+        # time of writing, we know this to be safe as we have handled all the
+        # types that could be bound to the `ResponseT` TypeVar, however if
+        # that TypeVar is ever updated in the future this function would
+        # become unsafe, but a type checker would not report an error.
+        if (
+            cast_to is not object
+            and origin is not list
+            and origin is not dict
+            and origin is not Union
+            and not issubclass(origin, BaseModel)
+        ):
+            raise RuntimeError(
+                f"Invalid state, expected {cast_to} to be a subclass type of {BaseModel}, {dict}, {list} or {Union}."
+            )
+
+        # split is required to handle cases where additional information is included
+        # in the response, e.g. application/json; charset=utf-8
+        content_type, *_ = response.headers.get("content-type").split(";")
+        if content_type != "application/json":
+            if is_basemodel(cast_to):
+                try:
+                    data = response.json()
+                except Exception as exc:
+                    log.debug("Could not read JSON from response data due to %s - %s", type(exc), exc)
+                else:
+                    return self._client._process_response_data(
+                        data=data,
+                        cast_to=cast_to,  # type: ignore
+                        response=response,
+                    )
+
+            if self._client._strict_response_validation:
+                raise APIResponseValidationError(
+                    response=response,
+                    message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.",
+                    body=response.text,
+                )
+
+            # If the API responds with content that isn't JSON then we just return
+            # the (decoded) text without performing any parsing so that you can still
+            # handle the response however you need to.
+            return response.text  # type: ignore
+
+        data = response.json()
+
+        return self._client._process_response_data(
+            data=data,
+            cast_to=cast_to,  # type: ignore
+            response=response,
+        )
+
+    @override
+    def __repr__(self) -> str:
+        return f"<APIResponse [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_to}>"
+
+
+class MissingStreamClassError(TypeError):
+    def __init__(self) -> None:
+        super().__init__(
+            "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `openai._streaming` for reference",
+        )
+
+
+def _extract_stream_chunk_type(stream_cls: type) -> type:
+    args = get_args(stream_cls)
+    if not args:
+        raise TypeError(
+            f"Expected stream_cls to have been given a generic type argument, e.g. Stream[Foo] but received {stream_cls}",
+        )
+    return cast(type, args[0])
+
+
+def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, LegacyAPIResponse[R]]:
+    """Higher order function that takes one of our bound API methods and wraps it
+    to support returning the raw `APIResponse` object directly.
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]:
+        extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "true"
+
+        kwargs["extra_headers"] = extra_headers
+
+        return cast(LegacyAPIResponse[R], func(*args, **kwargs))
+
+    return wrapped
+
+
+def async_to_raw_response_wrapper(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[LegacyAPIResponse[R]]]:
+    """Higher order function that takes one of our bound API methods and wraps it
+    to support returning the raw `APIResponse` object directly.
+    """
+
+    @functools.wraps(func)
+    async def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]:
+        extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "true"
+
+        kwargs["extra_headers"] = extra_headers
+
+        return cast(LegacyAPIResponse[R], await func(*args, **kwargs))
+
+    return wrapped
+
+
+class HttpxBinaryResponseContent:
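+    """Thin wrapper around an `httpx.Response` exposing its binary content.
+
+    This is the legacy return type for binary endpoints; the
+    `stream_to_file`/`astream_to_file` helpers below are deprecated in
+    favour of `.with_streaming_response` and the streamed response classes.
+    """
+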
+    response: httpx.Response
+
+    def __init__(self, response: httpx.Response) -> None:
+        self.response = response
+
+    @property
+    def content(self) -> bytes:
+        return self.response.content
+
+    @property
+    def text(self) -> str:
+        return self.response.text
+
+    @property
+    def encoding(self) -> str | None:
+        return self.response.encoding
+
+    @property
+    def charset_encoding(self) -> str | None:
+        return self.response.charset_encoding
+
+    def json(self, **kwargs: Any) -> Any:
+        return self.response.json(**kwargs)
+
+    def read(self) -> bytes:
+        return self.response.read()
+
+    def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]:
+        return self.response.iter_bytes(chunk_size)
+
+    def iter_text(self, chunk_size: int | None = None) -> Iterator[str]:
+        return self.response.iter_text(chunk_size)
+
+    def iter_lines(self) -> Iterator[str]:
+        return self.response.iter_lines()
+
+    def iter_raw(self, chunk_size: int | None = None) -> Iterator[bytes]:
+        return self.response.iter_raw(chunk_size)
+
+    @deprecated(
+        "Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead"
+    )
+    def stream_to_file(
+        self,
+        file: str | os.PathLike[str],
+        *,
+        chunk_size: int | None = None,
+    ) -> None:
+        with open(file, mode="wb") as f:
+            for data in self.response.iter_bytes(chunk_size):
+                f.write(data)
+
+    def close(self) -> None:
+        return self.response.close()
+
+    async def aread(self) -> bytes:
+        return await self.response.aread()
+
+    async def aiter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
+        return self.response.aiter_bytes(chunk_size)
+
+    async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]:
+        return self.response.aiter_text(chunk_size)
+
+    async def aiter_lines(self) -> AsyncIterator[str]:
+        return self.response.aiter_lines()
+
+    async def aiter_raw(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
+        return self.response.aiter_raw(chunk_size)
+
+    @deprecated(
+        "Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead"
+    )
+    async def astream_to_file(
+        self,
+        file: str | os.PathLike[str],
+        *,
+        chunk_size: int | None = None,
+    ) -> None:
+        path = anyio.Path(file)
+        async with await path.open(mode="wb") as f:
+            async for data in self.response.aiter_bytes(chunk_size):
+                await f.write(data)
+
+    async def aclose(self) -> None:
+        return await self.response.aclose()
src/openai/_response.py
@@ -1,19 +1,32 @@
 from __future__ import annotations
 
+import os
 import inspect
 import logging
 import datetime
 import functools
-from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, cast
+from types import TracebackType
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Union,
+    Generic,
+    TypeVar,
+    Callable,
+    Iterator,
+    AsyncIterator,
+    cast,
+)
 from typing_extensions import Awaitable, ParamSpec, override, get_origin
 
+import anyio
 import httpx
 
-from ._types import NoneType, BinaryResponseContent
+from ._types import NoneType
 from ._utils import is_given, extract_type_var_from_base
 from ._models import BaseModel, is_basemodel
-from ._constants import RAW_RESPONSE_HEADER
-from ._exceptions import APIResponseValidationError
+from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER
+from ._exceptions import OpenAIError, APIResponseValidationError
 
 if TYPE_CHECKING:
     from ._models import FinalRequestOptions
@@ -22,15 +35,17 @@ if TYPE_CHECKING:
 
 P = ParamSpec("P")
 R = TypeVar("R")
+_APIResponseT = TypeVar("_APIResponseT", bound="APIResponse[Any]")
+_AsyncAPIResponseT = TypeVar("_AsyncAPIResponseT", bound="AsyncAPIResponse[Any]")
 
 log: logging.Logger = logging.getLogger(__name__)
 
 
-class APIResponse(Generic[R]):
+class BaseAPIResponse(Generic[R]):
     _cast_to: type[R]
     _client: BaseClient[Any, Any]
     _parsed: R | None
-    _stream: bool
+    _is_sse_stream: bool
     _stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None
     _options: FinalRequestOptions
 
@@ -49,28 +64,18 @@ class APIResponse(Generic[R]):
         self._cast_to = cast_to
         self._client = client
         self._parsed = None
-        self._stream = stream
+        self._is_sse_stream = stream
         self._stream_cls = stream_cls
         self._options = options
         self.http_response = raw
 
-    def parse(self) -> R:
-        if self._parsed is not None:
-            return self._parsed
-
-        parsed = self._parse()
-        if is_given(self._options.post_parser):
-            parsed = self._options.post_parser(parsed)
-
-        self._parsed = parsed
-        return parsed
-
     @property
     def headers(self) -> httpx.Headers:
         return self.http_response.headers
 
     @property
     def http_request(self) -> httpx.Request:
+        """Returns the httpx Request instance associated with the current response."""
         return self.http_response.request
 
     @property
@@ -79,20 +84,13 @@ class APIResponse(Generic[R]):
 
     @property
     def url(self) -> httpx.URL:
+        """Returns the URL for which the request was made."""
         return self.http_response.url
 
     @property
     def method(self) -> str:
         return self.http_request.method
 
-    @property
-    def content(self) -> bytes:
-        return self.http_response.content
-
-    @property
-    def text(self) -> str:
-        return self.http_response.text
-
     @property
     def http_version(self) -> str:
         return self.http_response.http_version
@@ -102,13 +100,29 @@ class APIResponse(Generic[R]):
         """The time taken for the complete request/response cycle to complete."""
         return self.http_response.elapsed
 
+    @property
+    def is_closed(self) -> bool:
+        """Whether or not the response body has been closed.
+
+        If this is False then there is response data that has not been read yet.
+        You must either fully consume the response body or call `.close()`
+        before discarding the response to prevent resource leaks.
+        """
+        return self.http_response.is_closed
+
+    @override
+    def __repr__(self) -> str:
+        return (
+            f"<{self.__class__.__name__} [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_to}>"
+        )
+
     def _parse(self) -> R:
-        if self._stream:
+        if self._is_sse_stream:
             if self._stream_cls:
                 return cast(
                     R,
                     self._stream_cls(
-                        cast_to=_extract_stream_chunk_type(self._stream_cls),
+                        cast_to=extract_stream_chunk_type(self._stream_cls),
                         response=self.http_response,
                         client=cast(Any, self._client),
                     ),
@@ -135,9 +149,13 @@ class APIResponse(Generic[R]):
         if cast_to == str:
             return cast(R, response.text)
 
+        if cast_to == bytes:
+            return cast(R, response.content)
+
         origin = get_origin(cast_to) or cast_to
 
-        if inspect.isclass(origin) and issubclass(origin, BinaryResponseContent):
+        # handle the legacy binary response case
+        if inspect.isclass(cast_to) and cast_to.__name__ == "HttpxBinaryResponseContent":
             return cast(R, cast_to(response))  # type: ignore
 
         if origin == APIResponse:
@@ -208,9 +226,227 @@ class APIResponse(Generic[R]):
             response=response,
         )
 
-    @override
-    def __repr__(self) -> str:
-        return f"<APIResponse [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_to}>"
+
+class APIResponse(BaseAPIResponse[R]):
+    def parse(self) -> R:
+        """Returns the rich python representation of this response's data.
+
+        For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
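+
+        Note that for non-SSE responses the body is read to completion
+        before parsing, and repeated calls return the cached result.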
+        """
+        if self._parsed is not None:
+            return self._parsed
+
+        if not self._is_sse_stream:
+            self.read()
+
+        parsed = self._parse()
+        if is_given(self._options.post_parser):
+            parsed = self._options.post_parser(parsed)
+
+        self._parsed = parsed
+        return parsed
+
+    def read(self) -> bytes:
+        """Read and return the binary response content."""
+        try:
+            return self.http_response.read()
+        except httpx.StreamConsumed as exc:
+            # The default error raised by httpx isn't very
+            # helpful in our case so we re-raise it with
+            # a different error message.
+            raise StreamAlreadyConsumed() from exc
+
+    def text(self) -> str:
+        """Read and decode the response content into a string."""
+        self.read()
+        return self.http_response.text
+
+    def json(self) -> object:
+        """Read and decode the JSON response content."""
+        self.read()
+        return self.http_response.json()
+
+    def close(self) -> None:
+        """Close the response and release the connection.
+
+        Automatically called if the response body is read to completion.
+        """
+        self.http_response.close()
+
+    def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]:
+        """
+        A byte-iterator over the decoded response content.
+
+        This automatically handles gzip, deflate and brotli encoded responses.
+        """
+        for chunk in self.http_response.iter_bytes(chunk_size):
+            yield chunk
+
+    def iter_text(self, chunk_size: int | None = None) -> Iterator[str]:
+        """A str-iterator over the decoded response content
+        that handles both gzip, deflate, etc but also detects the content's
+        string encoding.
+        """
+        for chunk in self.http_response.iter_text(chunk_size):
+            yield chunk
+
+    def iter_lines(self) -> Iterator[str]:
+        """Like `iter_text()` but will only yield chunks for each line"""
+        for chunk in self.http_response.iter_lines():
+            yield chunk
+
+
+class AsyncAPIResponse(BaseAPIResponse[R]):
+    async def parse(self) -> R:
+        """Returns the rich python representation of this response's data.
+
+        For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
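+
+        Unlike the sync client, this is a coroutine and must be awaited,
+        e.g. `parsed = await response.parse()`.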
+        """
+        if self._parsed is not None:
+            return self._parsed
+
+        if not self._is_sse_stream:
+            await self.read()
+
+        parsed = self._parse()
+        if is_given(self._options.post_parser):
+            parsed = self._options.post_parser(parsed)
+
+        self._parsed = parsed
+        return parsed
+
+    async def read(self) -> bytes:
+        """Read and return the binary response content."""
+        try:
+            return await self.http_response.aread()
+        except httpx.StreamConsumed as exc:
+            # The default error raised by httpx isn't very
+            # helpful in our case so we re-raise it with
+            # a different error message.
+            raise StreamAlreadyConsumed() from exc
+
+    async def text(self) -> str:
+        """Read and decode the response content into a string."""
+        await self.read()
+        return self.http_response.text
+
+    async def json(self) -> object:
+        """Read and decode the JSON response content."""
+        await self.read()
+        return self.http_response.json()
+
+    async def close(self) -> None:
+        """Close the response and release the connection.
+
+        Automatically called if the response body is read to completion.
+        """
+        await self.http_response.aclose()
+
+    async def iter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
+        """
+        A byte-iterator over the decoded response content.
+
+        This automatically handles gzip, deflate and brotli encoded responses.
+        """
+        async for chunk in self.http_response.aiter_bytes(chunk_size):
+            yield chunk
+
+    async def iter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]:
+        """A str-iterator over the decoded response content
+        that handles both gzip, deflate, etc but also detects the content's
+        string encoding.
+        """
+        async for chunk in self.http_response.aiter_text(chunk_size):
+            yield chunk
+
+    async def iter_lines(self) -> AsyncIterator[str]:
+        """Like `iter_text()` but will only yield chunks for each line"""
+        async for chunk in self.http_response.aiter_lines():
+            yield chunk
+
+
+class BinaryAPIResponse(APIResponse[bytes]):
+    """Subclass of APIResponse providing helpers for dealing with binary data.
+
+    Note: If you want to stream the response data instead of eagerly reading it
+    all at once then you should use `.with_streaming_response` when making
+    the API request, e.g. `.with_streaming_response.get_binary_response()`
+    """
+
+    def write_to_file(
+        self,
+        file: str | os.PathLike[str],
+    ) -> None:
+        """Write the output to the given file.
+
+        Accepts a filename or any path-like object, e.g. pathlib.Path
+
+        Note: if you want to stream the data to the file instead of writing
+        all at once then you should use `.with_streaming_response` when making
+        the API request, e.g. `.with_streaming_response.get_binary_response()`
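+
+        A hedged sketch (assumes an endpoint whose raw variant is declared
+        to return this class):
+
+        ```py
+        # illustrative endpoint & file id
+        response = client.files.with_raw_response.content("file-abc123")
+        response.write_to_file("output.bin")
+        ```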
+        """
+        with open(file, mode="wb") as f:
+            for data in self.iter_bytes():
+                f.write(data)
+
+
+class AsyncBinaryAPIResponse(AsyncAPIResponse[bytes]):
+    """Subclass of APIResponse providing helpers for dealing with binary data.
+
+    Note: If you want to stream the response data instead of eagerly reading it
+    all at once then you should use `.with_streaming_response` when making
+    the API request, e.g. `.with_streaming_response.get_binary_response()`
+    """
+
+    async def write_to_file(
+        self,
+        file: str | os.PathLike[str],
+    ) -> None:
+        """Write the output to the given file.
+
+        Accepts a filename or any path-like object, e.g. pathlib.Path
+
+        Note: if you want to stream the data to the file instead of writing
+        all at once then you should use `.with_streaming_response` when making
+        the API request, e.g. `.with_streaming_response.get_binary_response()`
+        """
+        path = anyio.Path(file)
+        async with await path.open(mode="wb") as f:
+            async for data in self.iter_bytes():
+                await f.write(data)
+
+
+class StreamedBinaryAPIResponse(APIResponse[bytes]):
+    def stream_to_file(
+        self,
+        file: str | os.PathLike[str],
+        *,
+        chunk_size: int | None = None,
+    ) -> None:
+        """Streams the output to the given file.
+
+        Accepts a filename or any path-like object, e.g. pathlib.Path
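+
+        A hedged sketch (assumes an endpoint whose streaming variant
+        yields this class):
+
+        ```py
+        # illustrative endpoint & file id
+        with client.files.with_streaming_response.content("file-abc123") as response:
+            response.stream_to_file("output.bin")
+        ```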
+        """
+        with open(file, mode="wb") as f:
+            for data in self.iter_bytes(chunk_size):
+                f.write(data)
+
+
+class AsyncStreamedBinaryAPIResponse(AsyncAPIResponse[bytes]):
+    async def stream_to_file(
+        self,
+        file: str | os.PathLike[str],
+        *,
+        chunk_size: int | None = None,
+    ) -> None:
+        """Streams the output to the given file.
+
+        Accepts a filename or any path-like object, e.g. pathlib.Path
+        """
+        path = anyio.Path(file)
+        async with await path.open(mode="wb") as f:
+            async for data in self.iter_bytes(chunk_size):
+                await f.write(data)
 
 
 class MissingStreamClassError(TypeError):
@@ -220,14 +456,176 @@ class MissingStreamClassError(TypeError):
         )
 
 
-def _extract_stream_chunk_type(stream_cls: type) -> type:
-    from ._base_client import Stream, AsyncStream
+class StreamAlreadyConsumed(OpenAIError):
+    """
+    Attempted to read or stream content, but the content has already
+    been streamed.
 
-    return extract_type_var_from_base(
-        stream_cls,
-        index=0,
-        generic_bases=cast("tuple[type, ...]", (Stream, AsyncStream)),
-    )
+    This can happen if you use a method like `.iter_lines()` and then attempt
+    to read the entire response body afterwards, e.g.
+
+    ```py
+    response = await client.post(...)
+    async for line in response.iter_lines():
+        ...  # do something with `line`
+
+    content = await response.read()
+    # ^ error
+    ```
+
+    If you want this behaviour you'll need to either manually accumulate the response
+    content or call `await response.read()` before iterating over the stream.
+    """
+
+    def __init__(self) -> None:
+        message = (
+            "Attempted to read or stream some content, but the content has "
+            "already been streamed. "
+            "This could be due to attempting to stream the response "
+            "content more than once."
+            "\n\n"
+            "You can fix this by manually accumulating the response content while streaming "
+            "or by calling `.read()` before starting to stream."
+        )
+        super().__init__(message)
+
+
+class ResponseContextManager(Generic[_APIResponseT]):
+    """Context manager for ensuring that a request is not made
+    until it is entered and that the response will always be closed
+    when the context manager exits.
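+
+    An illustrative sketch (sync `.with_streaming_response` methods return
+    one of these):
+
+    ```py
+    with client.audio.transcriptions.with_streaming_response.create(
+        file=b"raw file contents",
+        model="whisper-1",
+    ) as response:
+        transcription = response.parse()
+    # the response is closed here, even if an exception was raised
+    ```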
+    """
+
+    def __init__(self, request_func: Callable[[], _APIResponseT]) -> None:
+        self._request_func = request_func
+        self.__response: _APIResponseT | None = None
+
+    def __enter__(self) -> _APIResponseT:
+        self.__response = self._request_func()
+        return self.__response
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__response is not None:
+            self.__response.close()
+
+
+class AsyncResponseContextManager(Generic[_AsyncAPIResponseT]):
+    """Context manager for ensuring that a request is not made
+    until it is entered and that the response will always be closed
+    when the context manager exits.
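+
+    An illustrative sketch (mirrors the sync variant, but must be entered
+    with `async with` and `parse` is awaited):
+
+    ```py
+    async with client.audio.transcriptions.with_streaming_response.create(
+        file=b"raw file contents",
+        model="whisper-1",
+    ) as response:
+        transcription = await response.parse()
+    ```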
+    """
+
+    def __init__(self, api_request: Awaitable[_AsyncAPIResponseT]) -> None:
+        self._api_request = api_request
+        self.__response: _AsyncAPIResponseT | None = None
+
+    async def __aenter__(self) -> _AsyncAPIResponseT:
+        self.__response = await self._api_request
+        return self.__response
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self.__response is not None:
+            await self.__response.close()
+
+
+def to_streamed_response_wrapper(func: Callable[P, R]) -> Callable[P, ResponseContextManager[APIResponse[R]]]:
+    """Higher order function that takes one of our bound API methods and wraps it
+    to support streaming and returning the raw `APIResponse` object directly.
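+
+    A sketch of how a resource's streaming variant might bind it (names
+    are illustrative):
+
+    ```py
+    class TranscriptionsWithStreamingResponse:
+        def __init__(self, transcriptions: Transcriptions) -> None:
+            self.create = to_streamed_response_wrapper(
+                transcriptions.create,
+            )
+    ```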
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> ResponseContextManager[APIResponse[R]]:
+        extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "stream"
+
+        kwargs["extra_headers"] = extra_headers
+
+        make_request = functools.partial(func, *args, **kwargs)
+
+        return ResponseContextManager(cast(Callable[[], APIResponse[R]], make_request))
+
+    return wrapped
+
+
+def async_to_streamed_response_wrapper(
+    func: Callable[P, Awaitable[R]],
+) -> Callable[P, AsyncResponseContextManager[AsyncAPIResponse[R]]]:
+    """Higher order function that takes one of our bound API methods and wraps it
+    to support streaming and returning the raw `APIResponse` object directly.
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncResponseContextManager[AsyncAPIResponse[R]]:
+        extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "stream"
+
+        kwargs["extra_headers"] = extra_headers
+
+        make_request = func(*args, **kwargs)
+
+        return AsyncResponseContextManager(cast(Awaitable[AsyncAPIResponse[R]], make_request))
+
+    return wrapped
+
+
+def to_custom_streamed_response_wrapper(
+    func: Callable[P, object],
+    response_cls: type[_APIResponseT],
+) -> Callable[P, ResponseContextManager[_APIResponseT]]:
+    """Higher order function that takes one of our bound API methods and an `APIResponse` class
+    and wraps the method to support streaming and returning the given response class directly.
+
+    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
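+
+    A sketch of a binding for a binary endpoint (illustrative):
+
+    ```py
+    self.create = to_custom_streamed_response_wrapper(
+        speech.create,
+        StreamedBinaryAPIResponse,
+    )
+    ```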
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> ResponseContextManager[_APIResponseT]:
+        extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "stream"
+        extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls
+
+        kwargs["extra_headers"] = extra_headers
+
+        make_request = functools.partial(func, *args, **kwargs)
+
+        return ResponseContextManager(cast(Callable[[], _APIResponseT], make_request))
+
+    return wrapped
+
+
+def async_to_custom_streamed_response_wrapper(
+    func: Callable[P, Awaitable[object]],
+    response_cls: type[_AsyncAPIResponseT],
+) -> Callable[P, AsyncResponseContextManager[_AsyncAPIResponseT]]:
+    """Higher order function that takes one of our bound API methods and an `APIResponse` class
+    and wraps the method to support streaming and returning the given response class directly.
+
+    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncResponseContextManager[_AsyncAPIResponseT]:
+        extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "stream"
+        extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls
+
+        kwargs["extra_headers"] = extra_headers
+
+        make_request = func(*args, **kwargs)
+
+        return AsyncResponseContextManager(cast(Awaitable[_AsyncAPIResponseT], make_request))
+
+    return wrapped
 
 
 def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, APIResponse[R]]:
@@ -238,7 +636,7 @@ def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, APIResponse[R]]
     @functools.wraps(func)
     def wrapped(*args: P.args, **kwargs: P.kwargs) -> APIResponse[R]:
         extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})}
-        extra_headers[RAW_RESPONSE_HEADER] = "true"
+        extra_headers[RAW_RESPONSE_HEADER] = "raw"
 
         kwargs["extra_headers"] = extra_headers
 
@@ -247,18 +645,102 @@ def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, APIResponse[R]]
     return wrapped
 
 
-def async_to_raw_response_wrapper(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[APIResponse[R]]]:
+def async_to_raw_response_wrapper(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[AsyncAPIResponse[R]]]:
     """Higher order function that takes one of our bound API methods and wraps it
     to support returning the raw `APIResponse` object directly.
     """
 
     @functools.wraps(func)
-    async def wrapped(*args: P.args, **kwargs: P.kwargs) -> APIResponse[R]:
+    async def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncAPIResponse[R]:
         extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})}
-        extra_headers[RAW_RESPONSE_HEADER] = "true"
+        extra_headers[RAW_RESPONSE_HEADER] = "raw"
 
         kwargs["extra_headers"] = extra_headers
 
-        return cast(APIResponse[R], await func(*args, **kwargs))
+        return cast(AsyncAPIResponse[R], await func(*args, **kwargs))
 
     return wrapped
+
+
+def to_custom_raw_response_wrapper(
+    func: Callable[P, object],
+    response_cls: type[_APIResponseT],
+) -> Callable[P, _APIResponseT]:
+    """Higher order function that takes one of our bound API methods and an `APIResponse` class
+    and wraps the method to support returning the given response class directly.
+
+    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> _APIResponseT:
+        extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "raw"
+        extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls
+
+        kwargs["extra_headers"] = extra_headers
+
+        return cast(_APIResponseT, func(*args, **kwargs))
+
+    return wrapped
+
+
+def async_to_custom_raw_response_wrapper(
+    func: Callable[P, Awaitable[object]],
+    response_cls: type[_AsyncAPIResponseT],
+) -> Callable[P, Awaitable[_AsyncAPIResponseT]]:
+    """Higher order function that takes one of our bound API methods and an `APIResponse` class
+    and wraps the method to support returning the given response class directly.
+
+    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
+    """
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> Awaitable[_AsyncAPIResponseT]:
+        extra_headers = {**(cast(Any, kwargs.get("extra_headers")) or {})}
+        extra_headers[RAW_RESPONSE_HEADER] = "raw"
+        extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls
+
+        kwargs["extra_headers"] = extra_headers
+
+        return cast(Awaitable[_AsyncAPIResponseT], func(*args, **kwargs))
+
+    return wrapped
+
+
+def extract_stream_chunk_type(stream_cls: type) -> type:
+    """Given a type like `Stream[T]`, returns the generic type variable `T`.
+
+    This also handles the case where a concrete subclass is given, e.g.
+    ```py
+    class MyStream(Stream[bytes]):
+        ...
+
+    extract_stream_chunk_type(MyStream) -> bytes
+    ```
+    """
+    from ._base_client import Stream, AsyncStream
+
+    return extract_type_var_from_base(
+        stream_cls,
+        index=0,
+        generic_bases=cast("tuple[type, ...]", (Stream, AsyncStream)),
+    )
+
+
+def extract_response_type(typ: type[BaseAPIResponse[Any]]) -> type:
+    """Given a type like `APIResponse[T]`, returns the generic type variable `T`.
+
+    This also handles the case where a concrete subclass is given, e.g.
+    ```py
+    class MyResponse(APIResponse[bytes]):
+        ...
+
+    extract_response_type(MyResponse) -> bytes
+    ```
+    """
+    return extract_type_var_from_base(
+        typ,
+        generic_bases=cast("tuple[type, ...]", (BaseAPIResponse, APIResponse, AsyncAPIResponse)),
+        index=0,
+    )
src/openai/_types.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 from os import PathLike
-from abc import ABC, abstractmethod
 from typing import (
     IO,
     TYPE_CHECKING,
@@ -14,10 +13,8 @@ from typing import (
     Mapping,
     TypeVar,
     Callable,
-    Iterator,
     Optional,
     Sequence,
-    AsyncIterator,
 )
 from typing_extensions import Literal, Protocol, TypeAlias, TypedDict, override, runtime_checkable
 
@@ -27,6 +24,8 @@ from httpx import URL, Proxy, Timeout, Response, BaseTransport, AsyncBaseTranspo
 
 if TYPE_CHECKING:
     from ._models import BaseModel
+    from ._response import APIResponse, AsyncAPIResponse
+    from ._legacy_response import HttpxBinaryResponseContent
 
 Transport = BaseTransport
 AsyncTransport = AsyncBaseTransport
@@ -37,162 +36,6 @@ ModelT = TypeVar("ModelT", bound=pydantic.BaseModel)
 _T = TypeVar("_T")
 
 
-class BinaryResponseContent(ABC):
-    @abstractmethod
-    def __init__(
-        self,
-        response: Any,
-    ) -> None:
-        ...
-
-    @property
-    @abstractmethod
-    def content(self) -> bytes:
-        pass
-
-    @property
-    @abstractmethod
-    def text(self) -> str:
-        pass
-
-    @property
-    @abstractmethod
-    def encoding(self) -> Optional[str]:
-        """
-        Return an encoding to use for decoding the byte content into text.
-        The priority for determining this is given by...
-
-        * `.encoding = <>` has been set explicitly.
-        * The encoding as specified by the charset parameter in the Content-Type header.
-        * The encoding as determined by `default_encoding`, which may either be
-          a string like "utf-8" indicating the encoding to use, or may be a callable
-          which enables charset autodetection.
-        """
-        pass
-
-    @property
-    @abstractmethod
-    def charset_encoding(self) -> Optional[str]:
-        """
-        Return the encoding, as specified by the Content-Type header.
-        """
-        pass
-
-    @abstractmethod
-    def json(self, **kwargs: Any) -> Any:
-        pass
-
-    @abstractmethod
-    def read(self) -> bytes:
-        """
-        Read and return the response content.
-        """
-        pass
-
-    @abstractmethod
-    def iter_bytes(self, chunk_size: Optional[int] = None) -> Iterator[bytes]:
-        """
-        A byte-iterator over the decoded response content.
-        This allows us to handle gzip, deflate, and brotli encoded responses.
-        """
-        pass
-
-    @abstractmethod
-    def iter_text(self, chunk_size: Optional[int] = None) -> Iterator[str]:
-        """
-        A str-iterator over the decoded response content
-        that handles both gzip, deflate, etc but also detects the content's
-        string encoding.
-        """
-        pass
-
-    @abstractmethod
-    def iter_lines(self) -> Iterator[str]:
-        pass
-
-    @abstractmethod
-    def iter_raw(self, chunk_size: Optional[int] = None) -> Iterator[bytes]:
-        """
-        A byte-iterator over the raw response content.
-        """
-        pass
-
-    @abstractmethod
-    def stream_to_file(
-        self,
-        file: str | PathLike[str],
-        *,
-        chunk_size: int | None = None,
-    ) -> None:
-        """
-        Stream the output to the given file.
-        """
-        pass
-
-    @abstractmethod
-    def close(self) -> None:
-        """
-        Close the response and release the connection.
-        Automatically called if the response body is read to completion.
-        """
-        pass
-
-    @abstractmethod
-    async def aread(self) -> bytes:
-        """
-        Read and return the response content.
-        """
-        pass
-
-    @abstractmethod
-    async def aiter_bytes(self, chunk_size: Optional[int] = None) -> AsyncIterator[bytes]:
-        """
-        A byte-iterator over the decoded response content.
-        This allows us to handle gzip, deflate, and brotli encoded responses.
-        """
-        pass
-
-    @abstractmethod
-    async def aiter_text(self, chunk_size: Optional[int] = None) -> AsyncIterator[str]:
-        """
-        A str-iterator over the decoded response content
-        that handles both gzip, deflate, etc but also detects the content's
-        string encoding.
-        """
-        pass
-
-    @abstractmethod
-    async def aiter_lines(self) -> AsyncIterator[str]:
-        pass
-
-    @abstractmethod
-    async def aiter_raw(self, chunk_size: Optional[int] = None) -> AsyncIterator[bytes]:
-        """
-        A byte-iterator over the raw response content.
-        """
-        pass
-
-    @abstractmethod
-    async def astream_to_file(
-        self,
-        file: str | PathLike[str],
-        *,
-        chunk_size: int | None = None,
-    ) -> None:
-        """
-        Stream the output to the given file.
-        """
-        pass
-
-    @abstractmethod
-    async def aclose(self) -> None:
-        """
-        Close the response and release the connection.
-        Automatically called if the response body is read to completion.
-        """
-        pass
-
-
 # Approximates httpx internal ProxiesTypes and RequestFiles types
 # while adding support for `PathLike` instances
 ProxiesDict = Dict["str | URL", Union[None, str, URL, Proxy]]
@@ -343,7 +186,9 @@ ResponseT = TypeVar(
         Dict[str, Any],
         Response,
         ModelBuilderProtocol,
-        BinaryResponseContent,
+        "APIResponse[Any]",
+        "AsyncAPIResponse[Any]",
+        "HttpxBinaryResponseContent",
     ],
 )
 
@@ -359,6 +204,7 @@ PostParser = Callable[[Any], Any]
 @runtime_checkable
 class InheritsGeneric(Protocol):
     """Represents a type that has inherited from `Generic`
+
     The `__orig_bases__` property can be used to determine the resolved
     type variable for a given base class.
     """
tests/api_resources/audio/test_speech.py
@@ -3,15 +3,19 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import httpx
 import pytest
 from respx import MockRouter
 
+import openai._legacy_response as _legacy_response
 from openai import OpenAI, AsyncOpenAI
-from openai._types import BinaryResponseContent
+from tests.utils import assert_matches_type
 from openai._client import OpenAI, AsyncOpenAI
 
+# pyright: reportDeprecated=false
+
 base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
 api_key = "My API Key"
 
@@ -21,7 +25,6 @@ class TestSpeech:
     loose_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False)
     parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"])
 
-    @pytest.mark.skip(reason="Mocked tests are currently broken")
     @parametrize
     @pytest.mark.respx(base_url=base_url)
     def test_method_create(self, client: OpenAI, respx_mock: MockRouter) -> None:
@@ -31,10 +34,9 @@ class TestSpeech:
             model="string",
             voice="alloy",
         )
-        assert isinstance(speech, BinaryResponseContent)
+        assert isinstance(speech, _legacy_response.HttpxBinaryResponseContent)
         assert speech.json() == {"foo": "bar"}
 
-    @pytest.mark.skip(reason="Mocked tests are currently broken")
     @parametrize
     @pytest.mark.respx(base_url=base_url)
     def test_method_create_with_all_params(self, client: OpenAI, respx_mock: MockRouter) -> None:
@@ -46,23 +48,41 @@ class TestSpeech:
             response_format="mp3",
             speed=0.25,
         )
-        assert isinstance(speech, BinaryResponseContent)
+        assert isinstance(speech, _legacy_response.HttpxBinaryResponseContent)
         assert speech.json() == {"foo": "bar"}
 
-    @pytest.mark.skip(reason="Mocked tests are currently broken")
     @parametrize
     @pytest.mark.respx(base_url=base_url)
     def test_raw_response_create(self, client: OpenAI, respx_mock: MockRouter) -> None:
         respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
+
         response = client.audio.speech.with_raw_response.create(
             input="string",
             model="string",
             voice="alloy",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         speech = response.parse()
-        assert isinstance(speech, BinaryResponseContent)
-        assert speech.json() == {"foo": "bar"}
+        assert_matches_type(_legacy_response.HttpxBinaryResponseContent, speech, path=["response"])
+
+    @parametrize
+    @pytest.mark.respx(base_url=base_url)
+    def test_streaming_response_create(self, client: OpenAI, respx_mock: MockRouter) -> None:
+        respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
+        with client.audio.speech.with_streaming_response.create(
+            input="string",
+            model="string",
+            voice="alloy",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            speech = response.parse()
+            assert_matches_type(bytes, speech, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
 
 
 class TestAsyncSpeech:
@@ -70,7 +90,6 @@ class TestAsyncSpeech:
     loose_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False)
     parametrize = pytest.mark.parametrize("client", [strict_client, loose_client], ids=["strict", "loose"])
 
-    @pytest.mark.skip(reason="Mocked tests are currently broken")
     @parametrize
     @pytest.mark.respx(base_url=base_url)
     async def test_method_create(self, client: AsyncOpenAI, respx_mock: MockRouter) -> None:
@@ -80,10 +99,9 @@ class TestAsyncSpeech:
             model="string",
             voice="alloy",
         )
-        assert isinstance(speech, BinaryResponseContent)
+        assert isinstance(speech, _legacy_response.HttpxBinaryResponseContent)
         assert speech.json() == {"foo": "bar"}
 
-    @pytest.mark.skip(reason="Mocked tests are currently broken")
     @parametrize
     @pytest.mark.respx(base_url=base_url)
     async def test_method_create_with_all_params(self, client: AsyncOpenAI, respx_mock: MockRouter) -> None:
@@ -95,20 +113,38 @@ class TestAsyncSpeech:
             response_format="mp3",
             speed=0.25,
         )
-        assert isinstance(speech, BinaryResponseContent)
+        assert isinstance(speech, _legacy_response.HttpxBinaryResponseContent)
         assert speech.json() == {"foo": "bar"}
 
-    @pytest.mark.skip(reason="Mocked tests are currently broken")
     @parametrize
     @pytest.mark.respx(base_url=base_url)
     async def test_raw_response_create(self, client: AsyncOpenAI, respx_mock: MockRouter) -> None:
         respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
+
         response = await client.audio.speech.with_raw_response.create(
             input="string",
             model="string",
             voice="alloy",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         speech = response.parse()
-        assert isinstance(speech, BinaryResponseContent)
-        assert speech.json() == {"foo": "bar"}
+        assert_matches_type(_legacy_response.HttpxBinaryResponseContent, speech, path=["response"])
+
+    @parametrize
+    @pytest.mark.respx(base_url=base_url)
+    async def test_streaming_response_create(self, client: AsyncOpenAI, respx_mock: MockRouter) -> None:
+        respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
+        async with client.audio.speech.with_streaming_response.create(
+            input="string",
+            model="string",
+            voice="alloy",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            speech = await response.parse()
+            assert_matches_type(bytes, speech, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
tests/api_resources/audio/test_transcriptions.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -46,10 +47,26 @@ class TestTranscriptions:
             file=b"raw file contents",
             model="whisper-1",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         transcription = response.parse()
         assert_matches_type(Transcription, transcription, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.audio.transcriptions.with_streaming_response.create(
+            file=b"raw file contents",
+            model="whisper-1",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            transcription = response.parse()
+            assert_matches_type(Transcription, transcription, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncTranscriptions:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -82,6 +99,22 @@ class TestAsyncTranscriptions:
             file=b"raw file contents",
             model="whisper-1",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         transcription = response.parse()
         assert_matches_type(Transcription, transcription, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create(self, client: AsyncOpenAI) -> None:
+        async with client.audio.transcriptions.with_streaming_response.create(
+            file=b"raw file contents",
+            model="whisper-1",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            transcription = await response.parse()
+            assert_matches_type(Transcription, transcription, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
tests/api_resources/audio/test_translations.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -45,10 +46,26 @@ class TestTranslations:
             file=b"raw file contents",
             model="whisper-1",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         translation = response.parse()
         assert_matches_type(Translation, translation, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.audio.translations.with_streaming_response.create(
+            file=b"raw file contents",
+            model="whisper-1",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            translation = response.parse()
+            assert_matches_type(Translation, translation, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncTranslations:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -80,6 +97,22 @@ class TestAsyncTranslations:
             file=b"raw file contents",
             model="whisper-1",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         translation = response.parse()
         assert_matches_type(Translation, translation, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create(self, client: AsyncOpenAI) -> None:
+        async with client.audio.translations.with_streaming_response.create(
+            file=b"raw file contents",
+            model="whisper-1",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            translation = await response.parse()
+            assert_matches_type(Translation, translation, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
tests/api_resources/beta/assistants/test_files.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -35,10 +36,26 @@ class TestFiles:
             "file-abc123",
             file_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(AssistantFile, file, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.beta.assistants.files.with_streaming_response.create(
+            "file-abc123",
+            file_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = response.parse()
+            assert_matches_type(AssistantFile, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_retrieve(self, client: OpenAI) -> None:
         file = client.beta.assistants.files.retrieve(
@@ -53,10 +70,26 @@ class TestFiles:
             "string",
             assistant_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(AssistantFile, file, path=["response"])
 
+    @parametrize
+    def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+        with client.beta.assistants.files.with_streaming_response.retrieve(
+            "string",
+            assistant_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = response.parse()
+            assert_matches_type(AssistantFile, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_list(self, client: OpenAI) -> None:
         file = client.beta.assistants.files.list(
@@ -80,10 +113,25 @@ class TestFiles:
         response = client.beta.assistants.files.with_raw_response.list(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(SyncCursorPage[AssistantFile], file, path=["response"])
 
+    @parametrize
+    def test_streaming_response_list(self, client: OpenAI) -> None:
+        with client.beta.assistants.files.with_streaming_response.list(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = response.parse()
+            assert_matches_type(SyncCursorPage[AssistantFile], file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_delete(self, client: OpenAI) -> None:
         file = client.beta.assistants.files.delete(
@@ -98,10 +146,26 @@ class TestFiles:
             "string",
             assistant_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(FileDeleteResponse, file, path=["response"])
 
+    @parametrize
+    def test_streaming_response_delete(self, client: OpenAI) -> None:
+        with client.beta.assistants.files.with_streaming_response.delete(
+            "string",
+            assistant_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = response.parse()
+            assert_matches_type(FileDeleteResponse, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncFiles:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -122,10 +186,26 @@ class TestAsyncFiles:
             "file-abc123",
             file_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(AssistantFile, file, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_create(self, client: AsyncOpenAI) -> None:
+        async with client.beta.assistants.files.with_streaming_response.create(
+            "file-abc123",
+            file_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = await response.parse()
+            assert_matches_type(AssistantFile, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_retrieve(self, client: AsyncOpenAI) -> None:
         file = await client.beta.assistants.files.retrieve(
@@ -140,10 +220,26 @@ class TestAsyncFiles:
             "string",
             assistant_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(AssistantFile, file, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_retrieve(self, client: AsyncOpenAI) -> None:
+        async with client.beta.assistants.files.with_streaming_response.retrieve(
+            "string",
+            assistant_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = await response.parse()
+            assert_matches_type(AssistantFile, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_list(self, client: AsyncOpenAI) -> None:
         file = await client.beta.assistants.files.list(
@@ -167,10 +263,25 @@ class TestAsyncFiles:
         response = await client.beta.assistants.files.with_raw_response.list(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(AsyncCursorPage[AssistantFile], file, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_list(self, client: AsyncOpenAI) -> None:
+        async with client.beta.assistants.files.with_streaming_response.list(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = await response.parse()
+            assert_matches_type(AsyncCursorPage[AssistantFile], file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_delete(self, client: AsyncOpenAI) -> None:
         file = await client.beta.assistants.files.delete(
@@ -185,6 +296,22 @@ class TestAsyncFiles:
             "string",
             assistant_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(FileDeleteResponse, file, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_delete(self, client: AsyncOpenAI) -> None:
+        async with client.beta.assistants.files.with_streaming_response.delete(
+            "string",
+            assistant_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = await response.parse()
+            assert_matches_type(FileDeleteResponse, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
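
The streaming-response tests added above all follow one shape: enter the response as a context manager, confirm it is still open, parse inside the block, and check that exiting the block closed it. A minimal usage sketch of that pattern (the IDs are hypothetical placeholders, not values from this diff):

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

with client.beta.assistants.files.with_streaming_response.retrieve(
    "file-abc123",               # hypothetical file ID
    assistant_id="asst_abc123",  # hypothetical assistant ID
) as response:
    assert not response.is_closed  # body not read yet
    file = response.parse()        # read and deserialize while the block is open
assert response.is_closed          # leaving the block closes the response
```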
tests/api_resources/beta/threads/messages/test_files.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -37,10 +38,27 @@ class TestFiles:
             thread_id="thread_abc123",
             message_id="msg_abc123",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(MessageFile, file, path=["response"])
 
+    @parametrize
+    def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+        with client.beta.threads.messages.files.with_streaming_response.retrieve(
+            "file-abc123",
+            thread_id="thread_abc123",
+            message_id="msg_abc123",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = response.parse()
+            assert_matches_type(MessageFile, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_list(self, client: OpenAI) -> None:
         file = client.beta.threads.messages.files.list(
@@ -67,10 +85,26 @@ class TestFiles:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(SyncCursorPage[MessageFile], file, path=["response"])
 
+    @parametrize
+    def test_streaming_response_list(self, client: OpenAI) -> None:
+        with client.beta.threads.messages.files.with_streaming_response.list(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = response.parse()
+            assert_matches_type(SyncCursorPage[MessageFile], file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncFiles:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -93,10 +127,27 @@ class TestAsyncFiles:
             thread_id="thread_abc123",
             message_id="msg_abc123",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(MessageFile, file, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_retrieve(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.messages.files.with_streaming_response.retrieve(
+            "file-abc123",
+            thread_id="thread_abc123",
+            message_id="msg_abc123",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = await response.parse()
+            assert_matches_type(MessageFile, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_list(self, client: AsyncOpenAI) -> None:
         file = await client.beta.threads.messages.files.list(
@@ -123,6 +174,22 @@ class TestAsyncFiles:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(AsyncCursorPage[MessageFile], file, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_list(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.messages.files.with_streaming_response.list(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = await response.parse()
+            assert_matches_type(AsyncCursorPage[MessageFile], file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
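
Alongside the new streaming tests, every existing `with_raw_response` test gains an `assert response.is_closed is True` immediately after the call: on the non-streaming raw path, the body has already been read and the connection released by the time the method returns, so no context manager is needed there. A short sketch contrasting the two paths, reusing the `client` from the sketch above (IDs again hypothetical):

```python
# Raw path: the response comes back already closed, per the new assertions.
raw = client.beta.threads.messages.files.with_raw_response.list(
    "msg_abc123",               # hypothetical message ID
    thread_id="thread_abc123",  # hypothetical thread ID
)
assert raw.is_closed is True
files = raw.parse()

# Streaming path: the response stays open until the context manager exits.
with client.beta.threads.messages.files.with_streaming_response.list(
    "msg_abc123",
    thread_id="thread_abc123",
) as streamed:
    files = streamed.parse()
assert streamed.is_closed
```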
tests/api_resources/beta/threads/runs/test_steps.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -37,10 +38,27 @@ class TestSteps:
             thread_id="string",
             run_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         step = response.parse()
         assert_matches_type(RunStep, step, path=["response"])
 
+    @parametrize
+    def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+        with client.beta.threads.runs.steps.with_streaming_response.retrieve(
+            "string",
+            thread_id="string",
+            run_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            step = response.parse()
+            assert_matches_type(RunStep, step, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_list(self, client: OpenAI) -> None:
         step = client.beta.threads.runs.steps.list(
@@ -67,10 +85,26 @@ class TestSteps:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         step = response.parse()
         assert_matches_type(SyncCursorPage[RunStep], step, path=["response"])
 
+    @parametrize
+    def test_streaming_response_list(self, client: OpenAI) -> None:
+        with client.beta.threads.runs.steps.with_streaming_response.list(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            step = response.parse()
+            assert_matches_type(SyncCursorPage[RunStep], step, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncSteps:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -93,10 +127,27 @@ class TestAsyncSteps:
             thread_id="string",
             run_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         step = response.parse()
         assert_matches_type(RunStep, step, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_retrieve(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.runs.steps.with_streaming_response.retrieve(
+            "string",
+            thread_id="string",
+            run_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            step = await response.parse()
+            assert_matches_type(RunStep, step, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_list(self, client: AsyncOpenAI) -> None:
         step = await client.beta.threads.runs.steps.list(
@@ -123,6 +174,22 @@ class TestAsyncSteps:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         step = response.parse()
         assert_matches_type(AsyncCursorPage[RunStep], step, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_list(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.runs.steps.with_streaming_response.list(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            step = await response.parse()
+            assert_matches_type(AsyncCursorPage[RunStep], step, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
tests/api_resources/beta/threads/test_messages.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -48,10 +49,27 @@ class TestMessages:
             content="x",
             role="user",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
         assert_matches_type(ThreadMessage, message, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.beta.threads.messages.with_streaming_response.create(
+            "string",
+            content="x",
+            role="user",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            message = response.parse()
+            assert_matches_type(ThreadMessage, message, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_retrieve(self, client: OpenAI) -> None:
         message = client.beta.threads.messages.retrieve(
@@ -66,10 +84,26 @@ class TestMessages:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
         assert_matches_type(ThreadMessage, message, path=["response"])
 
+    @parametrize
+    def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+        with client.beta.threads.messages.with_streaming_response.retrieve(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            message = response.parse()
+            assert_matches_type(ThreadMessage, message, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_update(self, client: OpenAI) -> None:
         message = client.beta.threads.messages.update(
@@ -93,10 +127,26 @@ class TestMessages:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
         assert_matches_type(ThreadMessage, message, path=["response"])
 
+    @parametrize
+    def test_streaming_response_update(self, client: OpenAI) -> None:
+        with client.beta.threads.messages.with_streaming_response.update(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            message = response.parse()
+            assert_matches_type(ThreadMessage, message, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_list(self, client: OpenAI) -> None:
         message = client.beta.threads.messages.list(
@@ -120,10 +170,25 @@ class TestMessages:
         response = client.beta.threads.messages.with_raw_response.list(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
         assert_matches_type(SyncCursorPage[ThreadMessage], message, path=["response"])
 
+    @parametrize
+    def test_streaming_response_list(self, client: OpenAI) -> None:
+        with client.beta.threads.messages.with_streaming_response.list(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            message = response.parse()
+            assert_matches_type(SyncCursorPage[ThreadMessage], message, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncMessages:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -157,10 +222,27 @@ class TestAsyncMessages:
             content="x",
             role="user",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
         assert_matches_type(ThreadMessage, message, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_create(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.messages.with_streaming_response.create(
+            "string",
+            content="x",
+            role="user",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            message = await response.parse()
+            assert_matches_type(ThreadMessage, message, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_retrieve(self, client: AsyncOpenAI) -> None:
         message = await client.beta.threads.messages.retrieve(
@@ -175,10 +257,26 @@ class TestAsyncMessages:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
         assert_matches_type(ThreadMessage, message, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_retrieve(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.messages.with_streaming_response.retrieve(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            message = await response.parse()
+            assert_matches_type(ThreadMessage, message, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_update(self, client: AsyncOpenAI) -> None:
         message = await client.beta.threads.messages.update(
@@ -202,10 +300,26 @@ class TestAsyncMessages:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
         assert_matches_type(ThreadMessage, message, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_update(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.messages.with_streaming_response.update(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            message = await response.parse()
+            assert_matches_type(ThreadMessage, message, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_list(self, client: AsyncOpenAI) -> None:
         message = await client.beta.threads.messages.list(
@@ -229,6 +343,21 @@ class TestAsyncMessages:
         response = await client.beta.threads.messages.with_raw_response.list(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         message = response.parse()
         assert_matches_type(AsyncCursorPage[ThreadMessage], message, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_list(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.messages.with_streaming_response.list(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            message = await response.parse()
+            assert_matches_type(AsyncCursorPage[ThreadMessage], message, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
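
The async tests mirror the sync ones with two differences visible above: the context manager becomes `async with`, and on the streaming path `response.parse()` must be awaited (the async raw-response tests still call `parse()` synchronously). A sketch of the async pattern under the same hypothetical IDs:

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()  # assumes OPENAI_API_KEY is set in the environment
    async with client.beta.threads.messages.with_streaming_response.retrieve(
        "msg_abc123",               # hypothetical message ID
        thread_id="thread_abc123",  # hypothetical thread ID
    ) as response:
        message = await response.parse()  # parse is a coroutine on this path
    assert response.is_closed


asyncio.run(main())
```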
tests/api_resources/beta/threads/test_runs.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -50,10 +51,26 @@ class TestRuns:
             "string",
             assistant_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         run = response.parse()
         assert_matches_type(Run, run, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.beta.threads.runs.with_streaming_response.create(
+            "string",
+            assistant_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            run = response.parse()
+            assert_matches_type(Run, run, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_retrieve(self, client: OpenAI) -> None:
         run = client.beta.threads.runs.retrieve(
@@ -68,10 +85,26 @@ class TestRuns:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         run = response.parse()
         assert_matches_type(Run, run, path=["response"])
 
+    @parametrize
+    def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+        with client.beta.threads.runs.with_streaming_response.retrieve(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            run = response.parse()
+            assert_matches_type(Run, run, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_update(self, client: OpenAI) -> None:
         run = client.beta.threads.runs.update(
@@ -95,10 +128,26 @@ class TestRuns:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         run = response.parse()
         assert_matches_type(Run, run, path=["response"])
 
+    @parametrize
+    def test_streaming_response_update(self, client: OpenAI) -> None:
+        with client.beta.threads.runs.with_streaming_response.update(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            run = response.parse()
+            assert_matches_type(Run, run, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_list(self, client: OpenAI) -> None:
         run = client.beta.threads.runs.list(
@@ -122,10 +171,25 @@ class TestRuns:
         response = client.beta.threads.runs.with_raw_response.list(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         run = response.parse()
         assert_matches_type(SyncCursorPage[Run], run, path=["response"])
 
+    @parametrize
+    def test_streaming_response_list(self, client: OpenAI) -> None:
+        with client.beta.threads.runs.with_streaming_response.list(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            run = response.parse()
+            assert_matches_type(SyncCursorPage[Run], run, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_cancel(self, client: OpenAI) -> None:
         run = client.beta.threads.runs.cancel(
@@ -140,10 +204,26 @@ class TestRuns:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         run = response.parse()
         assert_matches_type(Run, run, path=["response"])
 
+    @parametrize
+    def test_streaming_response_cancel(self, client: OpenAI) -> None:
+        with client.beta.threads.runs.with_streaming_response.cancel(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            run = response.parse()
+            assert_matches_type(Run, run, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_submit_tool_outputs(self, client: OpenAI) -> None:
         run = client.beta.threads.runs.submit_tool_outputs(
@@ -160,10 +240,27 @@ class TestRuns:
             thread_id="string",
             tool_outputs=[{}, {}, {}],
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         run = response.parse()
         assert_matches_type(Run, run, path=["response"])
 
+    @parametrize
+    def test_streaming_response_submit_tool_outputs(self, client: OpenAI) -> None:
+        with client.beta.threads.runs.with_streaming_response.submit_tool_outputs(
+            "string",
+            thread_id="string",
+            tool_outputs=[{}, {}, {}],
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            run = response.parse()
+            assert_matches_type(Run, run, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncRuns:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -197,10 +294,26 @@ class TestAsyncRuns:
             "string",
             assistant_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         run = response.parse()
         assert_matches_type(Run, run, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_create(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.runs.with_streaming_response.create(
+            "string",
+            assistant_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            run = await response.parse()
+            assert_matches_type(Run, run, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_retrieve(self, client: AsyncOpenAI) -> None:
         run = await client.beta.threads.runs.retrieve(
@@ -215,10 +328,26 @@ class TestAsyncRuns:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         run = response.parse()
         assert_matches_type(Run, run, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_retrieve(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.runs.with_streaming_response.retrieve(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            run = await response.parse()
+            assert_matches_type(Run, run, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_update(self, client: AsyncOpenAI) -> None:
         run = await client.beta.threads.runs.update(
@@ -242,10 +371,26 @@ class TestAsyncRuns:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         run = response.parse()
         assert_matches_type(Run, run, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_update(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.runs.with_streaming_response.update(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            run = await response.parse()
+            assert_matches_type(Run, run, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_list(self, client: AsyncOpenAI) -> None:
         run = await client.beta.threads.runs.list(
@@ -269,10 +414,25 @@ class TestAsyncRuns:
         response = await client.beta.threads.runs.with_raw_response.list(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         run = response.parse()
         assert_matches_type(AsyncCursorPage[Run], run, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_list(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.runs.with_streaming_response.list(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            run = await response.parse()
+            assert_matches_type(AsyncCursorPage[Run], run, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_cancel(self, client: AsyncOpenAI) -> None:
         run = await client.beta.threads.runs.cancel(
@@ -287,10 +447,26 @@ class TestAsyncRuns:
             "string",
             thread_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         run = response.parse()
         assert_matches_type(Run, run, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_cancel(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.runs.with_streaming_response.cancel(
+            "string",
+            thread_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            run = await response.parse()
+            assert_matches_type(Run, run, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_submit_tool_outputs(self, client: AsyncOpenAI) -> None:
         run = await client.beta.threads.runs.submit_tool_outputs(
@@ -307,6 +483,23 @@ class TestAsyncRuns:
             thread_id="string",
             tool_outputs=[{}, {}, {}],
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         run = response.parse()
         assert_matches_type(Run, run, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_submit_tool_outputs(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.runs.with_streaming_response.submit_tool_outputs(
+            "string",
+            thread_id="string",
+            tool_outputs=[{}, {}, {}],
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            run = await response.parse()
+            assert_matches_type(Run, run, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
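
One idiom worth a note: the final check is written `assert cast(Any, response.is_closed) is True` rather than a plain assertion. A plausible reading (not stated in the diff) is that this sidesteps static narrowing: after `assert not response.is_closed` inside the block, a type checker may treat the property as `Literal[False]` and flag a later `is True` comparison, even though the runtime state changed when the block exited. A self-contained illustration of that mechanic with a stand-in class:

```python
from typing import Any, cast


class FakeResponse:
    """Stand-in with the same open/closed shape the tests assert on."""

    def __init__(self) -> None:
        self._closed = False

    @property
    def is_closed(self) -> bool:
        return self._closed

    def close(self) -> None:
        self._closed = True


r = FakeResponse()
assert not r.is_closed  # a checker may narrow `r.is_closed` to Literal[False] here
r.close()               # runtime state changes behind the checker's back
assert cast(Any, r.is_closed) is True  # the cast keeps this from being flagged
```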
tests/api_resources/beta/test_assistants.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -49,10 +50,25 @@ class TestAssistants:
         response = client.beta.assistants.with_raw_response.create(
             model="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         assistant = response.parse()
         assert_matches_type(Assistant, assistant, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.beta.assistants.with_streaming_response.create(
+            model="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            assistant = response.parse()
+            assert_matches_type(Assistant, assistant, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_retrieve(self, client: OpenAI) -> None:
         assistant = client.beta.assistants.retrieve(
@@ -65,10 +81,25 @@ class TestAssistants:
         response = client.beta.assistants.with_raw_response.retrieve(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         assistant = response.parse()
         assert_matches_type(Assistant, assistant, path=["response"])
 
+    @parametrize
+    def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+        with client.beta.assistants.with_streaming_response.retrieve(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            assistant = response.parse()
+            assert_matches_type(Assistant, assistant, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_update(self, client: OpenAI) -> None:
         assistant = client.beta.assistants.update(
@@ -95,10 +126,25 @@ class TestAssistants:
         response = client.beta.assistants.with_raw_response.update(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         assistant = response.parse()
         assert_matches_type(Assistant, assistant, path=["response"])
 
+    @parametrize
+    def test_streaming_response_update(self, client: OpenAI) -> None:
+        with client.beta.assistants.with_streaming_response.update(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            assistant = response.parse()
+            assert_matches_type(Assistant, assistant, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_list(self, client: OpenAI) -> None:
         assistant = client.beta.assistants.list()
@@ -117,10 +163,23 @@ class TestAssistants:
     @parametrize
     def test_raw_response_list(self, client: OpenAI) -> None:
         response = client.beta.assistants.with_raw_response.list()
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         assistant = response.parse()
         assert_matches_type(SyncCursorPage[Assistant], assistant, path=["response"])
 
+    @parametrize
+    def test_streaming_response_list(self, client: OpenAI) -> None:
+        with client.beta.assistants.with_streaming_response.list() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            assistant = response.parse()
+            assert_matches_type(SyncCursorPage[Assistant], assistant, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_delete(self, client: OpenAI) -> None:
         assistant = client.beta.assistants.delete(
@@ -133,10 +192,25 @@ class TestAssistants:
         response = client.beta.assistants.with_raw_response.delete(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         assistant = response.parse()
         assert_matches_type(AssistantDeleted, assistant, path=["response"])
 
+    @parametrize
+    def test_streaming_response_delete(self, client: OpenAI) -> None:
+        with client.beta.assistants.with_streaming_response.delete(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            assistant = response.parse()
+            assert_matches_type(AssistantDeleted, assistant, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncAssistants:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -168,10 +242,25 @@ class TestAsyncAssistants:
         response = await client.beta.assistants.with_raw_response.create(
             model="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         assistant = response.parse()
         assert_matches_type(Assistant, assistant, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_create(self, client: AsyncOpenAI) -> None:
+        async with client.beta.assistants.with_streaming_response.create(
+            model="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            assistant = await response.parse()
+            assert_matches_type(Assistant, assistant, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_retrieve(self, client: AsyncOpenAI) -> None:
         assistant = await client.beta.assistants.retrieve(
@@ -184,10 +273,25 @@ class TestAsyncAssistants:
         response = await client.beta.assistants.with_raw_response.retrieve(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         assistant = response.parse()
         assert_matches_type(Assistant, assistant, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_retrieve(self, client: AsyncOpenAI) -> None:
+        async with client.beta.assistants.with_streaming_response.retrieve(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            assistant = await response.parse()
+            assert_matches_type(Assistant, assistant, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_update(self, client: AsyncOpenAI) -> None:
         assistant = await client.beta.assistants.update(
@@ -214,10 +318,25 @@ class TestAsyncAssistants:
         response = await client.beta.assistants.with_raw_response.update(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         assistant = response.parse()
         assert_matches_type(Assistant, assistant, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_update(self, client: AsyncOpenAI) -> None:
+        async with client.beta.assistants.with_streaming_response.update(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            assistant = await response.parse()
+            assert_matches_type(Assistant, assistant, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_list(self, client: AsyncOpenAI) -> None:
         assistant = await client.beta.assistants.list()
@@ -236,10 +355,23 @@ class TestAsyncAssistants:
     @parametrize
     async def test_raw_response_list(self, client: AsyncOpenAI) -> None:
         response = await client.beta.assistants.with_raw_response.list()
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         assistant = response.parse()
         assert_matches_type(AsyncCursorPage[Assistant], assistant, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_list(self, client: AsyncOpenAI) -> None:
+        async with client.beta.assistants.with_streaming_response.list() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            assistant = await response.parse()
+            assert_matches_type(AsyncCursorPage[Assistant], assistant, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_delete(self, client: AsyncOpenAI) -> None:
         assistant = await client.beta.assistants.delete(
@@ -252,6 +384,21 @@ class TestAsyncAssistants:
         response = await client.beta.assistants.with_raw_response.delete(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         assistant = response.parse()
         assert_matches_type(AssistantDeleted, assistant, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_delete(self, client: AsyncOpenAI) -> None:
+        async with client.beta.assistants.with_streaming_response.delete(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            assistant = await response.parse()
+            assert_matches_type(AssistantDeleted, assistant, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
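
List endpoints keep their pagination types under the streaming wrapper: per the assertions above, `response.parse()` yields the same `SyncCursorPage`/`AsyncCursorPage` the plain methods return. A sketch (iterating `.data` for the current page's items is an assumption consistent with the cursor-page types asserted here, not something this diff shows):

```python
with client.beta.assistants.with_streaming_response.list() as response:
    page = response.parse()      # SyncCursorPage[Assistant], per the tests above
    for assistant in page.data:  # items on the current page
        print(assistant.id)
```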
tests/api_resources/beta/test_threads.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -59,10 +60,23 @@ class TestThreads:
     @parametrize
     def test_raw_response_create(self, client: OpenAI) -> None:
         response = client.beta.threads.with_raw_response.create()
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         thread = response.parse()
         assert_matches_type(Thread, thread, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.beta.threads.with_streaming_response.create() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            thread = response.parse()
+            assert_matches_type(Thread, thread, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_retrieve(self, client: OpenAI) -> None:
         thread = client.beta.threads.retrieve(
@@ -75,10 +89,25 @@ class TestThreads:
         response = client.beta.threads.with_raw_response.retrieve(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         thread = response.parse()
         assert_matches_type(Thread, thread, path=["response"])
 
+    @parametrize
+    def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+        with client.beta.threads.with_streaming_response.retrieve(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            thread = response.parse()
+            assert_matches_type(Thread, thread, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_update(self, client: OpenAI) -> None:
         thread = client.beta.threads.update(
@@ -99,10 +128,25 @@ class TestThreads:
         response = client.beta.threads.with_raw_response.update(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         thread = response.parse()
         assert_matches_type(Thread, thread, path=["response"])
 
+    @parametrize
+    def test_streaming_response_update(self, client: OpenAI) -> None:
+        with client.beta.threads.with_streaming_response.update(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            thread = response.parse()
+            assert_matches_type(Thread, thread, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_delete(self, client: OpenAI) -> None:
         thread = client.beta.threads.delete(
@@ -115,10 +159,25 @@ class TestThreads:
         response = client.beta.threads.with_raw_response.delete(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         thread = response.parse()
         assert_matches_type(ThreadDeleted, thread, path=["response"])
 
+    @parametrize
+    def test_streaming_response_delete(self, client: OpenAI) -> None:
+        with client.beta.threads.with_streaming_response.delete(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            thread = response.parse()
+            assert_matches_type(ThreadDeleted, thread, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_create_and_run(self, client: OpenAI) -> None:
         thread = client.beta.threads.create_and_run(
@@ -165,10 +224,25 @@ class TestThreads:
         response = client.beta.threads.with_raw_response.create_and_run(
             assistant_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         thread = response.parse()
         assert_matches_type(Run, thread, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create_and_run(self, client: OpenAI) -> None:
+        with client.beta.threads.with_streaming_response.create_and_run(
+            assistant_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            thread = response.parse()
+            assert_matches_type(Run, thread, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncThreads:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -210,10 +284,23 @@ class TestAsyncThreads:
     @parametrize
     async def test_raw_response_create(self, client: AsyncOpenAI) -> None:
         response = await client.beta.threads.with_raw_response.create()
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         thread = response.parse()
         assert_matches_type(Thread, thread, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_create(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.with_streaming_response.create() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            thread = await response.parse()
+            assert_matches_type(Thread, thread, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_retrieve(self, client: AsyncOpenAI) -> None:
         thread = await client.beta.threads.retrieve(
@@ -226,10 +313,25 @@ class TestAsyncThreads:
         response = await client.beta.threads.with_raw_response.retrieve(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         thread = response.parse()
         assert_matches_type(Thread, thread, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_retrieve(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.with_streaming_response.retrieve(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            thread = await response.parse()
+            assert_matches_type(Thread, thread, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_update(self, client: AsyncOpenAI) -> None:
         thread = await client.beta.threads.update(
@@ -250,10 +352,25 @@ class TestAsyncThreads:
         response = await client.beta.threads.with_raw_response.update(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         thread = response.parse()
         assert_matches_type(Thread, thread, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_update(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.with_streaming_response.update(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            thread = await response.parse()
+            assert_matches_type(Thread, thread, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_delete(self, client: AsyncOpenAI) -> None:
         thread = await client.beta.threads.delete(
@@ -266,10 +383,25 @@ class TestAsyncThreads:
         response = await client.beta.threads.with_raw_response.delete(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         thread = response.parse()
         assert_matches_type(ThreadDeleted, thread, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_delete(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.with_streaming_response.delete(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            thread = await response.parse()
+            assert_matches_type(ThreadDeleted, thread, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_create_and_run(self, client: AsyncOpenAI) -> None:
         thread = await client.beta.threads.create_and_run(
@@ -316,6 +448,21 @@ class TestAsyncThreads:
         response = await client.beta.threads.with_raw_response.create_and_run(
             assistant_id="string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         thread = response.parse()
         assert_matches_type(Run, thread, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create_and_run(self, client: AsyncOpenAI) -> None:
+        async with client.beta.threads.with_streaming_response.create_and_run(
+            assistant_id="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            thread = await response.parse()
+            assert_matches_type(Run, thread, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
tests/api_resources/chat/test_completions.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -107,13 +108,34 @@ class TestCompletions:
             ],
             model="gpt-3.5-turbo",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         completion = response.parse()
         assert_matches_type(ChatCompletion, completion, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create_overload_1(self, client: OpenAI) -> None:
+        with client.chat.completions.with_streaming_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "system",
+                }
+            ],
+            model="gpt-3.5-turbo",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            completion = response.parse()
+            assert_matches_type(ChatCompletion, completion, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_create_overload_2(self, client: OpenAI) -> None:
-        client.chat.completions.create(
+        completion_stream = client.chat.completions.create(
             messages=[
                 {
                     "content": "string",
@@ -123,10 +145,11 @@ class TestCompletions:
             model="gpt-3.5-turbo",
             stream=True,
         )
+        completion_stream.response.close()
 
     @parametrize
     def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
-        client.chat.completions.create(
+        completion_stream = client.chat.completions.create(
             messages=[
                 {
                     "content": "string",
@@ -185,6 +208,7 @@ class TestCompletions:
             top_p=1,
             user="user-1234",
         )
+        completion_stream.response.close()
 
     @parametrize
     def test_raw_response_create_overload_2(self, client: OpenAI) -> None:
@@ -198,8 +222,30 @@ class TestCompletions:
             model="gpt-3.5-turbo",
             stream=True,
         )
+
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        response.parse()
+        stream = response.parse()
+        stream.close()
+
+    @parametrize
+    def test_streaming_response_create_overload_2(self, client: OpenAI) -> None:
+        with client.chat.completions.with_streaming_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "system",
+                }
+            ],
+            model="gpt-3.5-turbo",
+            stream=True,
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = response.parse()
+            stream.close()
+
+        assert cast(Any, response.is_closed) is True
 
 
 class TestAsyncCompletions:
@@ -294,13 +340,34 @@ class TestAsyncCompletions:
             ],
             model="gpt-3.5-turbo",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         completion = response.parse()
         assert_matches_type(ChatCompletion, completion, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_create_overload_1(self, client: AsyncOpenAI) -> None:
+        async with client.chat.completions.with_streaming_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "system",
+                }
+            ],
+            model="gpt-3.5-turbo",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            completion = await response.parse()
+            assert_matches_type(ChatCompletion, completion, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_create_overload_2(self, client: AsyncOpenAI) -> None:
-        await client.chat.completions.create(
+        completion_stream = await client.chat.completions.create(
             messages=[
                 {
                     "content": "string",
@@ -310,10 +377,11 @@ class TestAsyncCompletions:
             model="gpt-3.5-turbo",
             stream=True,
         )
+        await completion_stream.response.aclose()
 
     @parametrize
     async def test_method_create_with_all_params_overload_2(self, client: AsyncOpenAI) -> None:
-        await client.chat.completions.create(
+        completion_stream = await client.chat.completions.create(
             messages=[
                 {
                     "content": "string",
@@ -372,6 +440,7 @@ class TestAsyncCompletions:
             top_p=1,
             user="user-1234",
         )
+        await completion_stream.response.aclose()
 
     @parametrize
     async def test_raw_response_create_overload_2(self, client: AsyncOpenAI) -> None:
@@ -385,5 +454,27 @@ class TestAsyncCompletions:
             model="gpt-3.5-turbo",
             stream=True,
         )
+
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        response.parse()
+        stream = response.parse()
+        await stream.close()
+
+    @parametrize
+    async def test_streaming_response_create_overload_2(self, client: AsyncOpenAI) -> None:
+        async with client.chat.completions.with_streaming_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "system",
+                }
+            ],
+            model="gpt-3.5-turbo",
+            stream=True,
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = await response.parse()
+            await stream.close()
+
+        assert cast(Any, response.is_closed) is True
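The chat-completion tests above exercise the two cleanup patterns the new API expects from callers. A minimal sketch of both, assuming a configured client; the message content is illustrative:

    from openai import OpenAI

    client = OpenAI()

    # `with_streaming_response` defers reading the body until `.parse()` or
    # iteration, so the context manager is what guarantees the connection is
    # released, even if parsing raises.
    with client.chat.completions.with_streaming_response.create(
        messages=[{"role": "user", "content": "Say this is a test"}],
        model="gpt-3.5-turbo",
    ) as response:
        completion = response.parse()

    # A plain `stream=True` call returns a `Stream`; if you stop consuming it
    # early, close the underlying response yourself, as the tests above do.
    stream = client.chat.completions.create(
        messages=[{"role": "user", "content": "Say this is a test"}],
        model="gpt-3.5-turbo",
        stream=True,
    )
    for chunk in stream:
        break
    stream.response.close()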
tests/api_resources/fine_tuning/test_jobs.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -53,10 +54,26 @@ class TestJobs:
             model="gpt-3.5-turbo",
             training_file="file-abc123",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         job = response.parse()
         assert_matches_type(FineTuningJob, job, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.fine_tuning.jobs.with_streaming_response.create(
+            model="gpt-3.5-turbo",
+            training_file="file-abc123",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            job = response.parse()
+            assert_matches_type(FineTuningJob, job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_retrieve(self, client: OpenAI) -> None:
         job = client.fine_tuning.jobs.retrieve(
@@ -69,10 +86,25 @@ class TestJobs:
         response = client.fine_tuning.jobs.with_raw_response.retrieve(
             "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         job = response.parse()
         assert_matches_type(FineTuningJob, job, path=["response"])
 
+    @parametrize
+    def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+        with client.fine_tuning.jobs.with_streaming_response.retrieve(
+            "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            job = response.parse()
+            assert_matches_type(FineTuningJob, job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_list(self, client: OpenAI) -> None:
         job = client.fine_tuning.jobs.list()
@@ -89,10 +121,23 @@ class TestJobs:
     @parametrize
     def test_raw_response_list(self, client: OpenAI) -> None:
         response = client.fine_tuning.jobs.with_raw_response.list()
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         job = response.parse()
         assert_matches_type(SyncCursorPage[FineTuningJob], job, path=["response"])
 
+    @parametrize
+    def test_streaming_response_list(self, client: OpenAI) -> None:
+        with client.fine_tuning.jobs.with_streaming_response.list() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            job = response.parse()
+            assert_matches_type(SyncCursorPage[FineTuningJob], job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_cancel(self, client: OpenAI) -> None:
         job = client.fine_tuning.jobs.cancel(
@@ -105,10 +150,25 @@ class TestJobs:
         response = client.fine_tuning.jobs.with_raw_response.cancel(
             "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         job = response.parse()
         assert_matches_type(FineTuningJob, job, path=["response"])
 
+    @parametrize
+    def test_streaming_response_cancel(self, client: OpenAI) -> None:
+        with client.fine_tuning.jobs.with_streaming_response.cancel(
+            "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            job = response.parse()
+            assert_matches_type(FineTuningJob, job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_list_events(self, client: OpenAI) -> None:
         job = client.fine_tuning.jobs.list_events(
@@ -130,10 +190,25 @@ class TestJobs:
         response = client.fine_tuning.jobs.with_raw_response.list_events(
             "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         job = response.parse()
         assert_matches_type(SyncCursorPage[FineTuningJobEvent], job, path=["response"])
 
+    @parametrize
+    def test_streaming_response_list_events(self, client: OpenAI) -> None:
+        with client.fine_tuning.jobs.with_streaming_response.list_events(
+            "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            job = response.parse()
+            assert_matches_type(SyncCursorPage[FineTuningJobEvent], job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncJobs:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -169,10 +244,26 @@ class TestAsyncJobs:
             model="gpt-3.5-turbo",
             training_file="file-abc123",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         job = response.parse()
         assert_matches_type(FineTuningJob, job, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_create(self, client: AsyncOpenAI) -> None:
+        async with client.fine_tuning.jobs.with_streaming_response.create(
+            model="gpt-3.5-turbo",
+            training_file="file-abc123",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            job = await response.parse()
+            assert_matches_type(FineTuningJob, job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_retrieve(self, client: AsyncOpenAI) -> None:
         job = await client.fine_tuning.jobs.retrieve(
@@ -185,10 +276,25 @@ class TestAsyncJobs:
         response = await client.fine_tuning.jobs.with_raw_response.retrieve(
             "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         job = response.parse()
         assert_matches_type(FineTuningJob, job, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_retrieve(self, client: AsyncOpenAI) -> None:
+        async with client.fine_tuning.jobs.with_streaming_response.retrieve(
+            "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            job = await response.parse()
+            assert_matches_type(FineTuningJob, job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_list(self, client: AsyncOpenAI) -> None:
         job = await client.fine_tuning.jobs.list()
@@ -205,10 +311,23 @@ class TestAsyncJobs:
     @parametrize
     async def test_raw_response_list(self, client: AsyncOpenAI) -> None:
         response = await client.fine_tuning.jobs.with_raw_response.list()
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         job = response.parse()
         assert_matches_type(AsyncCursorPage[FineTuningJob], job, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_list(self, client: AsyncOpenAI) -> None:
+        async with client.fine_tuning.jobs.with_streaming_response.list() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            job = await response.parse()
+            assert_matches_type(AsyncCursorPage[FineTuningJob], job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_cancel(self, client: AsyncOpenAI) -> None:
         job = await client.fine_tuning.jobs.cancel(
@@ -221,10 +340,25 @@ class TestAsyncJobs:
         response = await client.fine_tuning.jobs.with_raw_response.cancel(
             "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         job = response.parse()
         assert_matches_type(FineTuningJob, job, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_cancel(self, client: AsyncOpenAI) -> None:
+        async with client.fine_tuning.jobs.with_streaming_response.cancel(
+            "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            job = await response.parse()
+            assert_matches_type(FineTuningJob, job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_list_events(self, client: AsyncOpenAI) -> None:
         job = await client.fine_tuning.jobs.list_events(
@@ -246,6 +380,21 @@ class TestAsyncJobs:
         response = await client.fine_tuning.jobs.with_raw_response.list_events(
             "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         job = response.parse()
         assert_matches_type(AsyncCursorPage[FineTuningJobEvent], job, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_list_events(self, client: AsyncOpenAI) -> None:
+        async with client.fine_tuning.jobs.with_streaming_response.list_events(
+            "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            job = await response.parse()
+            assert_matches_type(AsyncCursorPage[FineTuningJobEvent], job, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
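Note the `assert response.is_closed is True` checks added after every `with_raw_response` call: a raw response is read eagerly, so it is already closed by the time the call returns, whereas a streaming response stays open for the duration of the `with` block. A sketch of the contrast, reusing the job ID from these tests:

    from openai import OpenAI

    client = OpenAI()

    # Raw response: the body is buffered up front, so no cleanup is needed.
    raw = client.fine_tuning.jobs.with_raw_response.retrieve("ft-AF1WoRqd3aJAHsqc9NY7iL8F")
    assert raw.is_closed
    job = raw.parse()

    # Streaming response: nothing is read until requested, so exiting the
    # context manager is what closes the connection.
    with client.fine_tuning.jobs.with_streaming_response.retrieve(
        "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
    ) as streamed:
        job = streamed.parse()
    assert streamed.is_closed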
tests/api_resources/test_completions.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -57,21 +58,38 @@ class TestCompletions:
             model="string",
             prompt="This is a test.",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         completion = response.parse()
         assert_matches_type(Completion, completion, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create_overload_1(self, client: OpenAI) -> None:
+        with client.completions.with_streaming_response.create(
+            model="string",
+            prompt="This is a test.",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            completion = response.parse()
+            assert_matches_type(Completion, completion, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_create_overload_2(self, client: OpenAI) -> None:
-        client.completions.create(
+        completion_stream = client.completions.create(
             model="string",
             prompt="This is a test.",
             stream=True,
         )
+        completion_stream.response.close()
 
     @parametrize
     def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
-        client.completions.create(
+        completion_stream = client.completions.create(
             model="string",
             prompt="This is a test.",
             stream=True,
@@ -90,6 +108,7 @@ class TestCompletions:
             top_p=1,
             user="user-1234",
         )
+        completion_stream.response.close()
 
     @parametrize
     def test_raw_response_create_overload_2(self, client: OpenAI) -> None:
@@ -98,8 +117,25 @@ class TestCompletions:
             prompt="This is a test.",
             stream=True,
         )
+
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        response.parse()
+        stream = response.parse()
+        stream.close()
+
+    @parametrize
+    def test_streaming_response_create_overload_2(self, client: OpenAI) -> None:
+        with client.completions.with_streaming_response.create(
+            model="string",
+            prompt="This is a test.",
+            stream=True,
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = response.parse()
+            stream.close()
+
+        assert cast(Any, response.is_closed) is True
 
 
 class TestAsyncCompletions:
@@ -144,21 +180,38 @@ class TestAsyncCompletions:
             model="string",
             prompt="This is a test.",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         completion = response.parse()
         assert_matches_type(Completion, completion, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_create_overload_1(self, client: AsyncOpenAI) -> None:
+        async with client.completions.with_streaming_response.create(
+            model="string",
+            prompt="This is a test.",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            completion = await response.parse()
+            assert_matches_type(Completion, completion, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_create_overload_2(self, client: AsyncOpenAI) -> None:
-        await client.completions.create(
+        completion_stream = await client.completions.create(
             model="string",
             prompt="This is a test.",
             stream=True,
         )
+        await completion_stream.response.aclose()
 
     @parametrize
     async def test_method_create_with_all_params_overload_2(self, client: AsyncOpenAI) -> None:
-        await client.completions.create(
+        completion_stream = await client.completions.create(
             model="string",
             prompt="This is a test.",
             stream=True,
@@ -177,6 +230,7 @@ class TestAsyncCompletions:
             top_p=1,
             user="user-1234",
         )
+        await completion_stream.response.aclose()
 
     @parametrize
     async def test_raw_response_create_overload_2(self, client: AsyncOpenAI) -> None:
@@ -185,5 +239,22 @@ class TestAsyncCompletions:
             prompt="This is a test.",
             stream=True,
         )
+
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        response.parse()
+        stream = response.parse()
+        await stream.close()
+
+    @parametrize
+    async def test_streaming_response_create_overload_2(self, client: AsyncOpenAI) -> None:
+        async with client.completions.with_streaming_response.create(
+            model="string",
+            prompt="This is a test.",
+            stream=True,
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = await response.parse()
+            await stream.close()
+
+        assert cast(Any, response.is_closed) is True
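On the async client the same cleanup must be awaited: streams expose `response.aclose()`, streaming responses are entered with `async with`, and `.parse()` is awaited. A sketch under those assumptions (the model name is illustrative, since the tests use a placeholder):

    import asyncio

    from openai import AsyncOpenAI

    async def main() -> None:
        client = AsyncOpenAI()
        stream = await client.completions.create(
            model="gpt-3.5-turbo-instruct",
            prompt="This is a test.",
            stream=True,
        )
        async for chunk in stream:
            break  # stopping early leaves the connection open...
        await stream.response.aclose()  # ...so release it, as the async tests do

    asyncio.run(main())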
tests/api_resources/test_embeddings.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -44,10 +45,26 @@ class TestEmbeddings:
             input="The quick brown fox jumped over the lazy dog",
             model="text-embedding-ada-002",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         embedding = response.parse()
         assert_matches_type(CreateEmbeddingResponse, embedding, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.embeddings.with_streaming_response.create(
+            input="The quick brown fox jumped over the lazy dog",
+            model="text-embedding-ada-002",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            embedding = response.parse()
+            assert_matches_type(CreateEmbeddingResponse, embedding, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncEmbeddings:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -78,6 +95,22 @@ class TestAsyncEmbeddings:
             input="The quick brown fox jumped over the lazy dog",
             model="text-embedding-ada-002",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         embedding = response.parse()
         assert_matches_type(CreateEmbeddingResponse, embedding, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create(self, client: AsyncOpenAI) -> None:
+        async with client.embeddings.with_streaming_response.create(
+            input="The quick brown fox jumped over the lazy dog",
+            model="text-embedding-ada-002",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            embedding = await response.parse()
+            assert_matches_type(CreateEmbeddingResponse, embedding, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
tests/api_resources/test_files.py
@@ -3,15 +3,16 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import httpx
 import pytest
 from respx import MockRouter
 
+import openai._legacy_response as _legacy_response
 from openai import OpenAI, AsyncOpenAI
 from tests.utils import assert_matches_type
 from openai.types import FileObject, FileDeleted
-from openai._types import BinaryResponseContent
 from openai._client import OpenAI, AsyncOpenAI
 from openai.pagination import SyncPage, AsyncPage
 
@@ -40,10 +41,26 @@ class TestFiles:
             file=b"raw file contents",
             purpose="fine-tune",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(FileObject, file, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.files.with_streaming_response.create(
+            file=b"raw file contents",
+            purpose="fine-tune",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = response.parse()
+            assert_matches_type(FileObject, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_retrieve(self, client: OpenAI) -> None:
         file = client.files.retrieve(
@@ -56,10 +73,25 @@ class TestFiles:
         response = client.files.with_raw_response.retrieve(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(FileObject, file, path=["response"])
 
+    @parametrize
+    def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+        with client.files.with_streaming_response.retrieve(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = response.parse()
+            assert_matches_type(FileObject, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_list(self, client: OpenAI) -> None:
         file = client.files.list()
@@ -75,10 +107,23 @@ class TestFiles:
     @parametrize
     def test_raw_response_list(self, client: OpenAI) -> None:
         response = client.files.with_raw_response.list()
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(SyncPage[FileObject], file, path=["response"])
 
+    @parametrize
+    def test_streaming_response_list(self, client: OpenAI) -> None:
+        with client.files.with_streaming_response.list() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = response.parse()
+            assert_matches_type(SyncPage[FileObject], file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_delete(self, client: OpenAI) -> None:
         file = client.files.delete(
@@ -91,10 +136,25 @@ class TestFiles:
         response = client.files.with_raw_response.delete(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(FileDeleted, file, path=["response"])
 
+    @parametrize
+    def test_streaming_response_delete(self, client: OpenAI) -> None:
+        with client.files.with_streaming_response.delete(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = response.parse()
+            assert_matches_type(FileDeleted, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     @pytest.mark.respx(base_url=base_url)
     def test_method_content(self, client: OpenAI, respx_mock: MockRouter) -> None:
@@ -102,20 +162,37 @@ class TestFiles:
         file = client.files.content(
             "string",
         )
-        assert isinstance(file, BinaryResponseContent)
+        assert isinstance(file, _legacy_response.HttpxBinaryResponseContent)
         assert file.json() == {"foo": "bar"}
 
     @parametrize
     @pytest.mark.respx(base_url=base_url)
     def test_raw_response_content(self, client: OpenAI, respx_mock: MockRouter) -> None:
         respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
+
         response = client.files.with_raw_response.content(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
-        assert isinstance(file, BinaryResponseContent)
-        assert file.json() == {"foo": "bar"}
+        assert_matches_type(_legacy_response.HttpxBinaryResponseContent, file, path=["response"])
+
+    @parametrize
+    @pytest.mark.respx(base_url=base_url)
+    def test_streaming_response_content(self, client: OpenAI, respx_mock: MockRouter) -> None:
+        respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
+        with client.files.with_streaming_response.content(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = response.parse()
+            assert_matches_type(bytes, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
 
     @parametrize
     def test_method_retrieve_content(self, client: OpenAI) -> None:
@@ -123,6 +200,7 @@ class TestFiles:
             file = client.files.retrieve_content(
                 "string",
             )
+
         assert_matches_type(str, file, path=["response"])
 
     @parametrize
@@ -131,10 +209,26 @@ class TestFiles:
             response = client.files.with_raw_response.retrieve_content(
                 "string",
             )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(str, file, path=["response"])
 
+    @parametrize
+    def test_streaming_response_retrieve_content(self, client: OpenAI) -> None:
+        with pytest.warns(DeprecationWarning):
+            with client.files.with_streaming_response.retrieve_content(
+                "string",
+            ) as response:
+                assert not response.is_closed
+                assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+                file = response.parse()
+                assert_matches_type(str, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncFiles:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -155,10 +249,26 @@ class TestAsyncFiles:
             file=b"raw file contents",
             purpose="fine-tune",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(FileObject, file, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_create(self, client: AsyncOpenAI) -> None:
+        async with client.files.with_streaming_response.create(
+            file=b"raw file contents",
+            purpose="fine-tune",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = await response.parse()
+            assert_matches_type(FileObject, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_retrieve(self, client: AsyncOpenAI) -> None:
         file = await client.files.retrieve(
@@ -171,10 +281,25 @@ class TestAsyncFiles:
         response = await client.files.with_raw_response.retrieve(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(FileObject, file, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_retrieve(self, client: AsyncOpenAI) -> None:
+        async with client.files.with_streaming_response.retrieve(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = await response.parse()
+            assert_matches_type(FileObject, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_list(self, client: AsyncOpenAI) -> None:
         file = await client.files.list()
@@ -190,10 +315,23 @@ class TestAsyncFiles:
     @parametrize
     async def test_raw_response_list(self, client: AsyncOpenAI) -> None:
         response = await client.files.with_raw_response.list()
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(AsyncPage[FileObject], file, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_list(self, client: AsyncOpenAI) -> None:
+        async with client.files.with_streaming_response.list() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = await response.parse()
+            assert_matches_type(AsyncPage[FileObject], file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_delete(self, client: AsyncOpenAI) -> None:
         file = await client.files.delete(
@@ -206,10 +344,25 @@ class TestAsyncFiles:
         response = await client.files.with_raw_response.delete(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(FileDeleted, file, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_delete(self, client: AsyncOpenAI) -> None:
+        async with client.files.with_streaming_response.delete(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = await response.parse()
+            assert_matches_type(FileDeleted, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     @pytest.mark.respx(base_url=base_url)
     async def test_method_content(self, client: AsyncOpenAI, respx_mock: MockRouter) -> None:
@@ -217,20 +370,37 @@ class TestAsyncFiles:
         file = await client.files.content(
             "string",
         )
-        assert isinstance(file, BinaryResponseContent)
+        assert isinstance(file, _legacy_response.HttpxBinaryResponseContent)
         assert file.json() == {"foo": "bar"}
 
     @parametrize
     @pytest.mark.respx(base_url=base_url)
     async def test_raw_response_content(self, client: AsyncOpenAI, respx_mock: MockRouter) -> None:
         respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
+
         response = await client.files.with_raw_response.content(
             "string",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
-        assert isinstance(file, BinaryResponseContent)
-        assert file.json() == {"foo": "bar"}
+        assert_matches_type(_legacy_response.HttpxBinaryResponseContent, file, path=["response"])
+
+    @parametrize
+    @pytest.mark.respx(base_url=base_url)
+    async def test_streaming_response_content(self, client: AsyncOpenAI, respx_mock: MockRouter) -> None:
+        respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
+        async with client.files.with_streaming_response.content(
+            "string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            file = await response.parse()
+            assert_matches_type(bytes, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
 
     @parametrize
     async def test_method_retrieve_content(self, client: AsyncOpenAI) -> None:
@@ -238,6 +408,7 @@ class TestAsyncFiles:
             file = await client.files.retrieve_content(
                 "string",
             )
+
         assert_matches_type(str, file, path=["response"])
 
     @parametrize
@@ -246,6 +417,22 @@ class TestAsyncFiles:
             response = await client.files.with_raw_response.retrieve_content(
                 "string",
             )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         file = response.parse()
         assert_matches_type(str, file, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_retrieve_content(self, client: AsyncOpenAI) -> None:
+        with pytest.warns(DeprecationWarning):
+            async with client.files.with_streaming_response.retrieve_content(
+                "string",
+            ) as response:
+                assert not response.is_closed
+                assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+                file = await response.parse()
+                assert_matches_type(str, file, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
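The file-content tests also capture the migration from `openai._types.BinaryResponseContent` to `openai._legacy_response.HttpxBinaryResponseContent`: `client.files.content()` still returns the eager legacy object, while `with_streaming_response.content()` lets the body be consumed incrementally. A sketch, assuming a hypothetical file ID and output path:

    from openai import OpenAI

    client = OpenAI()

    # Streamed download: bytes are pulled off the wire inside the `with`
    # block instead of being buffered in memory up front.
    with client.files.with_streaming_response.content("file-abc123") as response:
        response.stream_to_file("output.bin")  # or iterate via response.iter_bytes()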
tests/api_resources/test_images.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -44,10 +45,25 @@ class TestImages:
         response = client.images.with_raw_response.create_variation(
             image=b"raw file contents",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         image = response.parse()
         assert_matches_type(ImagesResponse, image, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create_variation(self, client: OpenAI) -> None:
+        with client.images.with_streaming_response.create_variation(
+            image=b"raw file contents",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            image = response.parse()
+            assert_matches_type(ImagesResponse, image, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_edit(self, client: OpenAI) -> None:
         image = client.images.edit(
@@ -76,10 +92,26 @@ class TestImages:
             image=b"raw file contents",
             prompt="A cute baby sea otter wearing a beret",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         image = response.parse()
         assert_matches_type(ImagesResponse, image, path=["response"])
 
+    @parametrize
+    def test_streaming_response_edit(self, client: OpenAI) -> None:
+        with client.images.with_streaming_response.edit(
+            image=b"raw file contents",
+            prompt="A cute baby sea otter wearing a beret",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            image = response.parse()
+            assert_matches_type(ImagesResponse, image, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_generate(self, client: OpenAI) -> None:
         image = client.images.generate(
@@ -106,10 +138,25 @@ class TestImages:
         response = client.images.with_raw_response.generate(
             prompt="A cute baby sea otter",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         image = response.parse()
         assert_matches_type(ImagesResponse, image, path=["response"])
 
+    @parametrize
+    def test_streaming_response_generate(self, client: OpenAI) -> None:
+        with client.images.with_streaming_response.generate(
+            prompt="A cute baby sea otter",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            image = response.parse()
+            assert_matches_type(ImagesResponse, image, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncImages:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -140,10 +187,25 @@ class TestAsyncImages:
         response = await client.images.with_raw_response.create_variation(
             image=b"raw file contents",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         image = response.parse()
         assert_matches_type(ImagesResponse, image, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_create_variation(self, client: AsyncOpenAI) -> None:
+        async with client.images.with_streaming_response.create_variation(
+            image=b"raw file contents",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            image = await response.parse()
+            assert_matches_type(ImagesResponse, image, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_edit(self, client: AsyncOpenAI) -> None:
         image = await client.images.edit(
@@ -172,10 +234,26 @@ class TestAsyncImages:
             image=b"raw file contents",
             prompt="A cute baby sea otter wearing a beret",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         image = response.parse()
         assert_matches_type(ImagesResponse, image, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_edit(self, client: AsyncOpenAI) -> None:
+        async with client.images.with_streaming_response.edit(
+            image=b"raw file contents",
+            prompt="A cute baby sea otter wearing a beret",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            image = await response.parse()
+            assert_matches_type(ImagesResponse, image, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_generate(self, client: AsyncOpenAI) -> None:
         image = await client.images.generate(
@@ -202,6 +280,21 @@ class TestAsyncImages:
         response = await client.images.with_raw_response.generate(
             prompt="A cute baby sea otter",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         image = response.parse()
         assert_matches_type(ImagesResponse, image, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_generate(self, client: AsyncOpenAI) -> None:
+        async with client.images.with_streaming_response.generate(
+            prompt="A cute baby sea otter",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            image = await response.parse()
+            assert_matches_type(ImagesResponse, image, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
tests/api_resources/test_models.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -33,10 +34,25 @@ class TestModels:
         response = client.models.with_raw_response.retrieve(
             "gpt-3.5-turbo",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         model = response.parse()
         assert_matches_type(Model, model, path=["response"])
 
+    @parametrize
+    def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+        with client.models.with_streaming_response.retrieve(
+            "gpt-3.5-turbo",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            model = response.parse()
+            assert_matches_type(Model, model, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_list(self, client: OpenAI) -> None:
         model = client.models.list()
@@ -45,10 +61,23 @@ class TestModels:
     @parametrize
     def test_raw_response_list(self, client: OpenAI) -> None:
         response = client.models.with_raw_response.list()
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         model = response.parse()
         assert_matches_type(SyncPage[Model], model, path=["response"])
 
+    @parametrize
+    def test_streaming_response_list(self, client: OpenAI) -> None:
+        with client.models.with_streaming_response.list() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            model = response.parse()
+            assert_matches_type(SyncPage[Model], model, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     def test_method_delete(self, client: OpenAI) -> None:
         model = client.models.delete(
@@ -61,10 +90,25 @@ class TestModels:
         response = client.models.with_raw_response.delete(
             "ft:gpt-3.5-turbo:acemeco:suffix:abc123",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         model = response.parse()
         assert_matches_type(ModelDeleted, model, path=["response"])
 
+    @parametrize
+    def test_streaming_response_delete(self, client: OpenAI) -> None:
+        with client.models.with_streaming_response.delete(
+            "ft:gpt-3.5-turbo:acemeco:suffix:abc123",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            model = response.parse()
+            assert_matches_type(ModelDeleted, model, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncModels:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -83,10 +127,25 @@ class TestAsyncModels:
         response = await client.models.with_raw_response.retrieve(
             "gpt-3.5-turbo",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         model = response.parse()
         assert_matches_type(Model, model, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_retrieve(self, client: AsyncOpenAI) -> None:
+        async with client.models.with_streaming_response.retrieve(
+            "gpt-3.5-turbo",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            model = await response.parse()
+            assert_matches_type(Model, model, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_list(self, client: AsyncOpenAI) -> None:
         model = await client.models.list()
@@ -95,10 +154,23 @@ class TestAsyncModels:
     @parametrize
     async def test_raw_response_list(self, client: AsyncOpenAI) -> None:
         response = await client.models.with_raw_response.list()
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         model = response.parse()
         assert_matches_type(AsyncPage[Model], model, path=["response"])
 
+    @parametrize
+    async def test_streaming_response_list(self, client: AsyncOpenAI) -> None:
+        async with client.models.with_streaming_response.list() as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            model = await response.parse()
+            assert_matches_type(AsyncPage[Model], model, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
     @parametrize
     async def test_method_delete(self, client: AsyncOpenAI) -> None:
         model = await client.models.delete(
@@ -111,6 +183,21 @@ class TestAsyncModels:
         response = await client.models.with_raw_response.delete(
             "ft:gpt-3.5-turbo:acemeco:suffix:abc123",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         model = response.parse()
         assert_matches_type(ModelDeleted, model, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_delete(self, client: AsyncOpenAI) -> None:
+        async with client.models.with_streaming_response.delete(
+            "ft:gpt-3.5-turbo:acemeco:suffix:abc123",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            model = await response.parse()
+            assert_matches_type(ModelDeleted, model, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
tests/api_resources/test_moderations.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+from typing import Any, cast
 
 import pytest
 
@@ -40,10 +41,25 @@ class TestModerations:
         response = client.moderations.with_raw_response.create(
             input="I want to kill them.",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         moderation = response.parse()
         assert_matches_type(ModerationCreateResponse, moderation, path=["response"])
 
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.moderations.with_streaming_response.create(
+            input="I want to kill them.",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            moderation = response.parse()
+            assert_matches_type(ModerationCreateResponse, moderation, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
 
 class TestAsyncModerations:
     strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -70,6 +86,21 @@ class TestAsyncModerations:
         response = await client.moderations.with_raw_response.create(
             input="I want to kill them.",
         )
+
+        assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         moderation = response.parse()
         assert_matches_type(ModerationCreateResponse, moderation, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create(self, client: AsyncOpenAI) -> None:
+        async with client.moderations.with_streaming_response.create(
+            input="I want to kill them.",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            moderation = await response.parse()
+            assert_matches_type(ModerationCreateResponse, moderation, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
tests/test_client.py
@@ -19,6 +19,8 @@ from pydantic import ValidationError
 from openai import OpenAI, AsyncOpenAI, APIResponseValidationError
 from openai._client import OpenAI, AsyncOpenAI
 from openai._models import BaseModel, FinalRequestOptions
+from openai._response import APIResponse, AsyncAPIResponse
+from openai._constants import RAW_RESPONSE_HEADER
 from openai._streaming import Stream, AsyncStream
 from openai._exceptions import OpenAIError, APIStatusError, APITimeoutError, APIResponseValidationError
 from openai._base_client import DEFAULT_TIMEOUT, HTTPX_DEFAULT_TIMEOUT, BaseClient, make_request_options
@@ -220,6 +222,7 @@ class TestOpenAI:
                         # to_raw_response_wrapper leaks through the @functools.wraps() decorator.
                         #
                         # removing the decorator fixes the leak for reasons we don't understand.
+                        "openai/_legacy_response.py",
                         "openai/_response.py",
                         # pydantic.BaseModel.model_dump || pydantic.BaseModel.dict leak memory for some reason.
                         "openai/_compat.py",
@@ -612,8 +615,9 @@ class TestOpenAI:
 
         respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
 
-        response = self.client.post("/foo", cast_to=Model, stream=True)
-        assert isinstance(response, Stream)
+        stream = self.client.post("/foo", cast_to=Model, stream=True, stream_cls=Stream[Model])
+        assert isinstance(stream, Stream)
+        stream.response.close()
 
     @pytest.mark.respx(base_url=base_url)
     def test_received_text_for_expected_json(self, respx_mock: MockRouter) -> None:
@@ -661,6 +665,33 @@ class TestOpenAI:
         calculated = client._calculate_retry_timeout(remaining_retries, options, headers)
         assert calculated == pytest.approx(timeout, 0.5 * 0.875)  # pyright: ignore[reportUnknownMemberType]
 
+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    def test_streaming_response(self) -> None:
+        response = self.client.post(
+            "/chat/completions",
+            body=dict(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": "Say this is a test",
+                    }
+                ],
+                model="gpt-3.5-turbo",
+            ),
+            cast_to=APIResponse[bytes],
+            options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
+        )
+
+        assert not cast(Any, response.is_closed)
+        assert _get_open_connections(self.client) == 1
+
+        for _ in response.iter_bytes():
+            ...
+
+        assert cast(Any, response.is_closed)
+        assert _get_open_connections(self.client) == 0
+
     @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
     @pytest.mark.respx(base_url=base_url)
     def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
@@ -679,7 +710,7 @@ class TestOpenAI:
                     model="gpt-3.5-turbo",
                 ),
                 cast_to=httpx.Response,
-                options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
+                options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
             )
 
         assert _get_open_connections(self.client) == 0
@@ -702,7 +733,7 @@ class TestOpenAI:
                     model="gpt-3.5-turbo",
                 ),
                 cast_to=httpx.Response,
-                options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
+                options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
             )
 
         assert _get_open_connections(self.client) == 0
@@ -883,6 +914,7 @@ class TestAsyncOpenAI:
                         # to_raw_response_wrapper leaks through the @functools.wraps() decorator.
                         #
                         # removing the decorator fixes the leak for reasons we don't understand.
+                        "openai/_legacy_response.py",
                         "openai/_response.py",
                         # pydantic.BaseModel.model_dump || pydantic.BaseModel.dict leak memory for some reason.
                         "openai/_compat.py",
@@ -1288,8 +1320,9 @@ class TestAsyncOpenAI:
 
         respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
 
-        response = await self.client.post("/foo", cast_to=Model, stream=True)
-        assert isinstance(response, AsyncStream)
+        stream = await self.client.post("/foo", cast_to=Model, stream=True, stream_cls=AsyncStream[Model])
+        assert isinstance(stream, AsyncStream)
+        await stream.response.aclose()
 
     @pytest.mark.respx(base_url=base_url)
     @pytest.mark.asyncio
@@ -1339,6 +1372,33 @@ class TestAsyncOpenAI:
         calculated = client._calculate_retry_timeout(remaining_retries, options, headers)
         assert calculated == pytest.approx(timeout, 0.5 * 0.875)  # pyright: ignore[reportUnknownMemberType]
 
+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    async def test_streaming_response(self) -> None:
+        response = await self.client.post(
+            "/chat/completions",
+            body=dict(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": "Say this is a test",
+                    }
+                ],
+                model="gpt-3.5-turbo",
+            ),
+            cast_to=AsyncAPIResponse[bytes],
+            options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
+        )
+
+        assert not cast(Any, response.is_closed)
+        assert _get_open_connections(self.client) == 1
+
+        async for _ in response.iter_bytes():
+            ...
+
+        assert cast(Any, response.is_closed)
+        assert _get_open_connections(self.client) == 0
+
     @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
     @pytest.mark.respx(base_url=base_url)
     async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
@@ -1357,7 +1417,7 @@ class TestAsyncOpenAI:
                     model="gpt-3.5-turbo",
                 ),
                 cast_to=httpx.Response,
-                options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
+                options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
             )
 
         assert _get_open_connections(self.client) == 0
@@ -1380,7 +1440,7 @@ class TestAsyncOpenAI:
                     model="gpt-3.5-turbo",
                 ),
                 cast_to=httpx.Response,
-                options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
+                options={"headers": {RAW_RESPONSE_HEADER: "stream"}},
             )
 
         assert _get_open_connections(self.client) == 0
tests/test_response.py
@@ -0,0 +1,50 @@
+from typing import List
+
+import httpx
+import pytest
+
+from openai._response import (
+    APIResponse,
+    BaseAPIResponse,
+    AsyncAPIResponse,
+    BinaryAPIResponse,
+    AsyncBinaryAPIResponse,
+    extract_response_type,
+)
+
+
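+# Concrete subclasses used to check that `extract_response_type` can
+# recover the generic type argument from a subclass, not just from a
+# directly parameterised class like `APIResponse[str]`.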
+class ConcreteBaseAPIResponse(APIResponse[bytes]):
+    ...
+
+
+class ConcreteAPIResponse(APIResponse[List[str]]):
+    ...
+
+
+class ConcreteAsyncAPIResponse(APIResponse[httpx.Response]):
+    ...
+
+
+def test_extract_response_type_direct_classes() -> None:
+    assert extract_response_type(BaseAPIResponse[str]) == str
+    assert extract_response_type(APIResponse[str]) == str
+    assert extract_response_type(AsyncAPIResponse[str]) == str
+
+
+def test_extract_response_type_direct_class_missing_type_arg() -> None:
+    with pytest.raises(
+        RuntimeError,
+        match="Expected type <class 'openai._response.AsyncAPIResponse'> to have a type argument at index 0 but it did not",
+    ):
+        extract_response_type(AsyncAPIResponse)
+
+
+def test_extract_response_type_concrete_subclasses() -> None:
+    assert extract_response_type(ConcreteBaseAPIResponse) == bytes
+    assert extract_response_type(ConcreteAPIResponse) == List[str]
+    assert extract_response_type(ConcreteAsyncAPIResponse) == httpx.Response
+
+
+def test_extract_response_type_binary_response() -> None:
+    assert extract_response_type(BinaryAPIResponse) == bytes
+    assert extract_response_type(AsyncBinaryAPIResponse) == bytes
tests/utils.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import os
+import inspect
 import traceback
 import contextlib
 from typing import Any, TypeVar, Iterator, cast
@@ -68,6 +69,8 @@ def assert_matches_type(
         assert isinstance(value, bool)
     elif origin == float:
         assert isinstance(value, float)
+    elif origin == bytes:
+        assert isinstance(value, bytes)
     elif origin == datetime:
         assert isinstance(value, datetime)
     elif origin == date:
@@ -100,6 +103,8 @@ def assert_matches_type(
     elif issubclass(origin, BaseModel):
         assert isinstance(value, type_)
         assert assert_matches_model(type_, cast(Any, value), path=path)
+    elif inspect.isclass(origin) and origin.__name__ == "HttpxBinaryResponseContent":
+        assert value.__class__.__name__ == "HttpxBinaryResponseContent"
     else:
         assert None, f"Unhandled field type: {type_}"
 
README.md
@@ -414,7 +414,7 @@ if response.my_field is None:
 
 ### Accessing raw response data (e.g. headers)
 
-The "raw" Response object can be accessed by prefixing `.with_raw_response.` to any HTTP method call.
+The "raw" Response object can be accessed by prefixing `.with_raw_response.` to any HTTP method call, e.g.,
 
 ```py
 from openai import OpenAI
@@ -433,7 +433,40 @@ completion = response.parse()  # get the object that `chat.completions.create()`
 print(completion)
 ```
 
-These methods return an [`APIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_response.py) object.
+These methods return a [`LegacyAPIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_legacy_response.py) object. This is a legacy class, as we're changing it slightly in the next major version.
+
+For the sync client this will mostly be the same, with the exception
+that `content` & `text` will be methods instead of properties. In the
+async client, all methods will be async.
+
+A migration script will be provided, and the migration in general
+should be smooth.
+
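+For illustration, a minimal sketch of the difference. The first part
+reflects the current legacy interface; the commented part is our reading
+of the note above, not a confirmed API:
+
+```python
+# Today (`LegacyAPIResponse`): `.text` is a property.
+response = client.chat.completions.with_raw_response.create(
+    messages=[{"role": "user", "content": "Say this is a test"}],
+    model="gpt-3.5-turbo",
+)
+print(response.text)  # property access
+
+# After the planned change (assumption based on the note above):
+# sync:  print(response.text())        # method call
+# async: print(await response.text())  # awaited method call
+```
+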
+#### `.with_streaming_response`
+
+The above interface eagerly reads the full response body when you make the request, which may not always be what you want.
+
+To stream the response body, use `.with_streaming_response` instead, which requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. In the async client, these are async methods.
+
+As such, `.with_streaming_response` methods return a different [`APIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_response.py) object, and the async client returns an [`AsyncAPIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_response.py) object.
+
+```python
+with client.chat.completions.with_streaming_response.create(
+    messages=[
+        {
+            "role": "user",
+            "content": "Say this is a test",
+        }
+    ],
+    model="gpt-3.5-turbo",
+) as response:
+    print(response.headers.get("X-My-Header"))
+
+    for line in response.iter_lines():
+        print(line)
+```
+
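+The async client works the same way, except the request is made with
+`async with` and iteration uses `async for` (a sketch assuming an
+`AsyncOpenAI` client named `client`):
+
+```python
+async with client.chat.completions.with_streaming_response.create(
+    messages=[
+        {
+            "role": "user",
+            "content": "Say this is a test",
+        }
+    ],
+    model="gpt-3.5-turbo",
+) as response:
+    print(response.headers.get("X-My-Header"))
+
+    async for line in response.iter_lines():
+        print(line)
+```
+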
+The context manager is required so that the response will reliably be closed.
 
 ### Configuring the HTTP client