openai-python/src/openai/resources/uploads/uploads.py at main

  1# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
  2
  3from __future__ import annotations
  4
  5import io
  6import os
  7import logging
  8import builtins
  9from typing import overload
 10from pathlib import Path
 11
 12import anyio
 13import httpx
 14
 15from ... import _legacy_response
 16from .parts import (
 17    Parts,
 18    AsyncParts,
 19    PartsWithRawResponse,
 20    AsyncPartsWithRawResponse,
 21    PartsWithStreamingResponse,
 22    AsyncPartsWithStreamingResponse,
 23)
 24from ...types import FilePurpose, upload_create_params, upload_complete_params
 25from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
 26from ..._utils import maybe_transform, async_maybe_transform
 27from ..._compat import cached_property
 28from ..._resource import SyncAPIResource, AsyncAPIResource
 29from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 30from ..._base_client import make_request_options
 31from ...types.upload import Upload
 32from ...types.file_purpose import FilePurpose
 33
# Public names exported by this module.
__all__ = ["Uploads", "AsyncUploads"]


# Default number of bytes per part used by `upload_file_chunked` when no
# `part_size` is given: 64 * 1024 * 1024 bytes (64 MiB).
DEFAULT_PART_SIZE = 64 * 1024 * 1024

# Module-level logger; `upload_file_chunked` logs one INFO record per uploaded part.
log: logging.Logger = logging.getLogger(__name__)
 41
 42
 43class Uploads(SyncAPIResource):
 44    @cached_property
 45    def parts(self) -> Parts:
 46        return Parts(self._client)
 47
 48    @cached_property
 49    def with_raw_response(self) -> UploadsWithRawResponse:
 50        """
 51        This property can be used as a prefix for any HTTP method call to return
 52        the raw response object instead of the parsed content.
 53
 54        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
 55        """
 56        return UploadsWithRawResponse(self)
 57
 58    @cached_property
 59    def with_streaming_response(self) -> UploadsWithStreamingResponse:
 60        """
 61        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 62
 63        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
 64        """
 65        return UploadsWithStreamingResponse(self)
 66
 67    @overload
 68    def upload_file_chunked(
 69        self,
 70        *,
 71        file: os.PathLike[str],
 72        mime_type: str,
 73        purpose: FilePurpose,
 74        bytes: int | None = None,
 75        part_size: int | None = None,
 76        md5: str | Omit = omit,
 77    ) -> Upload:
 78        """Splits a file into multiple 64MB parts and uploads them sequentially."""
 79
 80    @overload
 81    def upload_file_chunked(
 82        self,
 83        *,
 84        file: bytes,
 85        filename: str,
 86        bytes: int,
 87        mime_type: str,
 88        purpose: FilePurpose,
 89        part_size: int | None = None,
 90        md5: str | Omit = omit,
 91    ) -> Upload:
 92        """Splits an in-memory file into multiple 64MB parts and uploads them sequentially."""
 93
 94    def upload_file_chunked(
 95        self,
 96        *,
 97        file: os.PathLike[str] | bytes,
 98        mime_type: str,
 99        purpose: FilePurpose,
100        filename: str | None = None,
101        bytes: int | None = None,
102        part_size: int | None = None,
103        md5: str | Omit = omit,
104    ) -> Upload:
105        """Splits the given file into multiple parts and uploads them sequentially.
106
107        ```py
108        from pathlib import Path
109
110        client.uploads.upload_file(
111            file=Path("my-paper.pdf"),
112            mime_type="pdf",
113            purpose="assistants",
114        )
115        ```
116        """
117        if isinstance(file, builtins.bytes):
118            if filename is None:
119                raise TypeError("The `filename` argument must be given for in-memory files")
120
121            if bytes is None:
122                raise TypeError("The `bytes` argument must be given for in-memory files")
123        else:
124            if not isinstance(file, Path):
125                file = Path(file)
126
127            if not filename:
128                filename = file.name
129
130            if bytes is None:
131                bytes = file.stat().st_size
132
133        upload = self.create(
134            bytes=bytes,
135            filename=filename,
136            mime_type=mime_type,
137            purpose=purpose,
138        )
139
140        part_ids: list[str] = []
141
142        if part_size is None:
143            part_size = DEFAULT_PART_SIZE
144
145        if isinstance(file, builtins.bytes):
146            buf: io.FileIO | io.BytesIO = io.BytesIO(file)
147        else:
148            buf = io.FileIO(file)
149
150        try:
151            while True:
152                data = buf.read(part_size)
153                if not data:
154                    # EOF
155                    break
156
157                part = self.parts.create(upload_id=upload.id, data=data)
158                log.info("Uploaded part %s for upload %s", part.id, upload.id)
159                part_ids.append(part.id)
160        finally:
161            buf.close()
162
163        return self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5)
164
165    def create(
166        self,
167        *,
168        bytes: int,
169        filename: str,
170        mime_type: str,
171        purpose: FilePurpose,
172        expires_after: upload_create_params.ExpiresAfter | Omit = omit,
173        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
174        # The extra values given here take precedence over values defined on the client or passed to this method.
175        extra_headers: Headers | None = None,
176        extra_query: Query | None = None,
177        extra_body: Body | None = None,
178        timeout: float | httpx.Timeout | None | NotGiven = not_given,
179    ) -> Upload:
180        """
181        Creates an intermediate
182        [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object
183        that you can add
184        [Parts](https://platform.openai.com/docs/api-reference/uploads/part-object) to.
185        Currently, an Upload can accept at most 8 GB in total and expires after an hour
186        after you create it.
187
188        Once you complete the Upload, we will create a
189        [File](https://platform.openai.com/docs/api-reference/files/object) object that
190        contains all the parts you uploaded. This File is usable in the rest of our
191        platform as a regular File object.
192
193        For certain `purpose` values, the correct `mime_type` must be specified. Please
194        refer to documentation for the
195        [supported MIME types for your use case](https://platform.openai.com/docs/assistants/tools/file-search#supported-files).
196
197        For guidance on the proper filename extensions for each purpose, please follow
198        the documentation on
199        [creating a File](https://platform.openai.com/docs/api-reference/files/create).
200
201        Args:
202          bytes: The number of bytes in the file you are uploading.
203
204          filename: The name of the file to upload.
205
206          mime_type: The MIME type of the file.
207
208              This must fall within the supported MIME types for your file purpose. See the
209              supported MIME types for assistants and vision.
210
211          purpose: The intended purpose of the uploaded file.
212
213              See the
214              [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose).
215
216          expires_after: The expiration policy for a file. By default, files with `purpose=batch` expire
217              after 30 days and all other files are persisted until they are manually deleted.
218
219          extra_headers: Send extra headers
220
221          extra_query: Add additional query parameters to the request
222
223          extra_body: Add additional JSON properties to the request
224
225          timeout: Override the client-level default timeout for this request, in seconds
226        """
227        return self._post(
228            "/uploads",
229            body=maybe_transform(
230                {
231                    "bytes": bytes,
232                    "filename": filename,
233                    "mime_type": mime_type,
234                    "purpose": purpose,
235                    "expires_after": expires_after,
236                },
237                upload_create_params.UploadCreateParams,
238            ),
239            options=make_request_options(
240                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
241            ),
242            cast_to=Upload,
243        )
244
245    def cancel(
246        self,
247        upload_id: str,
248        *,
249        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
250        # The extra values given here take precedence over values defined on the client or passed to this method.
251        extra_headers: Headers | None = None,
252        extra_query: Query | None = None,
253        extra_body: Body | None = None,
254        timeout: float | httpx.Timeout | None | NotGiven = not_given,
255    ) -> Upload:
256        """Cancels the Upload.
257
258        No Parts may be added after an Upload is cancelled.
259
260        Args:
261          extra_headers: Send extra headers
262
263          extra_query: Add additional query parameters to the request
264
265          extra_body: Add additional JSON properties to the request
266
267          timeout: Override the client-level default timeout for this request, in seconds
268        """
269        if not upload_id:
270            raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
271        return self._post(
272            f"/uploads/{upload_id}/cancel",
273            options=make_request_options(
274                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
275            ),
276            cast_to=Upload,
277        )
278
279    def complete(
280        self,
281        upload_id: str,
282        *,
283        part_ids: SequenceNotStr[str],
284        md5: str | Omit = omit,
285        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
286        # The extra values given here take precedence over values defined on the client or passed to this method.
287        extra_headers: Headers | None = None,
288        extra_query: Query | None = None,
289        extra_body: Body | None = None,
290        timeout: float | httpx.Timeout | None | NotGiven = not_given,
291    ) -> Upload:
292        """
293        Completes the
294        [Upload](https://platform.openai.com/docs/api-reference/uploads/object).
295
296        Within the returned Upload object, there is a nested
297        [File](https://platform.openai.com/docs/api-reference/files/object) object that
298        is ready to use in the rest of the platform.
299
300        You can specify the order of the Parts by passing in an ordered list of the Part
301        IDs.
302
303        The number of bytes uploaded upon completion must match the number of bytes
304        initially specified when creating the Upload object. No Parts may be added after
305        an Upload is completed.
306
307        Args:
308          part_ids: The ordered list of Part IDs.
309
310          md5: The optional md5 checksum for the file contents to verify if the bytes uploaded
311              matches what you expect.
312
313          extra_headers: Send extra headers
314
315          extra_query: Add additional query parameters to the request
316
317          extra_body: Add additional JSON properties to the request
318
319          timeout: Override the client-level default timeout for this request, in seconds
320        """
321        if not upload_id:
322            raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
323        return self._post(
324            f"/uploads/{upload_id}/complete",
325            body=maybe_transform(
326                {
327                    "part_ids": part_ids,
328                    "md5": md5,
329                },
330                upload_complete_params.UploadCompleteParams,
331            ),
332            options=make_request_options(
333                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
334            ),
335            cast_to=Upload,
336        )
337
338
class AsyncUploads(AsyncAPIResource):
    """Asynchronous access to the `/uploads` endpoints.

    Exposes `create`, `cancel` and `complete`, the `parts` sub-resource, and the
    `upload_file_chunked` helper that drives all of them for a single file.
    """

    @cached_property
    def parts(self) -> AsyncParts:
        """Sub-resource used to add Parts to an Upload."""
        return AsyncParts(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncUploadsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncUploadsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncUploadsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncUploadsWithStreamingResponse(self)

    @overload
    async def upload_file_chunked(
        self,
        *,
        file: os.PathLike[str],
        mime_type: str,
        purpose: FilePurpose,
        bytes: int | None = None,
        part_size: int | None = None,
        md5: str | Omit = omit,
    ) -> Upload:
        """Splits a file into parts of `part_size` bytes (64MB by default) and uploads them sequentially."""

    @overload
    async def upload_file_chunked(
        self,
        *,
        file: bytes,
        filename: str,
        bytes: int,
        mime_type: str,
        purpose: FilePurpose,
        part_size: int | None = None,
        md5: str | Omit = omit,
    ) -> Upload:
        """Splits an in-memory file into parts of `part_size` bytes (64MB by default) and uploads them sequentially."""

    async def upload_file_chunked(
        self,
        *,
        file: os.PathLike[str] | bytes,
        mime_type: str,
        purpose: FilePurpose,
        filename: str | None = None,
        bytes: int | None = None,
        part_size: int | None = None,
        md5: str | Omit = omit,
    ) -> Upload:
        """Splits the given file into multiple parts and uploads them sequentially.

        ```py
        from pathlib import Path

        await client.uploads.upload_file_chunked(
            file=Path("my-paper.pdf"),
            mime_type="application/pdf",
            purpose="assistants",
        )
        ```

        Args:
          file: Either a path to a file on disk or the raw file contents as `bytes`.

          mime_type: The MIME type of the file, forwarded to `create()`.

          purpose: The intended purpose of the uploaded file, forwarded to `create()`.

          filename: The name of the file to upload. Required for in-memory files; for
              paths it defaults to the final component of the path.

          bytes: The total number of bytes in the file. Required for in-memory files;
              for paths it defaults to the size reported by `stat()`.

          part_size: The maximum number of bytes read for each part. Defaults to
              `DEFAULT_PART_SIZE` (64MB).

          md5: The optional md5 checksum, forwarded to `complete()`.

        Raises:
          TypeError: If `file` is `bytes` and `filename` or `bytes` is not given.

          ValueError: If `part_size` is not a positive integer.
        """
        if part_size is None:
            part_size = DEFAULT_PART_SIZE
        elif part_size <= 0:
            # A zero-sized read looks like EOF (no parts would be uploaded) and a
            # negative size reads the whole file as one part, so reject both.
            raise ValueError(f"Expected `part_size` to be a positive integer but received {part_size!r}")

        part_ids: list[str] = []

        if isinstance(file, builtins.bytes):
            if filename is None:
                raise TypeError("The `filename` argument must be given for in-memory files")

            if bytes is None:
                raise TypeError("The `bytes` argument must be given for in-memory files")

            upload = await self.create(
                bytes=bytes,
                filename=filename,
                mime_type=mime_type,
                purpose=purpose,
            )

            # In-memory data: reading from the buffer never blocks, so plain `io` is fine here.
            with io.BytesIO(file) as buf:
                while True:
                    data = buf.read(part_size)
                    if not data:
                        # EOF
                        break

                    part = await self.parts.create(upload_id=upload.id, data=data)
                    log.info("Uploaded part %s for upload %s", part.id, upload.id)
                    part_ids.append(part.id)
        else:
            if not isinstance(file, anyio.Path):
                file = anyio.Path(file)

            if not filename:
                filename = file.name

            if bytes is None:
                stat = await file.stat()
                bytes = stat.st_size

            # Open the file *before* creating the Upload so that an unreadable path
            # fails without leaving an unfinished Upload behind.
            fd = await file.open("rb")
            async with fd:
                upload = await self.create(
                    bytes=bytes,
                    filename=filename,
                    mime_type=mime_type,
                    purpose=purpose,
                )

                while True:
                    data = await fd.read(part_size)
                    if not data:
                        # EOF
                        break

                    part = await self.parts.create(upload_id=upload.id, data=data)
                    log.info("Uploaded part %s for upload %s", part.id, upload.id)
                    part_ids.append(part.id)

        # Part IDs are passed in the order the chunks were read from the file.
        return await self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5)

    async def create(
        self,
        *,
        bytes: int,
        filename: str,
        mime_type: str,
        purpose: FilePurpose,
        expires_after: upload_create_params.ExpiresAfter | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Upload:
        """
        Creates an intermediate
        [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object
        that you can add
        [Parts](https://platform.openai.com/docs/api-reference/uploads/part-object) to.
        Currently, an Upload can accept at most 8 GB in total and expires after an hour
        after you create it.

        Once you complete the Upload, we will create a
        [File](https://platform.openai.com/docs/api-reference/files/object) object that
        contains all the parts you uploaded. This File is usable in the rest of our
        platform as a regular File object.

        For certain `purpose` values, the correct `mime_type` must be specified. Please
        refer to documentation for the
        [supported MIME types for your use case](https://platform.openai.com/docs/assistants/tools/file-search#supported-files).

        For guidance on the proper filename extensions for each purpose, please follow
        the documentation on
        [creating a File](https://platform.openai.com/docs/api-reference/files/create).

        Args:
          bytes: The number of bytes in the file you are uploading.

          filename: The name of the file to upload.

          mime_type: The MIME type of the file.

              This must fall within the supported MIME types for your file purpose. See the
              supported MIME types for assistants and vision.

          purpose: The intended purpose of the uploaded file.

              See the
              [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose).

          expires_after: The expiration policy for a file. By default, files with `purpose=batch` expire
              after 30 days and all other files are persisted until they are manually deleted.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        return await self._post(
            "/uploads",
            body=await async_maybe_transform(
                {
                    "bytes": bytes,
                    "filename": filename,
                    "mime_type": mime_type,
                    "purpose": purpose,
                    "expires_after": expires_after,
                },
                upload_create_params.UploadCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Upload,
        )

    async def cancel(
        self,
        upload_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Upload:
        """Cancels the Upload.

        No Parts may be added after an Upload is cancelled.

        Args:
          upload_id: The ID of the Upload to cancel. Must be non-empty.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `upload_id` is empty.
        """
        if not upload_id:
            raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
        return await self._post(
            f"/uploads/{upload_id}/cancel",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Upload,
        )

    async def complete(
        self,
        upload_id: str,
        *,
        part_ids: SequenceNotStr[str],
        md5: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Upload:
        """
        Completes the
        [Upload](https://platform.openai.com/docs/api-reference/uploads/object).

        Within the returned Upload object, there is a nested
        [File](https://platform.openai.com/docs/api-reference/files/object) object that
        is ready to use in the rest of the platform.

        You can specify the order of the Parts by passing in an ordered list of the Part
        IDs.

        The number of bytes uploaded upon completion must match the number of bytes
        initially specified when creating the Upload object. No Parts may be added after
        an Upload is completed.

        Args:
          upload_id: The ID of the Upload to complete. Must be non-empty.

          part_ids: The ordered list of Part IDs.

          md5: The optional md5 checksum for the file contents to verify if the bytes uploaded
              matches what you expect.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `upload_id` is empty.
        """
        if not upload_id:
            raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}")
        return await self._post(
            f"/uploads/{upload_id}/complete",
            body=await async_maybe_transform(
                {
                    "part_ids": part_ids,
                    "md5": md5,
                },
                upload_complete_params.UploadCompleteParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Upload,
        )
644
645
class UploadsWithRawResponse:
    """Variant of `Uploads` whose HTTP methods go through `to_raw_response_wrapper`.

    `create`, `cancel` and `complete` are each wrapped once at construction time;
    the `parts` sub-resource is wrapped lazily on first access.
    """

    def __init__(self, uploads: Uploads) -> None:
        # Short local alias for the wrapper applied to every bound method below.
        wrap = _legacy_response.to_raw_response_wrapper

        self._uploads = uploads
        self.create = wrap(uploads.create)
        self.cancel = wrap(uploads.cancel)
        self.complete = wrap(uploads.complete)

    @cached_property
    def parts(self) -> PartsWithRawResponse:
        """The wrapped resource's `parts`, exposed as a `PartsWithRawResponse`."""
        inner_parts = self._uploads.parts
        return PartsWithRawResponse(inner_parts)
663
664
class AsyncUploadsWithRawResponse:
    """Variant of `AsyncUploads` whose HTTP methods go through `async_to_raw_response_wrapper`.

    `create`, `cancel` and `complete` are each wrapped once at construction time;
    the `parts` sub-resource is wrapped lazily on first access.
    """

    def __init__(self, uploads: AsyncUploads) -> None:
        # Short local alias for the wrapper applied to every bound method below.
        wrap = _legacy_response.async_to_raw_response_wrapper

        self._uploads = uploads
        self.create = wrap(uploads.create)
        self.cancel = wrap(uploads.cancel)
        self.complete = wrap(uploads.complete)

    @cached_property
    def parts(self) -> AsyncPartsWithRawResponse:
        """The wrapped resource's `parts`, exposed as an `AsyncPartsWithRawResponse`."""
        inner_parts = self._uploads.parts
        return AsyncPartsWithRawResponse(inner_parts)
682
683
class UploadsWithStreamingResponse:
    """Variant of `Uploads` whose HTTP methods go through `to_streamed_response_wrapper`.

    `create`, `cancel` and `complete` are each wrapped once at construction time;
    the `parts` sub-resource is wrapped lazily on first access.
    """

    def __init__(self, uploads: Uploads) -> None:
        self._uploads = uploads

        # One wrapped callable per HTTP method of the underlying resource.
        self.create = to_streamed_response_wrapper(uploads.create)
        self.cancel = to_streamed_response_wrapper(uploads.cancel)
        self.complete = to_streamed_response_wrapper(uploads.complete)

    @cached_property
    def parts(self) -> PartsWithStreamingResponse:
        """The wrapped resource's `parts`, exposed as a `PartsWithStreamingResponse`."""
        inner_parts = self._uploads.parts
        return PartsWithStreamingResponse(inner_parts)
701
702
class AsyncUploadsWithStreamingResponse:
    """Variant of `AsyncUploads` whose HTTP methods go through `async_to_streamed_response_wrapper`.

    `create`, `cancel` and `complete` are each wrapped once at construction time;
    the `parts` sub-resource is wrapped lazily on first access.
    """

    def __init__(self, uploads: AsyncUploads) -> None:
        self._uploads = uploads

        # One wrapped callable per HTTP method of the underlying resource.
        self.create = async_to_streamed_response_wrapper(uploads.create)
        self.cancel = async_to_streamed_response_wrapper(uploads.cancel)
        self.complete = async_to_streamed_response_wrapper(uploads.complete)

    @cached_property
    def parts(self) -> AsyncPartsWithStreamingResponse:
        """The wrapped resource's `parts`, exposed as an `AsyncPartsWithStreamingResponse`."""
        inner_parts = self._uploads.parts
        return AsyncPartsWithStreamingResponse(inner_parts)