openai-python/src/openai/resources/evals/evals.py at main

  1# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
  2
  3from __future__ import annotations
  4
  5from typing import Iterable, Optional
  6from typing_extensions import Literal
  7
  8import httpx
  9
 10from ... import _legacy_response
 11from ...types import eval_list_params, eval_create_params, eval_update_params
 12from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
 13from ..._utils import maybe_transform, async_maybe_transform
 14from ..._compat import cached_property
 15from .runs.runs import (
 16    Runs,
 17    AsyncRuns,
 18    RunsWithRawResponse,
 19    AsyncRunsWithRawResponse,
 20    RunsWithStreamingResponse,
 21    AsyncRunsWithStreamingResponse,
 22)
 23from ..._resource import SyncAPIResource, AsyncAPIResource
 24from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
 25from ...pagination import SyncCursorPage, AsyncCursorPage
 26from ..._base_client import AsyncPaginator, make_request_options
 27from ...types.eval_list_response import EvalListResponse
 28from ...types.eval_create_response import EvalCreateResponse
 29from ...types.eval_delete_response import EvalDeleteResponse
 30from ...types.eval_update_response import EvalUpdateResponse
 31from ...types.eval_retrieve_response import EvalRetrieveResponse
 32from ...types.shared_params.metadata import Metadata
 33
 34__all__ = ["Evals", "AsyncEvals"]
 35
 36
 37class Evals(SyncAPIResource):
 38    @cached_property
 39    def runs(self) -> Runs:
 40        return Runs(self._client)
 41
 42    @cached_property
 43    def with_raw_response(self) -> EvalsWithRawResponse:
 44        """
 45        This property can be used as a prefix for any HTTP method call to return
 46        the raw response object instead of the parsed content.
 47
 48        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
 49        """
 50        return EvalsWithRawResponse(self)
 51
 52    @cached_property
 53    def with_streaming_response(self) -> EvalsWithStreamingResponse:
 54        """
 55        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 56
 57        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
 58        """
 59        return EvalsWithStreamingResponse(self)
 60
 61    def create(
 62        self,
 63        *,
 64        data_source_config: eval_create_params.DataSourceConfig,
 65        testing_criteria: Iterable[eval_create_params.TestingCriterion],
 66        metadata: Optional[Metadata] | Omit = omit,
 67        name: str | Omit = omit,
 68        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
 69        # The extra values given here take precedence over values defined on the client or passed to this method.
 70        extra_headers: Headers | None = None,
 71        extra_query: Query | None = None,
 72        extra_body: Body | None = None,
 73        timeout: float | httpx.Timeout | None | NotGiven = not_given,
 74    ) -> EvalCreateResponse:
 75        """
 76        Create the structure of an evaluation that can be used to test a model's
 77        performance. An evaluation is a set of testing criteria and the config for a
 78        data source, which dictates the schema of the data used in the evaluation. After
 79        creating an evaluation, you can run it on different models and model parameters.
 80        We support several types of graders and datasources. For more information, see
 81        the [Evals guide](https://platform.openai.com/docs/guides/evals).
 82
 83        Args:
 84          data_source_config: The configuration for the data source used for the evaluation runs. Dictates the
 85              schema of the data used in the evaluation.
 86
 87          testing_criteria: A list of graders for all eval runs in this group. Graders can reference
 88              variables in the data source using double curly braces notation, like
 89              `{{item.variable_name}}`. To reference the model's output, use the `sample`
 90              namespace (ie, `{{sample.output_text}}`).
 91
 92          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
 93              for storing additional information about the object in a structured format, and
 94              querying for objects via API or the dashboard.
 95
 96              Keys are strings with a maximum length of 64 characters. Values are strings with
 97              a maximum length of 512 characters.
 98
 99          name: The name of the evaluation.
100
101          extra_headers: Send extra headers
102
103          extra_query: Add additional query parameters to the request
104
105          extra_body: Add additional JSON properties to the request
106
107          timeout: Override the client-level default timeout for this request, in seconds
108        """
109        return self._post(
110            "/evals",
111            body=maybe_transform(
112                {
113                    "data_source_config": data_source_config,
114                    "testing_criteria": testing_criteria,
115                    "metadata": metadata,
116                    "name": name,
117                },
118                eval_create_params.EvalCreateParams,
119            ),
120            options=make_request_options(
121                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
122            ),
123            cast_to=EvalCreateResponse,
124        )
125
126    def retrieve(
127        self,
128        eval_id: str,
129        *,
130        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
131        # The extra values given here take precedence over values defined on the client or passed to this method.
132        extra_headers: Headers | None = None,
133        extra_query: Query | None = None,
134        extra_body: Body | None = None,
135        timeout: float | httpx.Timeout | None | NotGiven = not_given,
136    ) -> EvalRetrieveResponse:
137        """
138        Get an evaluation by ID.
139
140        Args:
141          extra_headers: Send extra headers
142
143          extra_query: Add additional query parameters to the request
144
145          extra_body: Add additional JSON properties to the request
146
147          timeout: Override the client-level default timeout for this request, in seconds
148        """
149        if not eval_id:
150            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
151        return self._get(
152            f"/evals/{eval_id}",
153            options=make_request_options(
154                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
155            ),
156            cast_to=EvalRetrieveResponse,
157        )
158
159    def update(
160        self,
161        eval_id: str,
162        *,
163        metadata: Optional[Metadata] | Omit = omit,
164        name: str | Omit = omit,
165        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
166        # The extra values given here take precedence over values defined on the client or passed to this method.
167        extra_headers: Headers | None = None,
168        extra_query: Query | None = None,
169        extra_body: Body | None = None,
170        timeout: float | httpx.Timeout | None | NotGiven = not_given,
171    ) -> EvalUpdateResponse:
172        """
173        Update certain properties of an evaluation.
174
175        Args:
176          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
177              for storing additional information about the object in a structured format, and
178              querying for objects via API or the dashboard.
179
180              Keys are strings with a maximum length of 64 characters. Values are strings with
181              a maximum length of 512 characters.
182
183          name: Rename the evaluation.
184
185          extra_headers: Send extra headers
186
187          extra_query: Add additional query parameters to the request
188
189          extra_body: Add additional JSON properties to the request
190
191          timeout: Override the client-level default timeout for this request, in seconds
192        """
193        if not eval_id:
194            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
195        return self._post(
196            f"/evals/{eval_id}",
197            body=maybe_transform(
198                {
199                    "metadata": metadata,
200                    "name": name,
201                },
202                eval_update_params.EvalUpdateParams,
203            ),
204            options=make_request_options(
205                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
206            ),
207            cast_to=EvalUpdateResponse,
208        )
209
210    def list(
211        self,
212        *,
213        after: str | Omit = omit,
214        limit: int | Omit = omit,
215        order: Literal["asc", "desc"] | Omit = omit,
216        order_by: Literal["created_at", "updated_at"] | Omit = omit,
217        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
218        # The extra values given here take precedence over values defined on the client or passed to this method.
219        extra_headers: Headers | None = None,
220        extra_query: Query | None = None,
221        extra_body: Body | None = None,
222        timeout: float | httpx.Timeout | None | NotGiven = not_given,
223    ) -> SyncCursorPage[EvalListResponse]:
224        """
225        List evaluations for a project.
226
227        Args:
228          after: Identifier for the last eval from the previous pagination request.
229
230          limit: Number of evals to retrieve.
231
232          order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for
233              descending order.
234
235          order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for
236              creation time or `updated_at` for last updated time.
237
238          extra_headers: Send extra headers
239
240          extra_query: Add additional query parameters to the request
241
242          extra_body: Add additional JSON properties to the request
243
244          timeout: Override the client-level default timeout for this request, in seconds
245        """
246        return self._get_api_list(
247            "/evals",
248            page=SyncCursorPage[EvalListResponse],
249            options=make_request_options(
250                extra_headers=extra_headers,
251                extra_query=extra_query,
252                extra_body=extra_body,
253                timeout=timeout,
254                query=maybe_transform(
255                    {
256                        "after": after,
257                        "limit": limit,
258                        "order": order,
259                        "order_by": order_by,
260                    },
261                    eval_list_params.EvalListParams,
262                ),
263            ),
264            model=EvalListResponse,
265        )
266
267    def delete(
268        self,
269        eval_id: str,
270        *,
271        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
272        # The extra values given here take precedence over values defined on the client or passed to this method.
273        extra_headers: Headers | None = None,
274        extra_query: Query | None = None,
275        extra_body: Body | None = None,
276        timeout: float | httpx.Timeout | None | NotGiven = not_given,
277    ) -> EvalDeleteResponse:
278        """
279        Delete an evaluation.
280
281        Args:
282          extra_headers: Send extra headers
283
284          extra_query: Add additional query parameters to the request
285
286          extra_body: Add additional JSON properties to the request
287
288          timeout: Override the client-level default timeout for this request, in seconds
289        """
290        if not eval_id:
291            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
292        return self._delete(
293            f"/evals/{eval_id}",
294            options=make_request_options(
295                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
296            ),
297            cast_to=EvalDeleteResponse,
298        )
299
300
class AsyncEvals(AsyncAPIResource):
    """Asynchronous resource exposing the `/evals` endpoints and the `runs` sub-resource."""

    @cached_property
    def runs(self) -> AsyncRuns:
        """The `AsyncRuns` sub-resource, constructed from this resource's client."""
        return AsyncRuns(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncEvalsWithRawResponse:
        """
        Prefix any HTTP method call with this property to get the raw response
        object back instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncEvalsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncEvalsWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncEvalsWithStreamingResponse(self)

    async def create(
        self,
        *,
        data_source_config: eval_create_params.DataSourceConfig,
        testing_criteria: Iterable[eval_create_params.TestingCriterion],
        metadata: Optional[Metadata] | Omit = omit,
        name: str | Omit = omit,
        # The arguments below let you send parameters to the API that are not exposed as kwargs.
        # Values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalCreateResponse:
        """
        Create the structure of an evaluation that can be used to test a model's
        performance.

        An evaluation is a set of testing criteria plus the config for a data source,
        which dictates the schema of the data used in the evaluation. Once created, an
        evaluation can be run on different models and model parameters. Several types
        of graders and datasources are supported. For more information, see the
        [Evals guide](https://platform.openai.com/docs/guides/evals).

        Args:
          data_source_config: The configuration for the data source used for the evaluation runs. Dictates the
              schema of the data used in the evaluation.

          testing_criteria: A list of graders for all eval runs in this group. Graders can reference
              variables in the data source using double curly braces notation, like
              `{{item.variable_name}}`. To reference the model's output, use the `sample`
              namespace (ie, `{{sample.output_text}}`).

          metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
              storing additional information about the object in a structured format, and
              for querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          name: The name of the evaluation.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        payload = await async_maybe_transform(
            {
                "data_source_config": data_source_config,
                "testing_criteria": testing_criteria,
                "metadata": metadata,
                "name": name,
            },
            eval_create_params.EvalCreateParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._post(
            "/evals",
            body=payload,
            options=request_options,
            cast_to=EvalCreateResponse,
        )

    async def retrieve(
        self,
        eval_id: str,
        *,
        # The arguments below let you send parameters to the API that are not exposed as kwargs.
        # Values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalRetrieveResponse:
        """
        Get an evaluation by ID.

        Args:
          eval_id: ID of the evaluation; interpolated into the request path and must be
              non-empty.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `eval_id` is empty.
        """
        # Refuse an empty ID up front rather than requesting the bare "/evals/" path.
        if not eval_id:
            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")

        path = f"/evals/{eval_id}"
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._get(path, options=request_options, cast_to=EvalRetrieveResponse)

    async def update(
        self,
        eval_id: str,
        *,
        metadata: Optional[Metadata] | Omit = omit,
        name: str | Omit = omit,
        # The arguments below let you send parameters to the API that are not exposed as kwargs.
        # Values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalUpdateResponse:
        """
        Update certain properties of an evaluation.

        Args:
          eval_id: ID of the evaluation; interpolated into the request path and must be
              non-empty.

          metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
              storing additional information about the object in a structured format, and
              for querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          name: Rename the evaluation.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `eval_id` is empty.
        """
        # Refuse an empty ID up front rather than posting to the bare "/evals/" path.
        if not eval_id:
            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")

        path = f"/evals/{eval_id}"
        payload = await async_maybe_transform(
            {
                "metadata": metadata,
                "name": name,
            },
            eval_update_params.EvalUpdateParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._post(
            path,
            body=payload,
            options=request_options,
            cast_to=EvalUpdateResponse,
        )

    def list(
        self,
        *,
        after: str | Omit = omit,
        limit: int | Omit = omit,
        order: Literal["asc", "desc"] | Omit = omit,
        order_by: Literal["created_at", "updated_at"] | Omit = omit,
        # The arguments below let you send parameters to the API that are not exposed as kwargs.
        # Values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncPaginator[EvalListResponse, AsyncCursorPage[EvalListResponse]]:
        """
        List evaluations for a project.

        This is a plain (non-`async`) method: it returns the paginator object
        directly rather than a coroutine.

        Args:
          after: Identifier for the last eval from the previous pagination request.

          limit: Number of evals to retrieve.

          order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for
              descending order.

          order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for
              creation time or `updated_at` for last updated time.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # The synchronous transform is used here because this method is not a coroutine.
        query_params = maybe_transform(
            {
                "after": after,
                "limit": limit,
                "order": order,
                "order_by": order_by,
            },
            eval_list_params.EvalListParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            query=query_params,
        )
        return self._get_api_list(
            "/evals",
            page=AsyncCursorPage[EvalListResponse],
            options=request_options,
            model=EvalListResponse,
        )

    async def delete(
        self,
        eval_id: str,
        *,
        # The arguments below let you send parameters to the API that are not exposed as kwargs.
        # Values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalDeleteResponse:
        """
        Delete an evaluation.

        Args:
          eval_id: ID of the evaluation; interpolated into the request path and must be
              non-empty.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `eval_id` is empty.
        """
        # Refuse an empty ID up front rather than issuing DELETE on the bare "/evals/" path.
        if not eval_id:
            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")

        path = f"/evals/{eval_id}"
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._delete(path, options=request_options, cast_to=EvalDeleteResponse)
563
564
class EvalsWithRawResponse:
    """View of an `Evals` resource whose methods are wrapped with `to_raw_response_wrapper`."""

    def __init__(self, evals: Evals) -> None:
        # Kept so the `runs` property can reach the wrapped resource's sub-resource.
        self._evals = evals

        # One short alias for the wrapper factory applied to every endpoint method.
        as_raw = _legacy_response.to_raw_response_wrapper

        self.create = as_raw(evals.create)
        self.retrieve = as_raw(evals.retrieve)
        self.update = as_raw(evals.update)
        self.list = as_raw(evals.list)
        self.delete = as_raw(evals.delete)

    @cached_property
    def runs(self) -> RunsWithRawResponse:
        """Raw-response view of the wrapped resource's `runs` sub-resource."""
        return RunsWithRawResponse(self._evals.runs)
588
589
class AsyncEvalsWithRawResponse:
    """View of an `AsyncEvals` resource whose methods are wrapped with `async_to_raw_response_wrapper`."""

    def __init__(self, evals: AsyncEvals) -> None:
        # Kept so the `runs` property can reach the wrapped resource's sub-resource.
        self._evals = evals

        # One short alias for the wrapper factory applied to every endpoint method.
        as_raw = _legacy_response.async_to_raw_response_wrapper

        self.create = as_raw(evals.create)
        self.retrieve = as_raw(evals.retrieve)
        self.update = as_raw(evals.update)
        self.list = as_raw(evals.list)
        self.delete = as_raw(evals.delete)

    @cached_property
    def runs(self) -> AsyncRunsWithRawResponse:
        """Raw-response view of the wrapped resource's `runs` sub-resource."""
        return AsyncRunsWithRawResponse(self._evals.runs)
613
614
class EvalsWithStreamingResponse:
    """View of an `Evals` resource whose methods are wrapped with `to_streamed_response_wrapper`."""

    def __init__(self, evals: Evals) -> None:
        # Kept so the `runs` property can reach the wrapped resource's sub-resource.
        self._evals = evals

        # One short alias for the wrapper factory applied to every endpoint method.
        as_streamed = to_streamed_response_wrapper

        self.create = as_streamed(evals.create)
        self.retrieve = as_streamed(evals.retrieve)
        self.update = as_streamed(evals.update)
        self.list = as_streamed(evals.list)
        self.delete = as_streamed(evals.delete)

    @cached_property
    def runs(self) -> RunsWithStreamingResponse:
        """Streaming-response view of the wrapped resource's `runs` sub-resource."""
        return RunsWithStreamingResponse(self._evals.runs)
638
639
class AsyncEvalsWithStreamingResponse:
    """View of an `AsyncEvals` resource whose methods are wrapped with `async_to_streamed_response_wrapper`."""

    def __init__(self, evals: AsyncEvals) -> None:
        # Kept so the `runs` property can reach the wrapped resource's sub-resource.
        self._evals = evals

        # One short alias for the wrapper factory applied to every endpoint method.
        as_streamed = async_to_streamed_response_wrapper

        self.create = as_streamed(evals.create)
        self.retrieve = as_streamed(evals.retrieve)
        self.update = as_streamed(evals.update)
        self.list = as_streamed(evals.list)
        self.delete = as_streamed(evals.delete)

    @cached_property
    def runs(self) -> AsyncRunsWithStreamingResponse:
        """Streaming-response view of the wrapped resource's `runs` sub-resource."""
        return AsyncRunsWithStreamingResponse(self._evals.runs)