main
1# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
3from __future__ import annotations
4
5from typing import Iterable, Optional
6from typing_extensions import Literal
7
8import httpx
9
10from ... import _legacy_response
11from ...types import eval_list_params, eval_create_params, eval_update_params
12from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
13from ..._utils import maybe_transform, async_maybe_transform
14from ..._compat import cached_property
15from .runs.runs import (
16 Runs,
17 AsyncRuns,
18 RunsWithRawResponse,
19 AsyncRunsWithRawResponse,
20 RunsWithStreamingResponse,
21 AsyncRunsWithStreamingResponse,
22)
23from ..._resource import SyncAPIResource, AsyncAPIResource
24from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
25from ...pagination import SyncCursorPage, AsyncCursorPage
26from ..._base_client import AsyncPaginator, make_request_options
27from ...types.eval_list_response import EvalListResponse
28from ...types.eval_create_response import EvalCreateResponse
29from ...types.eval_delete_response import EvalDeleteResponse
30from ...types.eval_update_response import EvalUpdateResponse
31from ...types.eval_retrieve_response import EvalRetrieveResponse
32from ...types.shared_params.metadata import Metadata
33
# Names exported by this module: the synchronous and asynchronous evals resources.
__all__ = ["Evals", "AsyncEvals"]
35
36
class Evals(SyncAPIResource):
    """Synchronous resource for the `/evals` endpoints, with `runs` exposed as a sub-resource."""

    @cached_property
    def runs(self) -> Runs:
        """The `Runs` sub-resource, constructed with this resource's client."""
        return Runs(self._client)

    @cached_property
    def with_raw_response(self) -> EvalsWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw response
        object rather than the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return EvalsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> EvalsWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return EvalsWithStreamingResponse(self)

    def create(
        self,
        *,
        data_source_config: eval_create_params.DataSourceConfig,
        testing_criteria: Iterable[eval_create_params.TestingCriterion],
        metadata: Optional[Metadata] | Omit = omit,
        name: str | Omit = omit,
        # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalCreateResponse:
        """
        Define the structure of an evaluation used to test a model's performance.

        An evaluation consists of a set of testing criteria plus the config for a
        data source, which dictates the schema of the data used in the evaluation.
        Once created, the evaluation can be run on different models and model
        parameters. Several types of graders and datasources are supported. For more
        information, see the [Evals guide](https://platform.openai.com/docs/guides/evals).

        Args:
          data_source_config: Configuration of the data source used for the evaluation runs; it dictates
              the schema of the data used in the evaluation.

          testing_criteria: List of graders for all eval runs in this group. Graders can refer to
              variables in the data source with double curly braces notation, like
              `{{item.variable_name}}`. The model's output is referenced through the
              `sample` namespace (ie, `{{sample.output_text}}`).

          metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
              storing additional information about the object in a structured format, and
              for querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings
              with a maximum length of 512 characters.

          name: The name of the evaluation.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        payload = maybe_transform(
            {
                "data_source_config": data_source_config,
                "testing_criteria": testing_criteria,
                "metadata": metadata,
                "name": name,
            },
            eval_create_params.EvalCreateParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post(
            "/evals",
            body=payload,
            options=request_options,
            cast_to=EvalCreateResponse,
        )

    def retrieve(
        self,
        eval_id: str,
        *,
        # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalRetrieveResponse:
        """
        Fetch a single evaluation by its ID.

        Args:
          eval_id: ID of the evaluation; interpolated into the request path `/evals/{eval_id}`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `eval_id` is empty (falsy).
        """
        if not eval_id:
            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
        endpoint = f"/evals/{eval_id}"
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._get(
            endpoint,
            options=request_options,
            cast_to=EvalRetrieveResponse,
        )

    def update(
        self,
        eval_id: str,
        *,
        metadata: Optional[Metadata] | Omit = omit,
        name: str | Omit = omit,
        # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalUpdateResponse:
        """
        Change certain properties of an existing evaluation.

        Args:
          eval_id: ID of the evaluation; interpolated into the request path `/evals/{eval_id}`.

          metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
              storing additional information about the object in a structured format, and
              for querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings
              with a maximum length of 512 characters.

          name: Rename the evaluation.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `eval_id` is empty (falsy).
        """
        if not eval_id:
            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
        endpoint = f"/evals/{eval_id}"
        payload = maybe_transform(
            {
                "metadata": metadata,
                "name": name,
            },
            eval_update_params.EvalUpdateParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post(
            endpoint,
            body=payload,
            options=request_options,
            cast_to=EvalUpdateResponse,
        )

    def list(
        self,
        *,
        after: str | Omit = omit,
        limit: int | Omit = omit,
        order: Literal["asc", "desc"] | Omit = omit,
        order_by: Literal["created_at", "updated_at"] | Omit = omit,
        # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> SyncCursorPage[EvalListResponse]:
        """
        List the evaluations of a project.

        Args:
          after: Identifier for the last eval from the previous pagination request.

          limit: Number of evals to retrieve.

          order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for
              descending order.

          order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for
              creation time or `updated_at` for last updated time.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # The pagination and ordering arguments travel as query-string parameters.
        list_query = maybe_transform(
            {
                "after": after,
                "limit": limit,
                "order": order,
                "order_by": order_by,
            },
            eval_list_params.EvalListParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            query=list_query,
        )
        return self._get_api_list(
            "/evals",
            page=SyncCursorPage[EvalListResponse],
            options=request_options,
            model=EvalListResponse,
        )

    def delete(
        self,
        eval_id: str,
        *,
        # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalDeleteResponse:
        """
        Delete an evaluation.

        Args:
          eval_id: ID of the evaluation; interpolated into the request path `/evals/{eval_id}`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `eval_id` is empty (falsy).
        """
        if not eval_id:
            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
        endpoint = f"/evals/{eval_id}"
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._delete(
            endpoint,
            options=request_options,
            cast_to=EvalDeleteResponse,
        )
299
300
class AsyncEvals(AsyncAPIResource):
    """Asynchronous resource for the `/evals` endpoints, with `runs` exposed as a sub-resource."""

    @cached_property
    def runs(self) -> AsyncRuns:
        """The `AsyncRuns` sub-resource, constructed with this resource's client."""
        return AsyncRuns(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncEvalsWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw response
        object rather than the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncEvalsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncEvalsWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncEvalsWithStreamingResponse(self)

    async def create(
        self,
        *,
        data_source_config: eval_create_params.DataSourceConfig,
        testing_criteria: Iterable[eval_create_params.TestingCriterion],
        metadata: Optional[Metadata] | Omit = omit,
        name: str | Omit = omit,
        # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalCreateResponse:
        """
        Define the structure of an evaluation used to test a model's performance.

        An evaluation consists of a set of testing criteria plus the config for a
        data source, which dictates the schema of the data used in the evaluation.
        Once created, the evaluation can be run on different models and model
        parameters. Several types of graders and datasources are supported. For more
        information, see the [Evals guide](https://platform.openai.com/docs/guides/evals).

        Args:
          data_source_config: Configuration of the data source used for the evaluation runs; it dictates
              the schema of the data used in the evaluation.

          testing_criteria: List of graders for all eval runs in this group. Graders can refer to
              variables in the data source with double curly braces notation, like
              `{{item.variable_name}}`. The model's output is referenced through the
              `sample` namespace (ie, `{{sample.output_text}}`).

          metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
              storing additional information about the object in a structured format, and
              for querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings
              with a maximum length of 512 characters.

          name: The name of the evaluation.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        payload = await async_maybe_transform(
            {
                "data_source_config": data_source_config,
                "testing_criteria": testing_criteria,
                "metadata": metadata,
                "name": name,
            },
            eval_create_params.EvalCreateParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._post(
            "/evals",
            body=payload,
            options=request_options,
            cast_to=EvalCreateResponse,
        )

    async def retrieve(
        self,
        eval_id: str,
        *,
        # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalRetrieveResponse:
        """
        Fetch a single evaluation by its ID.

        Args:
          eval_id: ID of the evaluation; interpolated into the request path `/evals/{eval_id}`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `eval_id` is empty (falsy).
        """
        if not eval_id:
            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
        endpoint = f"/evals/{eval_id}"
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._get(
            endpoint,
            options=request_options,
            cast_to=EvalRetrieveResponse,
        )

    async def update(
        self,
        eval_id: str,
        *,
        metadata: Optional[Metadata] | Omit = omit,
        name: str | Omit = omit,
        # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalUpdateResponse:
        """
        Change certain properties of an existing evaluation.

        Args:
          eval_id: ID of the evaluation; interpolated into the request path `/evals/{eval_id}`.

          metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
              storing additional information about the object in a structured format, and
              for querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings
              with a maximum length of 512 characters.

          name: Rename the evaluation.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `eval_id` is empty (falsy).
        """
        if not eval_id:
            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
        endpoint = f"/evals/{eval_id}"
        payload = await async_maybe_transform(
            {
                "metadata": metadata,
                "name": name,
            },
            eval_update_params.EvalUpdateParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._post(
            endpoint,
            body=payload,
            options=request_options,
            cast_to=EvalUpdateResponse,
        )

    def list(
        self,
        *,
        after: str | Omit = omit,
        limit: int | Omit = omit,
        order: Literal["asc", "desc"] | Omit = omit,
        order_by: Literal["created_at", "updated_at"] | Omit = omit,
        # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncPaginator[EvalListResponse, AsyncCursorPage[EvalListResponse]]:
        """
        List the evaluations of a project.

        Args:
          after: Identifier for the last eval from the previous pagination request.

          limit: Number of evals to retrieve.

          order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for
              descending order.

          order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for
              creation time or `updated_at` for last updated time.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # This method is a plain `def`, so the query is built with the synchronous
        # `maybe_transform` rather than the awaitable variant.
        list_query = maybe_transform(
            {
                "after": after,
                "limit": limit,
                "order": order,
                "order_by": order_by,
            },
            eval_list_params.EvalListParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            query=list_query,
        )
        return self._get_api_list(
            "/evals",
            page=AsyncCursorPage[EvalListResponse],
            options=request_options,
            model=EvalListResponse,
        )

    async def delete(
        self,
        eval_id: str,
        *,
        # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalDeleteResponse:
        """
        Delete an evaluation.

        Args:
          eval_id: ID of the evaluation; interpolated into the request path `/evals/{eval_id}`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `eval_id` is empty (falsy).
        """
        if not eval_id:
            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
        endpoint = f"/evals/{eval_id}"
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._delete(
            endpoint,
            options=request_options,
            cast_to=EvalDeleteResponse,
        )
563
564
class EvalsWithRawResponse:
    """Exposes each `Evals` method wrapped with `_legacy_response.to_raw_response_wrapper`."""

    def __init__(self, evals: Evals) -> None:
        # Kept so that the `runs` property can reach the underlying resource's sub-resource.
        self._evals = evals

        wrap = _legacy_response.to_raw_response_wrapper
        self.create = wrap(evals.create)
        self.retrieve = wrap(evals.retrieve)
        self.update = wrap(evals.update)
        self.list = wrap(evals.list)
        self.delete = wrap(evals.delete)

    @cached_property
    def runs(self) -> RunsWithRawResponse:
        """Raw-response view over the wrapped resource's `runs` sub-resource."""
        return RunsWithRawResponse(self._evals.runs)
588
589
class AsyncEvalsWithRawResponse:
    """Exposes each `AsyncEvals` method wrapped with `_legacy_response.async_to_raw_response_wrapper`."""

    def __init__(self, evals: AsyncEvals) -> None:
        # Kept so that the `runs` property can reach the underlying resource's sub-resource.
        self._evals = evals

        wrap = _legacy_response.async_to_raw_response_wrapper
        self.create = wrap(evals.create)
        self.retrieve = wrap(evals.retrieve)
        self.update = wrap(evals.update)
        self.list = wrap(evals.list)
        self.delete = wrap(evals.delete)

    @cached_property
    def runs(self) -> AsyncRunsWithRawResponse:
        """Raw-response view over the wrapped resource's `runs` sub-resource."""
        return AsyncRunsWithRawResponse(self._evals.runs)
613
614
class EvalsWithStreamingResponse:
    """Exposes each `Evals` method wrapped with `to_streamed_response_wrapper`."""

    def __init__(self, evals: Evals) -> None:
        # Kept so that the `runs` property can reach the underlying resource's sub-resource.
        self._evals = evals

        wrap = to_streamed_response_wrapper
        self.create = wrap(evals.create)
        self.retrieve = wrap(evals.retrieve)
        self.update = wrap(evals.update)
        self.list = wrap(evals.list)
        self.delete = wrap(evals.delete)

    @cached_property
    def runs(self) -> RunsWithStreamingResponse:
        """Streaming-response view over the wrapped resource's `runs` sub-resource."""
        return RunsWithStreamingResponse(self._evals.runs)
638
639
class AsyncEvalsWithStreamingResponse:
    """Exposes each `AsyncEvals` method wrapped with `async_to_streamed_response_wrapper`."""

    def __init__(self, evals: AsyncEvals) -> None:
        # Kept so that the `runs` property can reach the underlying resource's sub-resource.
        self._evals = evals

        wrap = async_to_streamed_response_wrapper
        self.create = wrap(evals.create)
        self.retrieve = wrap(evals.retrieve)
        self.update = wrap(evals.update)
        self.list = wrap(evals.list)
        self.delete = wrap(evals.delete)

    @cached_property
    def runs(self) -> AsyncRunsWithStreamingResponse:
        """Streaming-response view over the wrapped resource's `runs` sub-resource."""
        return AsyncRunsWithStreamingResponse(self._evals.runs)