main
1# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
3from __future__ import annotations
4
5from typing import Dict, Union, Iterable, Optional
6from typing_extensions import Literal, Required, TypeAlias, TypedDict
7
8from .._types import SequenceNotStr
9from .shared_params.metadata import Metadata
10from .graders.python_grader_param import PythonGraderParam
11from .graders.score_model_grader_param import ScoreModelGraderParam
12from .graders.string_check_grader_param import StringCheckGraderParam
13from .responses.response_input_text_param import ResponseInputTextParam
14from .graders.text_similarity_grader_param import TextSimilarityGraderParam
15from .responses.response_input_audio_param import ResponseInputAudioParam
16
17__all__ = [
18 "EvalCreateParams",
19 "DataSourceConfig",
20 "DataSourceConfigCustom",
21 "DataSourceConfigLogs",
22 "DataSourceConfigStoredCompletions",
23 "TestingCriterion",
24 "TestingCriterionLabelModel",
25 "TestingCriterionLabelModelInput",
26 "TestingCriterionLabelModelInputSimpleInputMessage",
27 "TestingCriterionLabelModelInputEvalItem",
28 "TestingCriterionLabelModelInputEvalItemContent",
29 "TestingCriterionLabelModelInputEvalItemContentOutputText",
30 "TestingCriterionLabelModelInputEvalItemContentInputImage",
31 "TestingCriterionTextSimilarity",
32 "TestingCriterionPython",
33 "TestingCriterionScoreModel",
34]
35
36
37class EvalCreateParams(TypedDict, total=False):
38 data_source_config: Required[DataSourceConfig]
39 """The configuration for the data source used for the evaluation runs.
40
41 Dictates the schema of the data used in the evaluation.
42 """
43
44 testing_criteria: Required[Iterable[TestingCriterion]]
45 """A list of graders for all eval runs in this group.
46
47 Graders can reference variables in the data source using double curly braces
48 notation, like `{{item.variable_name}}`. To reference the model's output, use
49 the `sample` namespace (ie, `{{sample.output_text}}`).
50 """
51
52 metadata: Optional[Metadata]
53 """Set of 16 key-value pairs that can be attached to an object.
54
55 This can be useful for storing additional information about the object in a
56 structured format, and querying for objects via API or the dashboard.
57
58 Keys are strings with a maximum length of 64 characters. Values are strings with
59 a maximum length of 512 characters.
60 """
61
62 name: str
63 """The name of the evaluation."""
64
65
66class DataSourceConfigCustom(TypedDict, total=False):
67 item_schema: Required[Dict[str, object]]
68 """The json schema for each row in the data source."""
69
70 type: Required[Literal["custom"]]
71 """The type of data source. Always `custom`."""
72
73 include_sample_schema: bool
74 """
75 Whether the eval should expect you to populate the sample namespace (ie, by
76 generating responses off of your data source)
77 """
78
79
80class DataSourceConfigLogs(TypedDict, total=False):
81 type: Required[Literal["logs"]]
82 """The type of data source. Always `logs`."""
83
84 metadata: Dict[str, object]
85 """Metadata filters for the logs data source."""
86
87
88class DataSourceConfigStoredCompletions(TypedDict, total=False):
89 type: Required[Literal["stored_completions"]]
90 """The type of data source. Always `stored_completions`."""
91
92 metadata: Dict[str, object]
93 """Metadata filters for the stored completions data source."""
94
95
96DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigLogs, DataSourceConfigStoredCompletions]
97
98
99class TestingCriterionLabelModelInputSimpleInputMessage(TypedDict, total=False):
100 content: Required[str]
101 """The content of the message."""
102
103 role: Required[str]
104 """The role of the message (e.g. "system", "assistant", "user")."""
105
106
107class TestingCriterionLabelModelInputEvalItemContentOutputText(TypedDict, total=False):
108 text: Required[str]
109 """The text output from the model."""
110
111 type: Required[Literal["output_text"]]
112 """The type of the output text. Always `output_text`."""
113
114
115class TestingCriterionLabelModelInputEvalItemContentInputImage(TypedDict, total=False):
116 image_url: Required[str]
117 """The URL of the image input."""
118
119 type: Required[Literal["input_image"]]
120 """The type of the image input. Always `input_image`."""
121
122 detail: str
123 """The detail level of the image to be sent to the model.
124
125 One of `high`, `low`, or `auto`. Defaults to `auto`.
126 """
127
128
129TestingCriterionLabelModelInputEvalItemContent: TypeAlias = Union[
130 str,
131 ResponseInputTextParam,
132 TestingCriterionLabelModelInputEvalItemContentOutputText,
133 TestingCriterionLabelModelInputEvalItemContentInputImage,
134 ResponseInputAudioParam,
135 Iterable[object],
136]
137
138
139class TestingCriterionLabelModelInputEvalItem(TypedDict, total=False):
140 content: Required[TestingCriterionLabelModelInputEvalItemContent]
141 """Inputs to the model - can contain template strings."""
142
143 role: Required[Literal["user", "assistant", "system", "developer"]]
144 """The role of the message input.
145
146 One of `user`, `assistant`, `system`, or `developer`.
147 """
148
149 type: Literal["message"]
150 """The type of the message input. Always `message`."""
151
152
153TestingCriterionLabelModelInput: TypeAlias = Union[
154 TestingCriterionLabelModelInputSimpleInputMessage, TestingCriterionLabelModelInputEvalItem
155]
156
157
158class TestingCriterionLabelModel(TypedDict, total=False):
159 input: Required[Iterable[TestingCriterionLabelModelInput]]
160 """A list of chat messages forming the prompt or context.
161
162 May include variable references to the `item` namespace, ie {{item.name}}.
163 """
164
165 labels: Required[SequenceNotStr[str]]
166 """The labels to classify to each item in the evaluation."""
167
168 model: Required[str]
169 """The model to use for the evaluation. Must support structured outputs."""
170
171 name: Required[str]
172 """The name of the grader."""
173
174 passing_labels: Required[SequenceNotStr[str]]
175 """The labels that indicate a passing result. Must be a subset of labels."""
176
177 type: Required[Literal["label_model"]]
178 """The object type, which is always `label_model`."""
179
180
181class TestingCriterionTextSimilarity(TextSimilarityGraderParam, total=False):
182 pass_threshold: Required[float]
183 """The threshold for the score."""
184
185
186class TestingCriterionPython(PythonGraderParam, total=False):
187 pass_threshold: float
188 """The threshold for the score."""
189
190
191class TestingCriterionScoreModel(ScoreModelGraderParam, total=False):
192 pass_threshold: float
193 """The threshold for the score."""
194
195
196TestingCriterion: TypeAlias = Union[
197 TestingCriterionLabelModel,
198 StringCheckGraderParam,
199 TestingCriterionTextSimilarity,
200 TestingCriterionPython,
201 TestingCriterionScoreModel,
202]