openai-python/src/openai/types/eval_create

  1# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
  2
  3from __future__ import annotations
  4
  5from typing import Dict, Union, Iterable, Optional
  6from typing_extensions import Literal, Required, TypeAlias, TypedDict
  7
  8from .._types import SequenceNotStr
  9from .shared_params.metadata import Metadata
 10from .graders.python_grader_param import PythonGraderParam
 11from .graders.score_model_grader_param import ScoreModelGraderParam
 12from .graders.string_check_grader_param import StringCheckGraderParam
 13from .responses.response_input_text_param import ResponseInputTextParam
 14from .graders.text_similarity_grader_param import TextSimilarityGraderParam
 15from .responses.response_input_audio_param import ResponseInputAudioParam
 16
 17__all__ = [
 18    "EvalCreateParams",
 19    "DataSourceConfig",
 20    "DataSourceConfigCustom",
 21    "DataSourceConfigLogs",
 22    "DataSourceConfigStoredCompletions",
 23    "TestingCriterion",
 24    "TestingCriterionLabelModel",
 25    "TestingCriterionLabelModelInput",
 26    "TestingCriterionLabelModelInputSimpleInputMessage",
 27    "TestingCriterionLabelModelInputEvalItem",
 28    "TestingCriterionLabelModelInputEvalItemContent",
 29    "TestingCriterionLabelModelInputEvalItemContentOutputText",
 30    "TestingCriterionLabelModelInputEvalItemContentInputImage",
 31    "TestingCriterionTextSimilarity",
 32    "TestingCriterionPython",
 33    "TestingCriterionScoreModel",
 34]
 35
 36
 37class EvalCreateParams(TypedDict, total=False):
 38    data_source_config: Required[DataSourceConfig]
 39    """The configuration for the data source used for the evaluation runs.
 40
 41    Dictates the schema of the data used in the evaluation.
 42    """
 43
 44    testing_criteria: Required[Iterable[TestingCriterion]]
 45    """A list of graders for all eval runs in this group.
 46
 47    Graders can reference variables in the data source using double curly braces
 48    notation, like `{{item.variable_name}}`. To reference the model's output, use
 49    the `sample` namespace (ie, `{{sample.output_text}}`).
 50    """
 51
 52    metadata: Optional[Metadata]
 53    """Set of 16 key-value pairs that can be attached to an object.
 54
 55    This can be useful for storing additional information about the object in a
 56    structured format, and querying for objects via API or the dashboard.
 57
 58    Keys are strings with a maximum length of 64 characters. Values are strings with
 59    a maximum length of 512 characters.
 60    """
 61
 62    name: str
 63    """The name of the evaluation."""
 64
 65
 66class DataSourceConfigCustom(TypedDict, total=False):
 67    item_schema: Required[Dict[str, object]]
 68    """The json schema for each row in the data source."""
 69
 70    type: Required[Literal["custom"]]
 71    """The type of data source. Always `custom`."""
 72
 73    include_sample_schema: bool
 74    """
 75    Whether the eval should expect you to populate the sample namespace (ie, by
 76    generating responses off of your data source)
 77    """
 78
 79
 80class DataSourceConfigLogs(TypedDict, total=False):
 81    type: Required[Literal["logs"]]
 82    """The type of data source. Always `logs`."""
 83
 84    metadata: Dict[str, object]
 85    """Metadata filters for the logs data source."""
 86
 87
 88class DataSourceConfigStoredCompletions(TypedDict, total=False):
 89    type: Required[Literal["stored_completions"]]
 90    """The type of data source. Always `stored_completions`."""
 91
 92    metadata: Dict[str, object]
 93    """Metadata filters for the stored completions data source."""
 94
 95
 96DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigLogs, DataSourceConfigStoredCompletions]
 97
 98
 99class TestingCriterionLabelModelInputSimpleInputMessage(TypedDict, total=False):
100    content: Required[str]
101    """The content of the message."""
102
103    role: Required[str]
104    """The role of the message (e.g. "system", "assistant", "user")."""
105
106
107class TestingCriterionLabelModelInputEvalItemContentOutputText(TypedDict, total=False):
108    text: Required[str]
109    """The text output from the model."""
110
111    type: Required[Literal["output_text"]]
112    """The type of the output text. Always `output_text`."""
113
114
115class TestingCriterionLabelModelInputEvalItemContentInputImage(TypedDict, total=False):
116    image_url: Required[str]
117    """The URL of the image input."""
118
119    type: Required[Literal["input_image"]]
120    """The type of the image input. Always `input_image`."""
121
122    detail: str
123    """The detail level of the image to be sent to the model.
124
125    One of `high`, `low`, or `auto`. Defaults to `auto`.
126    """
127
128
129TestingCriterionLabelModelInputEvalItemContent: TypeAlias = Union[
130    str,
131    ResponseInputTextParam,
132    TestingCriterionLabelModelInputEvalItemContentOutputText,
133    TestingCriterionLabelModelInputEvalItemContentInputImage,
134    ResponseInputAudioParam,
135    Iterable[object],
136]
137
138
139class TestingCriterionLabelModelInputEvalItem(TypedDict, total=False):
140    content: Required[TestingCriterionLabelModelInputEvalItemContent]
141    """Inputs to the model - can contain template strings."""
142
143    role: Required[Literal["user", "assistant", "system", "developer"]]
144    """The role of the message input.
145
146    One of `user`, `assistant`, `system`, or `developer`.
147    """
148
149    type: Literal["message"]
150    """The type of the message input. Always `message`."""
151
152
153TestingCriterionLabelModelInput: TypeAlias = Union[
154    TestingCriterionLabelModelInputSimpleInputMessage, TestingCriterionLabelModelInputEvalItem
155]
156
157
158class TestingCriterionLabelModel(TypedDict, total=False):
159    input: Required[Iterable[TestingCriterionLabelModelInput]]
160    """A list of chat messages forming the prompt or context.
161
162    May include variable references to the `item` namespace, ie {{item.name}}.
163    """
164
165    labels: Required[SequenceNotStr[str]]
166    """The labels to classify to each item in the evaluation."""
167
168    model: Required[str]
169    """The model to use for the evaluation. Must support structured outputs."""
170
171    name: Required[str]
172    """The name of the grader."""
173
174    passing_labels: Required[SequenceNotStr[str]]
175    """The labels that indicate a passing result. Must be a subset of labels."""
176
177    type: Required[Literal["label_model"]]
178    """The object type, which is always `label_model`."""
179
180
181class TestingCriterionTextSimilarity(TextSimilarityGraderParam, total=False):
182    pass_threshold: Required[float]
183    """The threshold for the score."""
184
185
186class TestingCriterionPython(PythonGraderParam, total=False):
187    pass_threshold: float
188    """The threshold for the score."""
189
190
191class TestingCriterionScoreModel(ScoreModelGraderParam, total=False):
192    pass_threshold: float
193    """The threshold for the score."""
194
195
196TestingCriterion: TypeAlias = Union[
197    TestingCriterionLabelModel,
198    StringCheckGraderParam,
199    TestingCriterionTextSimilarity,
200    TestingCriterionPython,
201    TestingCriterionScoreModel,
202]