Commit a7305833

hallacy <hallacy@openai.com>
2023-01-07 04:40:33
Remove search, answers, classifications (#154) (#168)
* Remove search, answers, classifications
* mypy
* Update test with new format
1 parent 3afcfb2
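
These resources correspond to API endpoints that were discontinued upstream. A hypothetical migration sketch, not part of this commit: the removed `openai.Search` can be approximated with the `Embedding` resource that remains in the SDK (the `rank_documents` helper and the model name are illustrative assumptions):

```python
import numpy as np
import openai

def rank_documents(query, documents, model="text-embedding-ada-002"):
    # Embed the query and the documents in a single batch request.
    resp = openai.Embedding.create(model=model, input=[query] + documents)
    vectors = [np.array(item["embedding"]) for item in resp["data"]]
    q, doc_vecs = vectors[0], vectors[1:]
    # Rank by cosine similarity, highest first.
    scores = [
        float(q @ d / (np.linalg.norm(q) * np.linalg.norm(d))) for d in doc_vecs
    ]
    return sorted(zip(scores, documents), reverse=True)
```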
openai/api_resources/__init__.py
@@ -1,5 +1,3 @@
-from openai.api_resources.answer import Answer  # noqa: F401
-from openai.api_resources.classification import Classification  # noqa: F401
 from openai.api_resources.completion import Completion  # noqa: F401
 from openai.api_resources.customer import Customer  # noqa: F401
 from openai.api_resources.deployment import Deployment  # noqa: F401
@@ -12,4 +10,3 @@ from openai.api_resources.fine_tune import FineTune  # noqa: F401
 from openai.api_resources.image import Image  # noqa: F401
 from openai.api_resources.model import Model  # noqa: F401
 from openai.api_resources.moderation import Moderation  # noqa: F401
-from openai.api_resources.search import Search  # noqa: F401
openai/api_resources/answer.py
@@ -1,17 +0,0 @@
-from openai.openai_object import OpenAIObject
-
-
-class Answer(OpenAIObject):
-    @classmethod
-    def get_url(self):
-        return "/answers"
-
-    @classmethod
-    def create(cls, **params):
-        instance = cls()
-        return instance.request("post", cls.get_url(), params)
-
-    @classmethod
-    def acreate(cls, **params):
-        instance = cls()
-        return instance.arequest("post", cls.get_url(), params)
openai/api_resources/classification.py
@@ -1,17 +0,0 @@
-from openai.openai_object import OpenAIObject
-
-
-class Classification(OpenAIObject):
-    @classmethod
-    def get_url(self):
-        return "/classifications"
-
-    @classmethod
-    def create(cls, **params):
-        instance = cls()
-        return instance.request("post", cls.get_url(), params)
-
-    @classmethod
-    def acreate(cls, **params):
-        instance = cls()
-        return instance.arequest("post", cls.get_url(), params)
openai/api_resources/engine.py
@@ -3,8 +3,7 @@ import warnings
 
 from openai import util
 from openai.api_resources.abstract import ListableAPIResource, UpdateableAPIResource
-from openai.error import InvalidAPIType, TryAgain
-from openai.util import ApiType
+from openai.error import TryAgain
 
 
 class Engine(ListableAPIResource, UpdateableAPIResource):
@@ -44,22 +43,6 @@ class Engine(ListableAPIResource, UpdateableAPIResource):
 
                 util.log_info("Waiting for model to warm up", error=e)
 
-    def search(self, **params):
-        if self.typed_api_type in (ApiType.AZURE, ApiType.AZURE_AD):
-            return self.request("post", self.instance_url("search"), params)
-        elif self.typed_api_type == ApiType.OPEN_AI:
-            return self.request("post", self.instance_url() + "/search", params)
-        else:
-            raise InvalidAPIType("Unsupported API type %s" % self.api_type)
-
-    def asearch(self, **params):
-        if self.typed_api_type in (ApiType.AZURE, ApiType.AZURE_AD):
-            return self.arequest("post", self.instance_url("search"), params)
-        elif self.typed_api_type == ApiType.OPEN_AI:
-            return self.arequest("post", self.instance_url() + "/search", params)
-        else:
-            raise InvalidAPIType("Unsupported API type %s" % self.api_type)
-
     def embeddings(self, **params):
         warnings.warn(
             "Engine.embeddings is deprecated, use Embedding.create", DeprecationWarning
openai/api_resources/file.py
@@ -24,8 +24,6 @@ class File(ListableAPIResource, DeletableAPIResource):
         organization=None,
         user_provided_filename=None,
     ):
-        if purpose != "search" and model is not None:
-            raise ValueError("'model' is only meaningful if 'purpose' is 'search'")
         requestor = api_requestor.APIRequestor(
             api_key,
             api_base=api_base or openai.api_base,
openai/api_resources/search.py
@@ -1,51 +0,0 @@
-import time
-
-from openai import util
-from openai.api_resources.abstract.engine_api_resource import EngineAPIResource
-from openai.error import TryAgain
-
-
-class Search(EngineAPIResource):
-    OBJECT_NAME = "search"
-
-    @classmethod
-    def create(cls, *args, **kwargs):
-        """
-        Creates a new search for the provided input and parameters.
-
-        See https://beta.openai.com/docs/api-reference/search for a list
-        of valid parameters.
-        """
-
-        start = time.time()
-        timeout = kwargs.pop("timeout", None)
-
-        while True:
-            try:
-                return super().create(*args, **kwargs)
-            except TryAgain as e:
-                if timeout is not None and time.time() > start + timeout:
-                    raise
-
-                util.log_info("Waiting for model to warm up", error=e)
-
-    @classmethod
-    async def acreate(cls, *args, **kwargs):
-        """
-        Creates a new search for the provided input and parameters.
-
-        See https://beta.openai.com/docs/api-reference/search for a list
-        of valid parameters.
-        """
-
-        start = time.time()
-        timeout = kwargs.pop("timeout", None)
-
-        while True:
-            try:
-                return await super().acreate(*args, **kwargs)
-            except TryAgain as e:
-                if timeout is not None and time.time() > start + timeout:
-                    raise
-
-                util.log_info("Waiting for model to warm up", error=e)
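
The deleted `create`/`acreate` methods wrapped `EngineAPIResource` in a warm-up retry loop, and the same loop survives in `Engine` (see the engine.py hunk above). A generic sketch of that pattern, with an illustrative helper name:

```python
import time

from openai import util
from openai.error import TryAgain

def create_with_warmup(create_fn, *args, timeout=None, **kwargs):
    # Retry while the backend signals TryAgain, until an optional
    # deadline (in seconds) elapses; then let the error propagate.
    start = time.time()
    while True:
        try:
            return create_fn(*args, **kwargs)
        except TryAgain as e:
            if timeout is not None and time.time() > start + timeout:
                raise
            util.log_info("Waiting for model to warm up", error=e)
```

For example, `create_with_warmup(openai.Completion.create, model="ada", prompt="hi", timeout=30)` retries for up to 30 seconds.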
openai/tests/test_endpoints.py
@@ -10,10 +10,12 @@ from openai import error
 # FILE TESTS
 def test_file_upload():
     result = openai.File.create(
-        file=io.StringIO(json.dumps({"text": "test file data"})),
-        purpose="search",
+        file=io.StringIO(
+            json.dumps({"prompt": "test file data", "completion": "tada"})
+        ),
+        purpose="fine-tune",
     )
-    assert result.purpose == "search"
+    assert result.purpose == "fine-tune"
     assert "id" in result
 
     result = openai.File.retrieve(id=result.id)
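
The rewritten test exercises the fine-tuning upload path instead of the deleted search purpose. A minimal sketch of the prompt/completion JSONL payload it implies (example values are placeholders):

```python
import io
import json

import openai

# Each line of a fine-tune file is a JSON object with prompt/completion keys.
examples = [
    {"prompt": "2 + 2 =", "completion": " 4"},
    {"prompt": "The capital of France is", "completion": " Paris"},
]
payload = io.StringIO("\n".join(json.dumps(ex) for ex in examples))
result = openai.File.create(file=payload, purpose="fine-tune")
assert result.purpose == "fine-tune"
```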
openai/__init__.py
@@ -7,8 +7,6 @@ from contextvars import ContextVar
 from typing import Optional, TYPE_CHECKING
 
 from openai.api_resources import (
-    Answer,
-    Classification,
     Completion,
     Customer,
     Edit,
@@ -21,7 +19,6 @@ from openai.api_resources import (
     Image,
     Model,
     Moderation,
-    Search,
 )
 from openai.error import APIError, InvalidRequestError, OpenAIError
 
@@ -55,8 +52,6 @@ aiosession: ContextVar[Optional["ClientSession"]] = ContextVar(
 
 __all__ = [
     "APIError",
-    "Answer",
-    "Classification",
     "Completion",
     "Customer",
     "Edit",
@@ -71,7 +66,6 @@ __all__ = [
     "Model",
     "Moderation",
     "OpenAIError",
-    "Search",
     "api_base",
     "api_key",
     "api_type",
openai/cli.py
@@ -3,7 +3,6 @@ import os
 import signal
 import sys
 import warnings
-from functools import partial
 from typing import Optional
 
 import requests
@@ -13,11 +12,9 @@ from openai.upload_progress import BufferReader
 from openai.validators import (
     apply_necessary_remediation,
     apply_validators,
-    get_search_validators,
     get_validators,
     read_any_format,
     write_out_file,
-    write_out_search_file,
 )
 
 
@@ -105,40 +102,6 @@ class Engine:
                     sys.stdout.write("\n")
                 sys.stdout.flush()
 
-    @classmethod
-    def search(cls, args):
-        params = {
-            "query": args.query,
-            "max_rerank": args.max_rerank,
-            "return_metadata": args.return_metadata,
-        }
-        if args.documents:
-            params["documents"] = args.documents
-        if args.file:
-            params["file"] = args.file
-
-        if args.version:
-            params["version"] = args.version
-
-        resp = openai.Engine(id=args.id).search(**params)
-        scores = [
-            (search_result["score"], search_result["document"])
-            for search_result in resp["data"]
-        ]
-        scores.sort(reverse=True)
-        dataset = (
-            args.documents if args.documents else [x["text"] for x in resp["data"]]
-        )
-        for score, document_idx in scores:
-            print("=== score {:.3f} ===".format(score))
-            print(dataset[document_idx])
-            if (
-                args.return_metadata
-                and args.file
-                and "metadata" in resp["data"][document_idx]
-            ):
-                print(f"METADATA: {resp['data'][document_idx]['metadata']}")
-
     @classmethod
     def list(cls, args):
         engines = openai.Engine.list()
@@ -230,7 +193,6 @@ class File:
         resp = openai.File.create(
             file=buffer_reader,
             purpose=args.purpose,
-            model=args.model,
             user_provided_filename=args.file,
         )
         print(resp)
@@ -291,51 +253,6 @@ class Image:
         print(resp)
 
 
-class Search:
-    @classmethod
-    def prepare_data(cls, args, purpose):
-
-        sys.stdout.write("Analyzing...\n")
-        fname = args.file
-        auto_accept = args.quiet
-
-        optional_fields = ["metadata"]
-
-        if purpose == "classifications":
-            required_fields = ["text", "label"]
-        else:
-            required_fields = ["text"]
-
-        df, remediation = read_any_format(
-            fname, fields=required_fields + optional_fields
-        )
-
-        if "metadata" not in df:
-            df["metadata"] = None
-
-        apply_necessary_remediation(None, remediation)
-        validators = get_search_validators(required_fields, optional_fields)
-
-        write_out_file_func = partial(
-            write_out_search_file,
-            purpose=purpose,
-            fields=required_fields + optional_fields,
-        )
-
-        apply_validators(
-            df, fname, remediation, validators, auto_accept, write_out_file_func
-        )
-
-    @classmethod
-    def create(cls, args):
-        resp = openai.Search.create(
-            query=args.query,
-            documents=args.documents,
-            model=args.model,
-        )
-        print(resp)
-
-
 class FineTune:
     @classmethod
     def list(cls, args):
@@ -642,57 +559,6 @@ def tools_register(parser):
     )
     sub.set_defaults(func=FineTune.prepare_data)
 
-    sub = subparsers.add_parser("search.prepare_data")
-    sub.add_argument(
-        "-f",
-        "--file",
-        required=True,
-        help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing text examples to be analyzed."
-        "This should be the local file path.",
-    )
-    sub.add_argument(
-        "-q",
-        "--quiet",
-        required=False,
-        action="store_true",
-        help="Auto accepts all suggestions, without asking for user input. To be used within scripts.",
-    )
-    sub.set_defaults(func=partial(Search.prepare_data, purpose="search"))
-
-    sub = subparsers.add_parser("classifications.prepare_data")
-    sub.add_argument(
-        "-f",
-        "--file",
-        required=True,
-        help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing text-label examples to be analyzed."
-        "This should be the local file path.",
-    )
-    sub.add_argument(
-        "-q",
-        "--quiet",
-        required=False,
-        action="store_true",
-        help="Auto accepts all suggestions, without asking for user input. To be used within scripts.",
-    )
-    sub.set_defaults(func=partial(Search.prepare_data, purpose="classifications"))
-
-    sub = subparsers.add_parser("answers.prepare_data")
-    sub.add_argument(
-        "-f",
-        "--file",
-        required=True,
-        help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing text examples to be analyzed."
-        "This should be the local file path.",
-    )
-    sub.add_argument(
-        "-q",
-        "--quiet",
-        required=False,
-        action="store_true",
-        help="Auto accepts all suggestions, without asking for user input. To be used within scripts.",
-    )
-    sub.set_defaults(func=partial(Search.prepare_data, purpose="answer"))
-
 
 def api_register(parser):
     # Engine management
@@ -760,41 +626,6 @@ Mutually exclusive with `top_p`.""",
     )
     sub.set_defaults(func=Engine.generate)
 
-    sub = subparsers.add_parser("engines.search")
-    sub.add_argument("-i", "--id", required=True)
-    sub.add_argument(
-        "-d",
-        "--documents",
-        action="append",
-        help="List of documents to search over. Only one of `documents` or `file` may be supplied.",
-        required=False,
-    )
-    sub.add_argument(
-        "-f",
-        "--file",
-        help="A file id to search over.  Only one of `documents` or `file` may be supplied.",
-        required=False,
-    )
-    sub.add_argument(
-        "--max_rerank",
-        help="The maximum number of documents to be re-ranked and returned by search. This flag only takes effect when `file` is set.",
-        type=int,
-        default=200,
-    )
-    sub.add_argument(
-        "--return_metadata",
-        help="A special boolean flag for showing metadata. If set `true`, each document entry in the returned json will contain a 'metadata' field. Default to be `false`. This flag only takes effect when `file` is set.",
-        type=bool,
-        default=False,
-    )
-    sub.add_argument(
-        "--version",
-        help="The version of the search routing to use",
-    )
-
-    sub.add_argument("-q", "--query", required=True, help="Search query")
-    sub.set_defaults(func=Engine.search)
-
     # Completions
     sub = subparsers.add_parser("completions.create")
     sub.add_argument(
@@ -890,11 +721,6 @@ Mutually exclusive with `top_p`.""",
         help="Why are you uploading this file? (see https://beta.openai.com/docs/api-reference/ for purposes)",
         required=True,
     )
-    sub.add_argument(
-        "-m",
-        "--model",
-        help="Model for search indexing (e.g. 'ada'). Only meaningful if --purpose is 'search'.",
-    )
     sub.set_defaults(func=File.create)
 
     sub = subparsers.add_parser("files.get")
@@ -908,29 +734,6 @@ Mutually exclusive with `top_p`.""",
     sub = subparsers.add_parser("files.list")
     sub.set_defaults(func=File.list)
 
-    # Search
-    sub = subparsers.add_parser("search.create")
-
-    sub.add_argument(
-        "-d",
-        "--documents",
-        help="Documents to search over",
-        type=str,
-        nargs="+",
-    )
-    sub.add_argument(
-        "-q",
-        "--query",
-        required=True,
-        help="Search query",
-    )
-    sub.add_argument(
-        "-m",
-        "--model",
-        help="The model to search with",
-    )
-    sub.set_defaults(func=Search.create)
-
     # Finetune
     sub = subparsers.add_parser("fine_tunes.list")
     sub.set_defaults(func=FineTune.list)
openai/validators.py
@@ -718,30 +718,6 @@ def write_out_file(df, fname, any_remediations, auto_accept):
         sys.stdout.write("Aborting... did not write the file\n")
 
 
-def write_out_search_file(df, fname, any_remediations, auto_accept, fields, purpose):
-    """
-    This function will write out a dataframe to a file, if the user would like to proceed.
-    """
-    input_text = "\n\nYour data will be written to a new JSONL file. Proceed [Y/n]: "
-
-    if not any_remediations:
-        sys.stdout.write(
-            f'\nYou can upload your file:\n> openai api files.create -f "{fname}" -p {purpose}'
-        )
-
-    elif accept_suggestion(input_text, auto_accept):
-        fnames = get_outfnames(fname, split=False)
-
-        assert len(fnames) == 1
-        df[fields].to_json(fnames[0], lines=True, orient="records", force_ascii=False)
-
-        sys.stdout.write(
-            f'\nWrote modified file to {fnames[0]}`\nFeel free to take a look!\n\nNow upload that file:\n> openai api files.create -f "{fnames[0]}" -p {purpose}'
-        )
-    else:
-        sys.stdout.write("Aborting... did not write the file\n")
-
-
 def infer_task_type(df):
     """
     Infer the likely fine-tuning task type from the data
@@ -800,23 +776,6 @@ def get_validators():
     ]
 
 
-def get_search_validators(required_fields, optional_fields):
-    validators = [
-        lambda x: necessary_column_validator(x, field) for field in required_fields
-    ]
-    validators += [
-        lambda x: non_empty_field_validator(x, field) for field in required_fields
-    ]
-    validators += [lambda x: duplicated_rows_validator(x, required_fields)]
-    validators += [
-        lambda x: additional_column_validator(
-            x, fields=required_fields + optional_fields
-        ),
-    ]
-
-    return validators
-
-
 def apply_validators(
     df,
     fname,