Commit bd269ab9

hallacy <hallacy@openai.com>
2023-04-08 08:53:16
Speed up imports (#386)
1 parent cf03fe1
openai/api_resources/embedding.py
@@ -1,10 +1,10 @@
 import base64
 import time
 
-
 from openai import util
 from openai.api_resources.abstract.engine_api_resource import EngineAPIResource
-from openai.datalib import numpy as np, assert_has_numpy
+from openai.datalib.numpy_helper import assert_has_numpy
+from openai.datalib.numpy_helper import numpy as np
 from openai.error import TryAgain
 
 
openai/datalib.py → openai/datalib/common.py
@@ -13,19 +13,6 @@ assertions with instructive error messages.
 See also `setup.py`.
 
 """
-try:
-    import numpy
-except ImportError:
-    numpy = None
-
-try:
-    import pandas
-except ImportError:
-    pandas = None
-
-HAS_NUMPY = bool(numpy)
-HAS_PANDAS = bool(pandas)
-
 INSTRUCTIONS = """
 
 OpenAI error: 
@@ -39,18 +26,7 @@ This feature requires additional dependencies:
 """
 
 NUMPY_INSTRUCTIONS = INSTRUCTIONS.format(library="numpy")
-PANDAS_INSTRUCTIONS = INSTRUCTIONS.format(library="pandas")
 
 
 class MissingDependencyError(Exception):
     pass
-
-
-def assert_has_numpy():
-    if not HAS_NUMPY:
-        raise MissingDependencyError(NUMPY_INSTRUCTIONS)
-
-
-def assert_has_pandas():
-    if not HAS_PANDAS:
-        raise MissingDependencyError(PANDAS_INSTRUCTIONS)
openai/datalib/numpy_helper.py
@@ -0,0 +1,15 @@
+from openai.datalib.common import INSTRUCTIONS, MissingDependencyError
+
+try:
+    import numpy
+except ImportError:
+    numpy = None
+
+HAS_NUMPY = bool(numpy)
+
+NUMPY_INSTRUCTIONS = INSTRUCTIONS.format(library="numpy")
+
+
+def assert_has_numpy():
+    if not HAS_NUMPY:
+        raise MissingDependencyError(NUMPY_INSTRUCTIONS)
openai/datalib/pandas_helper.py
@@ -0,0 +1,15 @@
+from openai.datalib.common import INSTRUCTIONS, MissingDependencyError
+
+try:
+    import pandas
+except ImportError:
+    pandas = None
+
+HAS_PANDAS = bool(pandas)
+
+PANDAS_INSTRUCTIONS = INSTRUCTIONS.format(library="pandas")
+
+
+def assert_has_pandas():
+    if not HAS_PANDAS:
+        raise MissingDependencyError(PANDAS_INSTRUCTIONS)
openai/tests/test_long_examples_validator.py
@@ -4,12 +4,8 @@ from tempfile import NamedTemporaryFile
 
 import pytest
 
-from openai.datalib import (
-    HAS_NUMPY,
-    HAS_PANDAS,
-    NUMPY_INSTRUCTIONS,
-    PANDAS_INSTRUCTIONS,
-)
+from openai.datalib.numpy_helper import HAS_NUMPY, NUMPY_INSTRUCTIONS
+from openai.datalib.pandas_helper import HAS_PANDAS, PANDAS_INSTRUCTIONS
 
 
 @pytest.mark.skipif(not HAS_PANDAS, reason=PANDAS_INSTRUCTIONS)
@@ -54,5 +50,5 @@ def test_long_examples_validator() -> None:
     assert prepared_data_cmd_output.stderr == ""
     # validate get_long_indexes() applied during optional_fn() call in long_examples_validator()
     assert "indices of the long examples has changed" in prepared_data_cmd_output.stdout
-    
+
     return prepared_data_cmd_output.stdout
openai/__init__.py
@@ -3,16 +3,26 @@
 # Originally forked from the MIT-licensed Stripe Python bindings.
 
 import os
+import sys
+from typing import TYPE_CHECKING, Optional
+
 from contextvars import ContextVar
-from typing import Optional, TYPE_CHECKING
+
+if "pkg_resources" not in sys.modules:
+    # workaround: stub out pkg_resources while aiohttp is imported below; see
+    # https://github.com/benoitc/gunicorn/pull/2539
+    sys.modules["pkg_resources"] = object()  # type: ignore[assignment]
+    import aiohttp
+
+    del sys.modules["pkg_resources"]
 
 from openai.api_resources import (
     Audio,
     ChatCompletion,
     Completion,
     Customer,
-    Edit,
     Deployment,
+    Edit,
     Embedding,
     Engine,
     ErrorObject,
openai/embeddings_utils.py
@@ -10,8 +10,8 @@ from sklearn.metrics import average_precision_score, precision_recall_curve
 from tenacity import retry, stop_after_attempt, wait_random_exponential
 
 import openai
-from openai.datalib import numpy as np
-from openai.datalib import pandas as pd
+from openai.datalib.numpy_helper import numpy as np
+from openai.datalib.pandas_helper import pandas as pd
 
 
 @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
openai/validators.py
@@ -2,7 +2,8 @@ import os
 import sys
 from typing import Any, Callable, NamedTuple, Optional
 
-from openai.datalib import pandas as pd, assert_has_pandas
+from openai.datalib.pandas_helper import assert_has_pandas
+from openai.datalib.pandas_helper import pandas as pd
 
 
 class Remediation(NamedTuple):
@@ -158,6 +159,7 @@ def long_examples_validator(df):
 
     ft_type = infer_task_type(df)
     if ft_type != "open-ended generation":
+
         def get_long_indexes(d):
             long_examples = d.apply(
                 lambda x: len(x.prompt) + len(x.completion) > 10000, axis=1
@@ -171,10 +173,12 @@ def long_examples_validator(df):
             optional_msg = f"Remove {len(long_indexes)} long examples"
 
             def optional_fn(x):
-                
+
                 long_indexes_to_drop = get_long_indexes(x)
                 if long_indexes != long_indexes_to_drop:
-                    sys.stdout.write(f"The indices of the long examples has changed as a result of a previously applied recommendation.\nThe {len(long_indexes_to_drop)} long examples to be dropped are now at the following indices: {long_indexes_to_drop}\n")
+                    sys.stdout.write(
+                        f"The indices of the long examples has changed as a result of a previously applied recommendation.\nThe {len(long_indexes_to_drop)} long examples to be dropped are now at the following indices: {long_indexes_to_drop}\n"
+                    )
                 return x.drop(long_indexes_to_drop)
 
     return Remediation(
openai/wandb_logger.py
@@ -14,8 +14,8 @@ if WANDB_AVAILABLE:
     from pathlib import Path
 
     from openai import File, FineTune
-    from openai.datalib import numpy as np
-    from openai.datalib import pandas as pd
+    from openai.datalib.numpy_helper import numpy as np
+    from openai.datalib.pandas_helper import pandas as pd
 
 
 class WandbLogger: