Commit bd269ab9
Changed files (9)
openai/api_resources/embedding.py
@@ -1,10 +1,10 @@
import base64
import time
-
from openai import util
from openai.api_resources.abstract.engine_api_resource import EngineAPIResource
-from openai.datalib import numpy as np, assert_has_numpy
+from openai.datalib.numpy_helper import assert_has_numpy
+from openai.datalib.numpy_helper import numpy as np
from openai.error import TryAgain
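
Note: embedding.py keeps its base64/numpy imports and only changes where numpy comes from. As a hedged illustration of why the two are used together (not part of this commit; decode_embedding is a hypothetical helper), decoding a base64-encoded embedding with the optional numpy dependency could look like:

    import base64

    from openai.datalib.numpy_helper import assert_has_numpy
    from openai.datalib.numpy_helper import numpy as np

    def decode_embedding(b64_data: str) -> list:
        # Fail fast with the instructive install message if numpy is not available.
        assert_has_numpy()
        # Assumption: the payload is base64-encoded float32 values.
        return np.frombuffer(base64.b64decode(b64_data), dtype="float32").tolist()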
openai/datalib.py → openai/datalib/common.py
@@ -13,19 +13,6 @@ assertions with instructive error messages.
See also `setup.py`.
"""
-try:
- import numpy
-except ImportError:
- numpy = None
-
-try:
- import pandas
-except ImportError:
- pandas = None
-
-HAS_NUMPY = bool(numpy)
-HAS_PANDAS = bool(pandas)
-
INSTRUCTIONS = """
OpenAI error:
@@ -39,18 +26,7 @@ This feature requires additional dependencies:
"""
NUMPY_INSTRUCTIONS = INSTRUCTIONS.format(library="numpy")
-PANDAS_INSTRUCTIONS = INSTRUCTIONS.format(library="pandas")
class MissingDependencyError(Exception):
pass
-
-
-def assert_has_numpy():
- if not HAS_NUMPY:
- raise MissingDependencyError(NUMPY_INSTRUCTIONS)
-
-
-def assert_has_pandas():
- if not HAS_PANDAS:
- raise MissingDependencyError(PANDAS_INSTRUCTIONS)
openai/datalib/numpy_helper.py
@@ -0,0 +1,15 @@
+from openai.datalib.common import INSTRUCTIONS, MissingDependencyError
+
+try:
+ import numpy
+except ImportError:
+ numpy = None
+
+HAS_NUMPY = bool(numpy)
+
+NUMPY_INSTRUCTIONS = INSTRUCTIONS.format(library="numpy")
+
+
+def assert_has_numpy():
+ if not HAS_NUMPY:
+ raise MissingDependencyError(NUMPY_INSTRUCTIONS)
openai/datalib/pandas_helper.py
@@ -0,0 +1,15 @@
+from openai.datalib.common import INSTRUCTIONS, MissingDependencyError
+
+try:
+ import pandas
+except ImportError:
+ pandas = None
+
+HAS_PANDAS = bool(pandas)
+
+PANDAS_INSTRUCTIONS = INSTRUCTIONS.format(library="pandas")
+
+
+def assert_has_pandas():
+ if not HAS_PANDAS:
+ raise MissingDependencyError(PANDAS_INSTRUCTIONS)
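
Both helper modules preserve the old openai.datalib contract: each exposes the library object (or None), a HAS_* flag, and an assert_has_* guard that raises MissingDependencyError with install instructions. A minimal sketch of the intended caller pattern (illustrative only; read_training_file is a hypothetical function):

    from openai.datalib.pandas_helper import assert_has_pandas
    from openai.datalib.pandas_helper import pandas as pd

    def read_training_file(path: str):
        # Guard first: raises MissingDependencyError with install instructions if pandas is absent.
        assert_has_pandas()
        return pd.read_json(path, lines=True)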
openai/tests/test_long_examples_validator.py
@@ -4,12 +4,8 @@ from tempfile import NamedTemporaryFile
import pytest
-from openai.datalib import (
- HAS_NUMPY,
- HAS_PANDAS,
- NUMPY_INSTRUCTIONS,
- PANDAS_INSTRUCTIONS,
-)
+from openai.datalib.numpy_helper import HAS_NUMPY, NUMPY_INSTRUCTIONS
+from openai.datalib.pandas_helper import HAS_PANDAS, PANDAS_INSTRUCTIONS
@pytest.mark.skipif(not HAS_PANDAS, reason=PANDAS_INSTRUCTIONS)
@@ -54,5 +50,5 @@ def test_long_examples_validator() -> None:
assert prepared_data_cmd_output.stderr == ""
# validate get_long_indexes() applied during optional_fn() call in long_examples_validator()
assert "indices of the long examples has changed" in prepared_data_cmd_output.stdout
-
+
return prepared_data_cmd_output.stdout
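
The test keeps the same skip-if-missing guards; only the import paths change. The same guard pattern for a hypothetical new test that needs both optional libraries might look like:

    import pytest

    from openai.datalib.numpy_helper import HAS_NUMPY, NUMPY_INSTRUCTIONS
    from openai.datalib.pandas_helper import HAS_PANDAS, PANDAS_INSTRUCTIONS

    # Skip, citing the install instructions, when an optional dependency is absent.
    @pytest.mark.skipif(not HAS_NUMPY, reason=NUMPY_INSTRUCTIONS)
    @pytest.mark.skipif(not HAS_PANDAS, reason=PANDAS_INSTRUCTIONS)
    def test_needs_numpy_and_pandas() -> None:
        ...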
openai/__init__.py
@@ -3,16 +3,26 @@
# Originally forked from the MIT-licensed Stripe Python bindings.
import os
+import sys
+from typing import TYPE_CHECKING, Optional
+
from contextvars import ContextVar
-from typing import Optional, TYPE_CHECKING
+
+if "pkg_resources" not in sys.modules:
+ # workaround for the following:
+ # https://github.com/benoitc/gunicorn/pull/2539
+ sys.modules["pkg_resources"] = object() # type: ignore[assignment]
+ import aiohttp
+
+ del sys.modules["pkg_resources"]
from openai.api_resources import (
Audio,
ChatCompletion,
Completion,
Customer,
- Edit,
Deployment,
+ Edit,
Embedding,
Engine,
ErrorObject,
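
The new block stubs pkg_resources in sys.modules before importing aiohttp and deletes the stub afterwards, so that importing openai does not pull in the real pkg_resources (see the linked gunicorn PR for background). The general pattern, sketched with a hypothetical heavy_dep module name:

    import sys

    if "heavy_dep" not in sys.modules:
        # Shadow the module with a throwaway placeholder for the duration of the import ...
        sys.modules["heavy_dep"] = object()  # type: ignore[assignment]
        import aiohttp  # would otherwise import heavy_dep as a side effect
        # ... then remove the placeholder so later imports get the real module.
        del sys.modules["heavy_dep"]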
openai/embeddings_utils.py
@@ -10,8 +10,8 @@ from sklearn.metrics import average_precision_score, precision_recall_curve
from tenacity import retry, stop_after_attempt, wait_random_exponential
import openai
-from openai.datalib import numpy as np
-from openai.datalib import pandas as pd
+from openai.datalib.numpy_helper import numpy as np
+from openai.datalib.pandas_helper import pandas as pd
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
openai/validators.py
@@ -2,7 +2,8 @@ import os
import sys
from typing import Any, Callable, NamedTuple, Optional
-from openai.datalib import pandas as pd, assert_has_pandas
+from openai.datalib.pandas_helper import assert_has_pandas
+from openai.datalib.pandas_helper import pandas as pd
class Remediation(NamedTuple):
@@ -158,6 +159,7 @@ def long_examples_validator(df):
ft_type = infer_task_type(df)
if ft_type != "open-ended generation":
+
def get_long_indexes(d):
long_examples = d.apply(
lambda x: len(x.prompt) + len(x.completion) > 10000, axis=1
@@ -171,10 +173,12 @@ def long_examples_validator(df):
optional_msg = f"Remove {len(long_indexes)} long examples"
def optional_fn(x):
-
+
long_indexes_to_drop = get_long_indexes(x)
if long_indexes != long_indexes_to_drop:
- sys.stdout.write(f"The indices of the long examples has changed as a result of a previously applied recommendation.\nThe {len(long_indexes_to_drop)} long examples to be dropped are now at the following indices: {long_indexes_to_drop}\n")
+ sys.stdout.write(
+ f"The indices of the long examples has changed as a result of a previously applied recommendation.\nThe {len(long_indexes_to_drop)} long examples to be dropped are now at the following indices: {long_indexes_to_drop}\n"
+ )
return x.drop(long_indexes_to_drop)
return Remediation(
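
For context, long_examples_validator packages its fix as a Remediation: optional_fn drops the rows flagged by get_long_indexes, and the reformatted sys.stdout.write message is exactly the string that test_long_examples_validator.py checks for. A rough sketch of applying such a remediation to a dataframe (simplified; any field beyond optional_msg/optional_fn is an assumption here):

    from typing import Any, Callable, NamedTuple, Optional

    class Remediation(NamedTuple):
        # Simplified stand-in; the real NamedTuple in openai/validators.py carries more fields.
        optional_msg: Optional[str] = None
        optional_fn: Optional[Callable[[Any], Any]] = None

    def apply_optional(df, remediation: Remediation):
        # Apply the optional fix only when the validator produced one.
        if remediation.optional_fn is not None:
            return remediation.optional_fn(df)
        return df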
openai/wandb_logger.py
@@ -14,8 +14,8 @@ if WANDB_AVAILABLE:
from pathlib import Path
from openai import File, FineTune
- from openai.datalib import numpy as np
- from openai.datalib import pandas as pd
+ from openai.datalib.numpy_helper import numpy as np
+ from openai.datalib.pandas_helper import pandas as pd
class WandbLogger: