Commit 870534d3

Logan Kilpatrick <logan@openai.com>
2023-09-26 23:34:18
Update embeddings_utils.py to set default model to text-embedding-ada-002 (#604)
* Update embeddings_utils.py
* Update max tokens for new embeddings model
1 parent 2d71184
Changed files (1)
openai/embeddings_utils.py
@@ -15,7 +15,7 @@ from openai.datalib.pandas_helper import pandas as pd
 
 
 @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
-def get_embedding(text: str, engine="text-similarity-davinci-001", **kwargs) -> List[float]:
+def get_embedding(text: str, engine="text-embedding-ada-002", **kwargs) -> List[float]:
 
     # replace newlines, which can negatively affect performance.
     text = text.replace("\n", " ")
@@ -25,7 +25,7 @@ def get_embedding(text: str, engine="text-similarity-davinci-001", **kwargs) ->
 
 @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
 async def aget_embedding(
-    text: str, engine="text-similarity-davinci-001", **kwargs
+    text: str, engine="text-embedding-ada-002", **kwargs
 ) -> List[float]:
 
     # replace newlines, which can negatively affect performance.
@@ -38,9 +38,9 @@ async def aget_embedding(
 
 @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
 def get_embeddings(
-    list_of_text: List[str], engine="text-similarity-babbage-001", **kwargs
+    list_of_text: List[str], engine="text-embedding-ada-002", **kwargs
 ) -> List[List[float]]:
-    assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."
+    assert len(list_of_text) <= 8191, "The batch size should not be larger than 8191."
 
     # replace newlines, which can negatively affect performance.
     list_of_text = [text.replace("\n", " ") for text in list_of_text]
@@ -51,9 +51,9 @@ def get_embeddings(
 
 @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
 async def aget_embeddings(
-    list_of_text: List[str], engine="text-similarity-babbage-001", **kwargs
+    list_of_text: List[str], engine="text-embedding-ada-002", **kwargs
 ) -> List[List[float]]:
-    assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."
+    assert len(list_of_text) <= 8191, "The batch size should not be larger than 8191."
 
     # replace newlines, which can negatively affect performance.
     list_of_text = [text.replace("\n", " ") for text in list_of_text]