Commit e389823b

Morgan McGuire <morganmcg1@users.noreply.github.com>
2023-09-13 01:54:15
Update the wandb logger (#590)
* Update WandbLogger for new FineTuningJob api * remove prints * add docs link * remove pd * add pandas check * list all jobs * move pandas assert --------- Co-authored-by: Morgan McGuire <morganmcguire@Morgans-MacBook-Pro.local> Co-authored-by: Thomas Capelle <tcapelle@pm.me> Co-authored-by: John Allard <john@jhallard.com>
1 parent 5d50e9e
openai/_openai_scripts.py
@@ -47,7 +47,7 @@ def main():
     subparsers = parser.add_subparsers()
     sub_api = subparsers.add_parser("api", help="Direct API calls")
     sub_tools = subparsers.add_parser("tools", help="Client side tools for convenience")
-    sub_wandb = subparsers.add_parser("wandb", help="Logging with Weights & Biases")
+    sub_wandb = subparsers.add_parser("wandb", help="Logging with Weights & Biases, see https://docs.wandb.ai/guides/integrations/openai for documentation")
 
     api_register(sub_api)
     tools_register(sub_tools)
openai/cli.py
@@ -1375,7 +1375,7 @@ Mutually exclusive with `top_p`.""",
 
 def wandb_register(parser):
     subparsers = parser.add_subparsers(
-        title="wandb", help="Logging with Weights & Biases"
+        title="wandb", help="Logging with Weights & Biases, see https://docs.wandb.ai/guides/integrations/openai for documentation"
     )
 
     def help(args):
@@ -1394,17 +1394,23 @@ def wandb_register(parser):
     )
     sub.add_argument(
         "--project",
-        default="GPT-3",
-        help="""Name of the project where you're sending runs. By default, it is "GPT-3".""",
+        default="OpenAI-Fine-Tune",
+        help="""Name of the Weights & Biases project where you're sending runs. By default, it is "OpenAI-Fine-Tune".""",
     )
     sub.add_argument(
         "--entity",
-        help="Username or team name where you're sending runs. By default, your default entity is used, which is usually your username.",
+        help="Weights & Biases username or team name where you're sending runs. By default, your default entity is used, which is usually your username.",
     )
     sub.add_argument(
         "--force",
         action="store_true",
         help="Forces logging and overwrite existing wandb run of the same fine-tune.",
     )
+    sub.add_argument(
+        "--legacy",
+        action="store_true",
+        help="Log results from legacy OpenAI /v1/fine-tunes api",
+    )
     sub.set_defaults(force=False)
+    sub.set_defaults(legacy=False)
     sub.set_defaults(func=WandbLogger.sync)
openai/wandb_logger.py
@@ -13,9 +13,9 @@ if WANDB_AVAILABLE:
     import re
     from pathlib import Path
 
-    from openai import File, FineTune
+    from openai import File, FineTune, FineTuningJob
     from openai.datalib.numpy_helper import numpy as np
-    from openai.datalib.pandas_helper import pandas as pd
+    from openai.datalib.pandas_helper import assert_has_pandas, pandas as pd
 
 
 class WandbLogger:
@@ -34,9 +34,10 @@ class WandbLogger:
         cls,
         id=None,
         n_fine_tunes=None,
-        project="GPT-3",
+        project="OpenAI-Fine-Tune",
         entity=None,
         force=False,
+        legacy=False,
         **kwargs_wandb_init,
     ):
         """
@@ -47,18 +48,26 @@ class WandbLogger:
         :param entity: Username or team name where you're sending runs. By default, your default entity is used, which is usually your username.
         :param force: Forces logging and overwrite existing wandb run of the same fine-tune.
         """
+        
+        assert_has_pandas()
 
         if not WANDB_AVAILABLE:
             return
 
         if id:
-            fine_tune = FineTune.retrieve(id=id)
+            print("Retrieving fine-tune job...")
+            if legacy:
+                fine_tune = FineTune.retrieve(id=id)
+            else:
+                fine_tune = FineTuningJob.retrieve(id=id)
             fine_tune.pop("events", None)
             fine_tunes = [fine_tune]
-
         else:
             # get list of fine_tune to log
-            fine_tunes = FineTune.list()
+            if legacy:
+                fine_tunes = FineTune.list()
+            else:
+                fine_tunes =  list(FineTuningJob.auto_paging_iter())
             if not fine_tunes or fine_tunes.get("data") is None:
                 print("No fine-tune has been retrieved")
                 return
@@ -76,6 +85,7 @@ class WandbLogger:
                 project,
                 entity,
                 force,
+                legacy,
                 show_individual_warnings,
                 **kwargs_wandb_init,
             )
@@ -94,6 +104,7 @@ class WandbLogger:
         project,
         entity,
         force,
+        legacy,
         show_individual_warnings,
         **kwargs_wandb_init,
     ):
@@ -110,7 +121,10 @@ class WandbLogger:
 
         # check results are present
         try:
-            results_id = fine_tune["result_files"][0]["id"]
+            if legacy:
+                results_id = fine_tune["result_files"][0]["id"]
+            else:
+                results_id = fine_tune["result_files"][0]
             results = File.download(id=results_id).decode("utf-8")
         except:
             if show_individual_warnings:
README.md
@@ -129,6 +129,14 @@ openai.Model.delete("ft:gpt-3.5-turbo:acemeco:suffix:abc123")
 
 You can learn more in our [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning).
 
+To log the training results from fine-tuning to Weights & Biases, use:
+
+```
+openai wandb sync
+```
+
+For more information, read the [Weights & Biases documentation](https://docs.wandb.ai/guides/integrations/openai) on the OpenAI integration.
+
 ### Moderation
 
 OpenAI provides a free Moderation endpoint that can be used to check whether content complies with the OpenAI [content policy](https://platform.openai.com/docs/usage-policies).