From 9a6d1689364a47942b292a1cf2419d818cc2fdf3 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 3 Nov 2021 18:19:48 -0500
Subject: [PATCH 01/26] feat: log fine_tune with wandb

---
 openai/cli.py    |  30 +++++++++++++
 openai/logger.py | 110 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+)
 create mode 100644 openai/logger.py

diff --git a/openai/cli.py b/openai/cli.py
index 872209f5bb..4102cc5b0e 100644
--- a/openai/cli.py
+++ b/openai/cli.py
@@ -13,6 +13,7 @@
     read_any_format,
     write_out_file,
 )
+import openai.logger
 
 
 class bcolors:
@@ -478,6 +479,15 @@ def prepare_data(cls, args):
         write_out_file(df, fname, any_optional_or_necessary_applied, auto_accept)
 
 
+class Logger:
+    @classmethod
+    def log(cls, args):
+        resp = openai.logger.Logger.log(
+            id=args.id, n_jobs=args.n_jobs, project=args.project, entity=args.entity
+        )
+        print(resp)
+
+
 def tools_register(parser):
     subparsers = parser.add_subparsers(
         title="Tools", help="Convenience client side tools"
@@ -863,3 +873,23 @@ def help(args):
     sub = subparsers.add_parser("fine_tunes.cancel")
     sub.add_argument("-i", "--id", required=True, help="The id of the fine-tune job")
     sub.set_defaults(func=FineTune.cancel)
+
+    sub = subparsers.add_parser("fine_tunes.wandb")
+    sub.add_argument("-i", "--id", help="The id of the fine-tune job")
+    sub.add_argument(
+        "-n",
+        "--n_jobs",
+        type=int,
+        default=10,
+        help="Number of most recent fine-tune jobs to log when an id is not provided",
+    )
+    sub.add_argument(
+        "--project",
+        default="GPT-3",
+        help="""Name of the project where you're sending runs. By default, it is "GPT-3".""",
+    )
+    sub.add_argument(
+        "--entity",
+        help="Username or team name where you're sending runs. By default, your default entity is used, which is usually your username.",
+    )
+    sub.set_defaults(func=Logger.log)
diff --git a/openai/logger.py b/openai/logger.py
new file mode 100644
index 0000000000..bb2dd49c3e
--- /dev/null
+++ b/openai/logger.py
@@ -0,0 +1,110 @@
+try:
+    import wandb
+
+    WANDB_AVAILABLE = True
+except:
+    WANDB_AVAILABLE = False
+
+
+if WANDB_AVAILABLE:
+    from openai import FineTune, File
+    import io
+    import numpy as np
+    import pandas as pd
+
+
+class Logger:
+    if not WANDB_AVAILABLE:
+        print("WandbLogger requires wandb to be installed. Run `pip install wandb`.")
+    else:
+        _wandb_api = wandb.Api()
+
+    @classmethod
+    def log(
+        cls,
+        id=None,
+        n_jobs=10,
+        project="GPT-3",
+        entity=None,
+        **kwargs_wandb_init,
+    ):
+        # TODO: add docstring
+
+        if not WANDB_AVAILABLE:
+            return
+
+        if id:
+            fine_tune = FineTune.retrieve(id=id)
+            fine_tune.pop("events", None)
+            fine_tunes = [fine_tune]
+
+        else:
+            # get list of fine_tune to log
+            fine_tunes = FineTune.list()
+            if not fine_tunes or fine_tunes.get("data") is None:
+                print("No fine-tune jobs have been retrieved")
+                return
+            fine_tunes = fine_tunes["data"][-n_jobs:]
+
+        # log starting from oldest fine_tune
+        for fine_tune in fine_tunes:
+            cls._log_fine_tune(fine_tune, project, entity, **kwargs_wandb_init)
+        return "Command completed successfully"
+
+    @classmethod
+    def _log_fine_tune(cls, fine_tune, project, entity, **kwargs_wandb_init):
+        fine_tune_id = fine_tune.get("id")
+        status = fine_tune.get("status")
+
+        # check run completed successfully
+        if status != "succeeded":
+            print(
+                f'Fine-tune job {fine_tune_id} has the status "{status}" and will not be logged'
+            )
+
+        # check run has not been logged already
+        run_path = f"{project}/{fine_tune_id}"
+        if entity is not None:
+            run_path = f"{entity}/{run_path}"
+        wandb_run = cls._get_wandb_run(run_path)
+        if cls._get_wandb_run(run_path):
+            print(
+                f"Fine-tune job {fine_tune_id} has already been logged at {wandb_run.url}"
+            )
+            return
+            # TODO: add a "force" argument
+
+        # retrieve results
+        results_id = fine_tune["result_files"][0]["id"]
+        results = File.download(id=results_id).decode("utf-8")
+
+        # start a wandb run
+        wandb.init(
+            job_type="finetune",
+            config=fine_tune,
+            project=project,
+            entity=entity,
+            name=fine_tune_id,
+            id=fine_tune_id,
+            **kwargs_wandb_init,
+        )
+
+        # log results
+        df_results = pd.read_csv(io.StringIO(results))
+        for _, row in df_results.iterrows():
+            metrics = {k: v for k, v in row.items() if not np.isnan(v)}
+            step = metrics.pop("step")
+            if step is not None:
+                step = int(step)
+            wandb.log(metrics, step=step)
+
+        # TODO: retrieve training/validation files if not already present
+        # TODO: mark the run as successful so we can overwrite it in case it did not log properly
+        wandb.finish()
+
+    @classmethod
+    def _get_wandb_run(cls, run_path):
+        try:
+            return cls._wandb_api.run(run_path)
+        except Exception as e:
+            return False

From a24678575915fd957e6672f74e99de05da1e06a8 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 3 Nov 2021 20:00:44 -0500
Subject: [PATCH 02/26] feat: ensure we are logged in

---
 openai/logger.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/openai/logger.py b/openai/logger.py
index bb2dd49c3e..12ca3e5fbc 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -15,9 +15,10 @@
 
 class Logger:
     if not WANDB_AVAILABLE:
-        print("WandbLogger requires wandb to be installed. Run `pip install wandb`.")
+        print("Logging requires wandb to be installed. Run `pip install wandb`.")
     else:
         _wandb_api = wandb.Api()
+        _logged_in = False
 
     @classmethod
     def log(
@@ -102,8 +103,17 @@ def _log_fine_tune(cls, fine_tune, project, entity, **kwargs_wandb_init):
         # TODO: mark the run as successful so we can overwrite it in case it did not log properly
         wandb.finish()
 
+    @classmethod
+    def _ensure_logged_in(cls):
+        if not cls._logged_in:
+            if wandb.login():
+                cls._logged_in = True
+            else:
+                raise Exception("You need to log in to wandb")
+
     @classmethod
     def _get_wandb_run(cls, run_path):
+        cls._ensure_logged_in()
         try:
             return cls._wandb_api.run(run_path)
         except Exception as e:

From b24d1b6d21149974cf1c01c82a6feb73f6803cd2 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 3 Nov 2021 20:08:52 -0500
Subject: [PATCH 03/26] feat: cli wandb namespace

---
 bin/openai    |  4 +++-
 openai/cli.py | 13 ++++++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/bin/openai b/bin/openai
index 3c34b69347..d234256c62 100755
--- a/bin/openai
+++ b/bin/openai
@@ -4,7 +4,7 @@ import logging
 import sys
 
 import openai
-from openai.cli import api_register, display_error, tools_register
+from openai.cli import api_register, display_error, tools_register, wandb_register
 
 logger = logging.getLogger()
 formatter = logging.Formatter("[%(asctime)s] %(message)s")
@@ -39,9 +39,11 @@ def main():
     subparsers = parser.add_subparsers()
     sub_api = subparsers.add_parser("api", help="Direct API calls")
     sub_tools = subparsers.add_parser("tools", help="Client side tools for convenience")
+    sub_wandb = subparsers.add_parser("wandb", help="Logging with Weights & Biases")
 
     api_register(sub_api)
     tools_register(sub_tools)
+    wandb_register(sub_wandb)
 
     args = parser.parse_args()
     if args.verbosity == 1:
diff --git a/openai/cli.py b/openai/cli.py
index 4102cc5b0e..6cc637baaf 100644
--- a/openai/cli.py
+++ b/openai/cli.py
@@ -874,7 +874,18 @@ def help(args):
     sub.add_argument("-i", "--id", required=True, help="The id of the fine-tune job")
     sub.set_defaults(func=FineTune.cancel)
 
-    sub = subparsers.add_parser("fine_tunes.wandb")
+
+def wandb_register(parser):
+    subparsers = parser.add_subparsers(
+        title="wandb", help="Logging with Weights & Biases"
+    )
+
+    def help(args):
+        parser.print_help()
+
+    parser.set_defaults(func=help)
+
+    sub = subparsers.add_parser("log")
     sub.add_argument("-i", "--id", help="The id of the fine-tune job")
     sub.add_argument(
         "-n",

From dfb67fa67e82ec93dba9929fe4aecace0eb0580a Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 3 Nov 2021 20:17:37 -0500
Subject: [PATCH 04/26] =?UTF-8?q?feat:=C2=A0add=20fine=5Ftuned=5Fmodel=20t?=
 =?UTF-8?q?o=20summary?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 openai/logger.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/openai/logger.py b/openai/logger.py
index 12ca3e5fbc..46185c3d26 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -98,6 +98,9 @@ def _log_fine_tune(cls, fine_tune, project, entity, **kwargs_wandb_init):
             if step is not None:
                 step = int(step)
             wandb.log(metrics, step=step)
+        fine_tuned_model = fine_tune.get("fine_tuned_model")
+        if fine_tuned_model is not None:
+            wandb.summary["fine_tuned_model"] = fine_tuned_model
 
         # TODO: retrieve training/validation files if not already present
         # TODO: mark the run as successful so we can overwrite it in case it did not log properly

From 1036d6d7e5bd7a8f4d0415a008536e3469b3607b Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 3 Nov 2021 21:21:39 -0500
Subject: [PATCH 05/26] feat: log training & validation files

---
 openai/logger.py | 50 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/openai/logger.py b/openai/logger.py
index 46185c3d26..6488f1b600 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -11,6 +11,7 @@
     import io
     import numpy as np
     import pandas as pd
+    from pathlib import Path
 
 
 class Logger:
@@ -102,7 +103,9 @@ def _log_fine_tune(cls, fine_tune, project, entity, **kwargs_wandb_init):
         if fine_tuned_model is not None:
             wandb.summary["fine_tuned_model"] = fine_tuned_model
 
-        # TODO: retrieve training/validation files if not already present
+        # training/validation files
+        cls._log_artifacts(fine_tune)
+
         # TODO: mark the run as successful so we can overwrite it in case it did not log properly
         wandb.finish()
 
@@ -121,3 +124,48 @@ def _get_wandb_run(cls, run_path):
             return cls._wandb_api.run(run_path)
         except Exception as e:
             return False
+
+    @classmethod
+    def _log_artifacts(cls, fine_tune):
+        training_file = (
+            fine_tune["training_files"][0] if fine_tune.get("training_files") else None
+        )
+        validation_file = (
+            fine_tune["validation_files"][0]
+            if fine_tune.get("validation_files")
+            else None
+        )
+        for file, prefix in ((training_file, "train"), (validation_file, "valid")):
+            cls._log_artifact(file, prefix)
+
+    @classmethod
+    def _log_artifact(cls, file, prefix):
+        file_id = file["id"]
+        filename = Path(file["filename"]).name
+        stem = Path(file["filename"]).stem
+
+        # get file content
+        try:
+            file_content = File.download(id=file_id).decode("utf-8")
+        except:
+            print(
+                f"File {file_id} could not be retrieved. Make sure you are allowed to download training/validation files"
+            )
+            return
+        artifact = wandb.Artifact(f"{prefix}-{filename}", type=prefix, metadata=file)
+        with artifact.new_file(filename, mode="w") as f:
+            f.write(file_content)
+
+        # create a Table
+        try:
+            table = cls._make_table(file_content)
+            artifact.add(table, stem)
+        except:
+            print(f"File {file_id} could not be read as a valid JSON file")
+
+        wandb.run.log_artifact(artifact, aliases=[file_id, "latest"])
+
+    @classmethod
+    def _make_table(cls, file_content):
+        df = pd.read_json(io.StringIO(file_content), orient="records", lines=True)
+        return wandb.Table(dataframe=df)

From bb3def60c0c10dbf00857edad78da565be304832 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 3 Nov 2021 21:49:28 -0500
Subject: [PATCH 06/26] feat: re-log if was not successful or force

---
 openai/cli.py    | 12 +++++++++++-
 openai/logger.py | 31 ++++++++++++++++++++++---------
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/openai/cli.py b/openai/cli.py
index 6cc637baaf..54f57ef478 100644
--- a/openai/cli.py
+++ b/openai/cli.py
@@ -483,7 +483,11 @@ class Logger:
     @classmethod
     def log(cls, args):
         resp = openai.logger.Logger.log(
-            id=args.id, n_jobs=args.n_jobs, project=args.project, entity=args.entity
+            id=args.id,
+            n_jobs=args.n_jobs,
+            project=args.project,
+            entity=args.entity,
+            force=args.force,
         )
         print(resp)
 
@@ -903,4 +907,10 @@ def help(args):
         "--entity",
         help="Username or team name where you're sending runs. By default, your default entity is used, which is usually your username.",
     )
+    sub.add_argument(
+        "--force",
+        action="store_true",
+        help="Forces logging and overwrite existing wandb run of the same finetune job.",
+    )
+    sub.set_defaults(force=False)
     sub.set_defaults(func=Logger.log)
diff --git a/openai/logger.py b/openai/logger.py
index 6488f1b600..1dda17197b 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -28,6 +28,7 @@ def log(
         n_jobs=10,
         project="GPT-3",
         entity=None,
+        force=False,
         **kwargs_wandb_init,
     ):
         # TODO: add docstring
@@ -50,11 +51,11 @@ def log(
 
         # log starting from oldest fine_tune
         for fine_tune in fine_tunes:
-            cls._log_fine_tune(fine_tune, project, entity, **kwargs_wandb_init)
+            cls._log_fine_tune(fine_tune, project, entity, force, **kwargs_wandb_init)
         return "Command completed successfully"
 
     @classmethod
-    def _log_fine_tune(cls, fine_tune, project, entity, **kwargs_wandb_init):
+    def _log_fine_tune(cls, fine_tune, project, entity, force, **kwargs_wandb_init):
         fine_tune_id = fine_tune.get("id")
         status = fine_tune.get("status")
 
@@ -69,12 +70,22 @@ def _log_fine_tune(cls, fine_tune, project, entity, **kwargs_wandb_init):
         if entity is not None:
             run_path = f"{entity}/{run_path}"
         wandb_run = cls._get_wandb_run(run_path)
-        if cls._get_wandb_run(run_path):
-            print(
-                f"Fine-tune job {fine_tune_id} has already been logged at {wandb_run.url}"
-            )
-            return
-            # TODO: add a "force" argument
+        if wandb_run:
+            wandb_status = wandb_run.summary.get("status")
+            if wandb_status == "succeeded":
+                print(
+                    f"Fine-tune job {fine_tune_id} has already been logged successfully at {wandb_run.url}"
+                )
+                if not force:
+                    print(
+                        'Use "--force" in the CLI or "force=True" in python if you want to overwrite previous run'
+                    )
+            if wandb_status != "succeeded" or force:
+                print(
+                    f"A new wandb run will be created for fine-tune job {fine_tune_id} and previous run will be overwritten"
+                )
+            else:
+                return
 
         # retrieve results
         results_id = fine_tune["result_files"][0]["id"]
@@ -106,7 +117,9 @@ def _log_fine_tune(cls, fine_tune, project, entity, **kwargs_wandb_init):
         # training/validation files
         cls._log_artifacts(fine_tune)
 
-        # TODO: mark the run as successful so we can overwrite it in case it did not log properly
+        # mark run as complete
+        wandb.summary["status"] = "succeeded"
+
         wandb.finish()
 
     @classmethod

From 2d34eb28a407591aa9cc684e29ccd3712de5df1b Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 3 Nov 2021 21:55:55 -0500
Subject: [PATCH 07/26] doc: add docstring

---
 openai/logger.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/openai/logger.py b/openai/logger.py
index 1dda17197b..fe26debdb2 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -15,6 +15,10 @@
 
 
 class Logger:
+    """
+    Log fine-tune jobs to Weights & Biases
+    """
+
     if not WANDB_AVAILABLE:
         print("Logging requires wandb to be installed. Run `pip install wandb`.")
     else:
@@ -31,7 +35,14 @@ def log(
         force=False,
         **kwargs_wandb_init,
     ):
-        # TODO: add docstring
+        """
+        Log a fine-tune jobs to Weights & Biases.
+        :param id: The id of the fine-tune job (optional)
+        :param n_jobs: Number of most recent fine-tune jobs to log when an id is not provided
+        :param project: Name of the project where you're sending runs. By default, it is "GPT-3".
+        :param entity: Username or team name where you're sending runs. By default, your default entity is used, which is usually your username.
+        :param force: Forces logging and overwrite existing wandb run of the same finetune job.
+        """
 
         if not WANDB_AVAILABLE:
             return

From 0b0f4566cdc62d4142c04fb02556b9915805f185 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 3 Nov 2021 22:21:12 -0500
Subject: [PATCH 08/26] feat: set wandb api only when needed

---
 openai/logger.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/openai/logger.py b/openai/logger.py
index fe26debdb2..9910386b2e 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -22,7 +22,7 @@ class Logger:
     if not WANDB_AVAILABLE:
         print("Logging requires wandb to be installed. Run `pip install wandb`.")
     else:
-        _wandb_api = wandb.Api()
+        _wandb_api = False
         _logged_in = False
 
     @classmethod
@@ -145,6 +145,8 @@ def _ensure_logged_in(cls):
     def _get_wandb_run(cls, run_path):
         cls._ensure_logged_in()
         try:
+            if cls._wandb_api is None:
+                cls._wandb_api = wandb.Api()
             return cls._wandb_api.run(run_path)
         except Exception as e:
             return False

From c948a2e7534fbf05b2a33a3a119996420bd44e2c Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 3 Nov 2021 22:38:40 -0500
Subject: [PATCH 09/26] fix: train/validation files are inputs

---
 openai/logger.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openai/logger.py b/openai/logger.py
index 9910386b2e..768392a61a 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -189,7 +189,7 @@ def _log_artifact(cls, file, prefix):
         except:
             print(f"File {file_id} could not be read as a valid JSON file")
 
-        wandb.run.log_artifact(artifact, aliases=[file_id, "latest"])
+        wandb.run.use_artifact(artifact, aliases=[file_id, "latest"])
 
     @classmethod
     def _make_table(cls, file_content):

From 1d102353910b9df706cc6dff118ce794488dc5a1 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 3 Nov 2021 22:43:04 -0500
Subject: [PATCH 10/26] feat: rename artifact type

---
 openai/logger.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/openai/logger.py b/openai/logger.py
index 768392a61a..0a5013dc19 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -161,11 +161,14 @@ def _log_artifacts(cls, fine_tune):
             if fine_tune.get("validation_files")
             else None
         )
-        for file, prefix in ((training_file, "train"), (validation_file, "valid")):
-            cls._log_artifact(file, prefix)
+        for file, prefix, artifact_type in (
+            (training_file, "train", "training_files"),
+            (validation_file, "valid", "validation_files"),
+        ):
+            cls._log_artifact(file, prefix, artifact_type)
 
     @classmethod
-    def _log_artifact(cls, file, prefix):
+    def _log_artifact(cls, file, prefix, artifact_type):
         file_id = file["id"]
         filename = Path(file["filename"]).name
         stem = Path(file["filename"]).stem
@@ -178,7 +181,9 @@ def _log_artifact(cls, file, prefix):
                 f"File {file_id} could not be retrieved. Make sure you are allowed to download training/validation files"
             )
             return
-        artifact = wandb.Artifact(f"{prefix}-{filename}", type=prefix, metadata=file)
+        artifact = wandb.Artifact(
+            f"{prefix}-{filename}", type=artifact_type, metadata=file
+        )
         with artifact.new_file(filename, mode="w") as f:
             f.write(file_content)
 

From 146cdaa39793e1a70e80839e5870a549ef7c948a Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 3 Nov 2021 23:29:24 -0500
Subject: [PATCH 11/26] =?UTF-8?q?feat:=C2=A0improve=20config=20logging?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 openai/logger.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/openai/logger.py b/openai/logger.py
index 0a5013dc19..4f6e5f1507 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -105,7 +105,7 @@ def _log_fine_tune(cls, fine_tune, project, entity, force, **kwargs_wandb_init):
         # start a wandb run
         wandb.init(
             job_type="finetune",
-            config=fine_tune,
+            config=cls._get_config(fine_tune),
             project=project,
             entity=entity,
             name=fine_tune_id,
@@ -151,14 +151,24 @@ def _get_wandb_run(cls, run_path):
         except Exception as e:
             return False
 
+    @classmethod
+    def _get_config(cls, fine_tune):
+        config = dict(fine_tune)
+        for key in ("training_files", "validation_files", "result_files"):
+            if config.get(key) and len(config[key]):
+                config[key] = config[key][0]
+        return config
+
     @classmethod
     def _log_artifacts(cls, fine_tune):
         training_file = (
-            fine_tune["training_files"][0] if fine_tune.get("training_files") else None
+            fine_tune["training_files"][0]
+            if fine_tune.get("training_files") and len(fine_tune["training_files"])
+            else None
         )
         validation_file = (
             fine_tune["validation_files"][0]
-            if fine_tune.get("validation_files")
+            if fine_tune.get("validation_files") and len(fine_tune["validation_files"])
             else None
         )
         for file, prefix, artifact_type in (

From ce7352e35b06b995e8defff96fd06c1ada0ef95b Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Fri, 5 Nov 2021 11:20:47 -0500
Subject: [PATCH 12/26] =?UTF-8?q?feat:=C2=A0log=20all=20jobs=20by=20defaul?=
 =?UTF-8?q?t?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 openai/cli.py    |  2 +-
 openai/logger.py | 63 +++++++++++++++++++++++++++++++++---------------
 2 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/openai/cli.py b/openai/cli.py
index 54f57ef478..71a5bd7b1d 100644
--- a/openai/cli.py
+++ b/openai/cli.py
@@ -895,7 +895,7 @@ def help(args):
         "-n",
         "--n_jobs",
         type=int,
-        default=10,
+        default=None,
         help="Number of most recent fine-tune jobs to log when an id is not provided",
     )
     sub.add_argument(
diff --git a/openai/logger.py b/openai/logger.py
index 4f6e5f1507..fc22790ce3 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -22,14 +22,14 @@ class Logger:
     if not WANDB_AVAILABLE:
         print("Logging requires wandb to be installed. Run `pip install wandb`.")
     else:
-        _wandb_api = False
+        _wandb_api = None
         _logged_in = False
 
     @classmethod
     def log(
         cls,
         id=None,
-        n_jobs=10,
+        n_jobs=None,
         project="GPT-3",
         entity=None,
         force=False,
@@ -58,23 +58,40 @@ def log(
             if not fine_tunes or fine_tunes.get("data") is None:
                 print("No fine-tune jobs have been retrieved")
                 return
-            fine_tunes = fine_tunes["data"][-n_jobs:]
+            fine_tunes = fine_tunes["data"][-n_jobs if n_jobs is not None else None :]
 
         # log starting from oldest fine_tune
-        for fine_tune in fine_tunes:
-            cls._log_fine_tune(fine_tune, project, entity, force, **kwargs_wandb_init)
-        return "Command completed successfully"
+        show_warnings = False if id is None and n_jobs is None else True
+        fine_tune_logged = [
+            cls._log_fine_tune(
+                fine_tune,
+                project,
+                entity,
+                force,
+                show_warnings,
+                **kwargs_wandb_init,
+            )
+            for fine_tune in fine_tunes
+        ]
+
+        if not show_warnings and not any(fine_tune_logged):
+            print("No new successful fine-tune were found")
+
+        return "🎉 wandb log completed successfully"
 
     @classmethod
-    def _log_fine_tune(cls, fine_tune, project, entity, force, **kwargs_wandb_init):
+    def _log_fine_tune(
+        cls, fine_tune, project, entity, force, show_warnings, **kwargs_wandb_init
+    ):
         fine_tune_id = fine_tune.get("id")
         status = fine_tune.get("status")
 
         # check run completed successfully
-        if status != "succeeded":
+        if show_warnings and status != "succeeded":
             print(
                 f'Fine-tune job {fine_tune_id} has the status "{status}" and will not be logged'
             )
+            return
 
         # check run has not been logged already
         run_path = f"{project}/{fine_tune_id}"
@@ -83,19 +100,24 @@ def _log_fine_tune(cls, fine_tune, project, entity, force, **kwargs_wandb_init):
         wandb_run = cls._get_wandb_run(run_path)
         if wandb_run:
             wandb_status = wandb_run.summary.get("status")
-            if wandb_status == "succeeded":
-                print(
-                    f"Fine-tune job {fine_tune_id} has already been logged successfully at {wandb_run.url}"
-                )
-                if not force:
+            if show_warnings:
+                if wandb_status == "succeeded":
                     print(
-                        'Use "--force" in the CLI or "force=True" in python if you want to overwrite previous run'
+                        f"Fine-tune job {fine_tune_id} has already been logged successfully at {wandb_run.url}"
                     )
-            if wandb_status != "succeeded" or force:
-                print(
-                    f"A new wandb run will be created for fine-tune job {fine_tune_id} and previous run will be overwritten"
-                )
-            else:
+                    if not force:
+                        print(
+                            'Use "--force" in the CLI or "force=True" in python if you want to overwrite previous run'
+                        )
+                else:
+                    print(
+                        f"A run for fine-tune job {fine_tune_id} was previously created but didn't end successfully"
+                    )
+                if wandb_status != "succeeded" or force:
+                    print(
+                        f"A new wandb run will be created for fine-tune job {fine_tune_id} and previous run will be overwritten"
+                    )
+            if wandb_status == "succeeded":
                 return
 
         # retrieve results
@@ -132,6 +154,7 @@ def _log_fine_tune(cls, fine_tune, project, entity, force, **kwargs_wandb_init):
         wandb.summary["status"] = "succeeded"
 
         wandb.finish()
+        return True
 
     @classmethod
     def _ensure_logged_in(cls):
@@ -148,7 +171,7 @@ def _get_wandb_run(cls, run_path):
             if cls._wandb_api is None:
                 cls._wandb_api = wandb.Api()
             return cls._wandb_api.run(run_path)
-        except Exception as e:
+        except Exception:
             return False
 
     @classmethod

From 5e6dbe94910b9281e250fafbd50a311a78216d11 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Fri, 5 Nov 2021 11:40:15 -0500
Subject: [PATCH 13/26] feat: log job details

---
 openai/logger.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/openai/logger.py b/openai/logger.py
index fc22790ce3..2fb44accf7 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -9,6 +9,7 @@
 if WANDB_AVAILABLE:
     from openai import FineTune, File
     import io
+    import json
     import numpy as np
     import pandas as pd
     from pathlib import Path
@@ -147,7 +148,7 @@ def _log_fine_tune(
         if fine_tuned_model is not None:
             wandb.summary["fine_tuned_model"] = fine_tuned_model
 
-        # training/validation files
+        # training/validation files and job details
         cls._log_artifacts(fine_tune)
 
         # mark run as complete
@@ -184,6 +185,7 @@ def _get_config(cls, fine_tune):
 
     @classmethod
     def _log_artifacts(cls, fine_tune):
+        # training/validation files
         training_file = (
             fine_tune["training_files"][0]
             if fine_tune.get("training_files") and len(fine_tune["training_files"])
@@ -198,10 +200,24 @@ def _log_artifacts(cls, fine_tune):
             (training_file, "train", "training_files"),
             (validation_file, "valid", "validation_files"),
         ):
-            cls._log_artifact(file, prefix, artifact_type)
+            cls._log_artifact_inputs(file, prefix, artifact_type)
+
+        # job details
+        fine_tune_id = fine_tune.get("id")
+        artifact = wandb.Artifact(
+            "job_details",
+            type="job_details",
+            metadata=fine_tune,
+        )
+        with artifact.new_file("job_details.json") as f:
+            json.dump(fine_tune, f, indent=2)
+        wandb.run.log_artifact(
+            artifact,
+            aliases=[fine_tune_id, "latest"],
+        )
 
     @classmethod
-    def _log_artifact(cls, file, prefix, artifact_type):
+    def _log_artifact_inputs(cls, file, prefix, artifact_type):
         file_id = file["id"]
         filename = Path(file["filename"]).name
         stem = Path(file["filename"]).stem

From 00111baeb67cfe6c27bd111dee2ae0f4c22b1a59 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Sat, 6 Nov 2021 10:03:56 -0500
Subject: [PATCH 14/26] feat: log -> sync

---
 openai/cli.py    | 6 +++---
 openai/logger.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/openai/cli.py b/openai/cli.py
index 71a5bd7b1d..803e9f780f 100644
--- a/openai/cli.py
+++ b/openai/cli.py
@@ -889,14 +889,14 @@ def help(args):
 
     parser.set_defaults(func=help)
 
-    sub = subparsers.add_parser("log")
-    sub.add_argument("-i", "--id", help="The id of the fine-tune job")
+    sub = subparsers.add_parser("sync")
+    sub.add_argument("-i", "--id", help="The id of the fine-tune job (optional)")
     sub.add_argument(
         "-n",
         "--n_jobs",
         type=int,
         default=None,
-        help="Number of most recent fine-tune jobs to log when an id is not provided",
+        help="Number of most recent fine-tune jobs to log when an id is not provided. By default, every fine-tune is synced.",
     )
     sub.add_argument(
         "--project",
diff --git a/openai/logger.py b/openai/logger.py
index 2fb44accf7..1ea10845fc 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -27,7 +27,7 @@ class Logger:
         _logged_in = False
 
     @classmethod
-    def log(
+    def sync(
         cls,
         id=None,
         n_jobs=None,
@@ -37,7 +37,7 @@ def log(
         **kwargs_wandb_init,
     ):
         """
-        Log a fine-tune jobs to Weights & Biases.
+        Sync fine-tune job to Weights & Biases.
         :param id: The id of the fine-tune job (optional)
         :param n_jobs: Number of most recent fine-tune jobs to log when an id is not provided
         :param project: Name of the project where you're sending runs. By default, it is "GPT-3".

From 9a3edcba0fcfbf54c8ae95d7f96589ef47752ad1 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Mon, 8 Nov 2021 21:01:10 -0600
Subject: [PATCH 15/26] feat: cli wandb log -> sync

---
 openai/cli.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/openai/cli.py b/openai/cli.py
index 803e9f780f..ad6120a1bb 100644
--- a/openai/cli.py
+++ b/openai/cli.py
@@ -481,8 +481,8 @@ def prepare_data(cls, args):
 
 class Logger:
     @classmethod
-    def log(cls, args):
-        resp = openai.logger.Logger.log(
+    def sync(cls, args):
+        resp = openai.logger.Logger.sync(
             id=args.id,
             n_jobs=args.n_jobs,
             project=args.project,
@@ -913,4 +913,4 @@ def help(args):
         help="Forces logging and overwrite existing wandb run of the same finetune job.",
     )
     sub.set_defaults(force=False)
-    sub.set_defaults(func=Logger.log)
+    sub.set_defaults(func=Logger.sync)

From 2c151f3ae5ff7a6de3d80eb4a0914f29e79beecc Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Thu, 2 Dec 2021 16:05:23 -0600
Subject: [PATCH 16/26] fix: validation_files not always present

---
 openai/logger.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/openai/logger.py b/openai/logger.py
index 1ea10845fc..7ed92c6ae7 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -199,8 +199,9 @@ def _log_artifacts(cls, fine_tune):
         for file, prefix, artifact_type in (
             (training_file, "train", "training_files"),
             (validation_file, "valid", "validation_files"),
-        ):
-            cls._log_artifact_inputs(file, prefix, artifact_type)
+         ):
+            if file is not None:
+                cls._log_artifact_inputs(file, prefix, artifact_type)
 
         # job details
         fine_tune_id = fine_tune.get("id")

From 9eccf8409fa753648ff1d0ff8584d04786c5d0e0 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Thu, 2 Dec 2021 21:00:10 -0600
Subject: [PATCH 17/26] =?UTF-8?q?feat:=E2=80=AFformat=20created=5Fat=20+?=
 =?UTF-8?q?=20style?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 openai/cli.py    |  2 +-
 openai/logger.py | 11 ++++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/openai/cli.py b/openai/cli.py
index ad6120a1bb..211d2591f0 100644
--- a/openai/cli.py
+++ b/openai/cli.py
@@ -5,6 +5,7 @@
 import warnings
 
 import openai
+import openai.logger
 from openai.upload_progress import BufferReader
 from openai.validators import (
     apply_necessary_remediation,
@@ -13,7 +14,6 @@
     read_any_format,
     write_out_file,
 )
-import openai.logger
 
 
 class bcolors:
diff --git a/openai/logger.py b/openai/logger.py
index 7ed92c6ae7..434a0ccbce 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -7,12 +7,15 @@
 
 
 if WANDB_AVAILABLE:
-    from openai import FineTune, File
+    import datetime
     import io
     import json
+    from pathlib import Path
+
     import numpy as np
     import pandas as pd
-    from pathlib import Path
+
+    from openai import File, FineTune
 
 
 class Logger:
@@ -181,6 +184,8 @@ def _get_config(cls, fine_tune):
         for key in ("training_files", "validation_files", "result_files"):
             if config.get(key) and len(config[key]):
                 config[key] = config[key][0]
+        if config.get("created_at"):
+            config["created_at"] = datetime.datetime.fromtimestamp(config["created_at"])
         return config
 
     @classmethod
@@ -199,7 +204,7 @@ def _log_artifacts(cls, fine_tune):
         for file, prefix, artifact_type in (
             (training_file, "train", "training_files"),
             (validation_file, "valid", "validation_files"),
-         ):
+        ):
             if file is not None:
                 cls._log_artifact_inputs(file, prefix, artifact_type)
 

From 84c2cbdf83912f9e3be9cb8c9066140b9b8dc6ab Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Thu, 2 Dec 2021 21:11:40 -0600
Subject: [PATCH 18/26] feat: log number of training/validation samples

---
 openai/logger.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/openai/logger.py b/openai/logger.py
index 434a0ccbce..c5b11f45d2 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -244,8 +244,9 @@ def _log_artifact_inputs(cls, file, prefix, artifact_type):
 
         # create a Table
         try:
-            table = cls._make_table(file_content)
+            table, n_items = cls._make_table(file_content)
             artifact.add(table, stem)
+            wandb.config.update({f"n_{prefix}": n_items})
         except:
             print(f"File {file_id} could not be read as a valid JSON file")
 
@@ -254,4 +255,4 @@ def _log_artifact_inputs(cls, file, prefix, artifact_type):
     @classmethod
     def _make_table(cls, file_content):
         df = pd.read_json(io.StringIO(file_content), orient="records", lines=True)
-        return wandb.Table(dataframe=df)
+        return wandb.Table(dataframe=df), len(df)

From 8ac09d3c4331bb574d95cbca04deb68eef88d55b Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Tue, 18 Jan 2022 14:25:05 -0600
Subject: [PATCH 19/26] feat(wandb): avoid download if file already synced

---
 openai/logger.py | 78 +++++++++++++++++++++++++++++-------------------
 1 file changed, 48 insertions(+), 30 deletions(-)

diff --git a/openai/logger.py b/openai/logger.py
index c5b11f45d2..8d5be6a459 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -81,7 +81,7 @@ def sync(
         if not show_warnings and not any(fine_tune_logged):
             print("No new successful fine-tune were found")
 
-        return "🎉 wandb log completed successfully"
+        return "🎉 wandb sync completed successfully"
 
     @classmethod
     def _log_fine_tune(
@@ -152,7 +152,7 @@ def _log_fine_tune(
             wandb.summary["fine_tuned_model"] = fine_tuned_model
 
         # training/validation files and job details
-        cls._log_artifacts(fine_tune)
+        cls._log_artifacts(fine_tune, project, entity)
 
         # mark run as complete
         wandb.summary["status"] = "succeeded"
@@ -176,7 +176,17 @@ def _get_wandb_run(cls, run_path):
                 cls._wandb_api = wandb.Api()
             return cls._wandb_api.run(run_path)
         except Exception:
-            return False
+            return None
+
+    @classmethod
+    def _get_wandb_artifact(cls, artifact_path):
+        cls._ensure_logged_in()
+        try:
+            if cls._wandb_api is None:
+                cls._wandb_api = wandb.Api()
+            return cls._wandb_api.artifact(artifact_path)
+        except Exception:
+            return None
 
     @classmethod
     def _get_config(cls, fine_tune):
@@ -189,7 +199,7 @@ def _get_config(cls, fine_tune):
         return config
 
     @classmethod
-    def _log_artifacts(cls, fine_tune):
+    def _log_artifacts(cls, fine_tune, project, entity):
         # training/validation files
         training_file = (
             fine_tune["training_files"][0]
@@ -206,7 +216,7 @@ def _log_artifacts(cls, fine_tune):
             (validation_file, "valid", "validation_files"),
         ):
             if file is not None:
-                cls._log_artifact_inputs(file, prefix, artifact_type)
+                cls._log_artifact_inputs(file, prefix, artifact_type, project, entity)
 
         # job details
         fine_tune_id = fine_tune.get("id")
@@ -219,38 +229,46 @@ def _log_artifacts(cls, fine_tune):
             json.dump(fine_tune, f, indent=2)
         wandb.run.log_artifact(
             artifact,
-            aliases=[fine_tune_id, "latest"],
+            aliases=["latest", fine_tune_id],
         )
 
     @classmethod
-    def _log_artifact_inputs(cls, file, prefix, artifact_type):
+    def _log_artifact_inputs(cls, file, prefix, artifact_type, project, entity):
         file_id = file["id"]
         filename = Path(file["filename"]).name
         stem = Path(file["filename"]).stem
 
-        # get file content
-        try:
-            file_content = File.download(id=file_id).decode("utf-8")
-        except:
-            print(
-                f"File {file_id} could not be retrieved. Make sure you are allowed to download training/validation files"
-            )
-            return
-        artifact = wandb.Artifact(
-            f"{prefix}-{filename}", type=artifact_type, metadata=file
-        )
-        with artifact.new_file(filename, mode="w") as f:
-            f.write(file_content)
-
-        # create a Table
-        try:
-            table, n_items = cls._make_table(file_content)
-            artifact.add(table, stem)
-            wandb.config.update({f"n_{prefix}": n_items})
-        except:
-            print(f"File {file_id} could not be read as a valid JSON file")
-
-        wandb.run.use_artifact(artifact, aliases=[file_id, "latest"])
+        # get input artifact
+        artifact_name = f"{prefix}-{filename}"
+        artifact_alias = file_id
+        artifact_path = f"{project}/{artifact_name}:{artifact_alias}"
+        if entity is not None:
+            artifact_path = f"{entity}/{artifact_path}"
+        artifact = cls._get_wandb_artifact(artifact_path)
+
+        # create artifact if file not already logged previously
+        if artifact is None:
+            # get file content
+            try:
+                file_content = File.download(id=file_id).decode("utf-8")
+            except:
+                print(
+                    f"File {file_id} could not be retrieved. Make sure you are allowed to download training/validation files"
+                )
+                return
+            artifact = wandb.Artifact(artifact_name, type=artifact_type, metadata=file)
+            with artifact.new_file(filename, mode="w") as f:
+                f.write(file_content)
+
+            # create a Table
+            try:
+                table, n_items = cls._make_table(file_content)
+                artifact.add(table, stem)
+                wandb.config.update({f"n_{prefix}": n_items})
+            except:
+                print(f"File {file_id} could not be read as a valid JSON file")
+
+        wandb.run.use_artifact(artifact, aliases=["latest", artifact_alias])
 
     @classmethod
     def _make_table(cls, file_content):

From 08ef4de2531acf67e4830e44b0994109211a15d6 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Tue, 18 Jan 2022 14:31:31 -0600
Subject: [PATCH 20/26] feat(wandb): add number of items to metadata

---
 openai/logger.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/openai/logger.py b/openai/logger.py
index 8d5be6a459..6c51bb72cd 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -265,6 +265,7 @@ def _log_artifact_inputs(cls, file, prefix, artifact_type, project, entity):
                 table, n_items = cls._make_table(file_content)
                 artifact.add(table, stem)
                 wandb.config.update({f"n_{prefix}": n_items})
+                artifact.metadata["items"] = n_items
             except:
                 print(f"File {file_id} could not be read as a valid JSON file")
 

From 9c3738c7a7832e9d063a90c9fd18c7a61f41bf20 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Tue, 18 Jan 2022 14:51:12 -0600
Subject: [PATCH 21/26] fix(wandb): allow force sync

---
 openai/logger.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openai/logger.py b/openai/logger.py
index 6c51bb72cd..33747adcaf 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -121,7 +121,7 @@ def _log_fine_tune(
                     print(
                         f"A new wandb run will be created for fine-tune job {fine_tune_id} and previous run will be overwritten"
                     )
-            if wandb_status == "succeeded":
+            if wandb_status == "succeeded" and not force:
                 return
 
         # retrieve results

From 39e747c981fdbb99824e444f3f3a9a56688d0912 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 19 Jan 2022 10:30:06 -0600
Subject: [PATCH 22/26] feat(wandb): job -> fine-tune

---
 openai/cli.py    |  8 ++++----
 openai/logger.py | 42 ++++++++++++++++++++++--------------------
 2 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/openai/cli.py b/openai/cli.py
index c3a3de19ea..75d4f4e8a6 100644
--- a/openai/cli.py
+++ b/openai/cli.py
@@ -541,7 +541,7 @@ class Logger:
     def sync(cls, args):
         resp = openai.logger.Logger.sync(
             id=args.id,
-            n_jobs=args.n_jobs,
+            n_fine_tunes=args.n_fine_tunes,
             project=args.project,
             entity=args.entity,
             force=args.force,
@@ -984,10 +984,10 @@ def help(args):
     sub.add_argument("-i", "--id", help="The id of the fine-tune job (optional)")
     sub.add_argument(
         "-n",
-        "--n_jobs",
+        "--n_fine_tunes",
         type=int,
         default=None,
-        help="Number of most recent fine-tune jobs to log when an id is not provided. By default, every fine-tune is synced.",
+        help="Number of most recent fine-tunes to log when an id is not provided. By default, every fine-tune is synced.",
     )
     sub.add_argument(
         "--project",
@@ -1001,7 +1001,7 @@ def help(args):
     sub.add_argument(
         "--force",
         action="store_true",
-        help="Forces logging and overwrite existing wandb run of the same finetune job.",
+        help="Forces logging and overwrite existing wandb run of the same fine-tune.",
     )
     sub.set_defaults(force=False)
     sub.set_defaults(func=Logger.sync)
diff --git a/openai/logger.py b/openai/logger.py
index 33747adcaf..8b4f318ebf 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -20,7 +20,7 @@
 
 class Logger:
     """
-    Log fine-tune jobs to Weights & Biases
+    Log fine-tunes to Weights & Biases
     """
 
     if not WANDB_AVAILABLE:
@@ -33,19 +33,19 @@ class Logger:
     def sync(
         cls,
         id=None,
-        n_jobs=None,
+        n_fine_tunes=None,
         project="GPT-3",
         entity=None,
         force=False,
         **kwargs_wandb_init,
     ):
         """
-        Sync fine-tune job to Weights & Biases.
-        :param id: The id of the fine-tune job (optional)
-        :param n_jobs: Number of most recent fine-tune jobs to log when an id is not provided
+        Sync fine-tunes to Weights & Biases.
+        :param id: The id of the fine-tune (optional)
+        :param n_fine_tunes: Number of most recent fine-tunes to log when an id is not provided. By default, every fine-tune is synced.
         :param project: Name of the project where you're sending runs. By default, it is "GPT-3".
         :param entity: Username or team name where you're sending runs. By default, your default entity is used, which is usually your username.
-        :param force: Forces logging and overwrite existing wandb run of the same finetune job.
+        :param force: Forces logging and overwrite existing wandb run of the same fine-tune.
         """
 
         if not WANDB_AVAILABLE:
@@ -60,12 +60,14 @@ def sync(
             # get list of fine_tune to log
             fine_tunes = FineTune.list()
             if not fine_tunes or fine_tunes.get("data") is None:
-                print("No fine-tune jobs have been retrieved")
+                print("No fine-tune has been retrieved")
                 return
-            fine_tunes = fine_tunes["data"][-n_jobs if n_jobs is not None else None :]
+            fine_tunes = fine_tunes["data"][
+                -n_fine_tunes if n_fine_tunes is not None else None :
+            ]
 
         # log starting from oldest fine_tune
-        show_warnings = False if id is None and n_jobs is None else True
+        show_warnings = False if id is None and n_fine_tunes is None else True
         fine_tune_logged = [
             cls._log_fine_tune(
                 fine_tune,
@@ -79,7 +81,7 @@ def sync(
         ]
 
         if not show_warnings and not any(fine_tune_logged):
-            print("No new successful fine-tune were found")
+            print("No new successful fine-tunes were found")
 
         return "🎉 wandb sync completed successfully"
 
@@ -93,7 +95,7 @@ def _log_fine_tune(
         # check run completed successfully
         if show_warnings and status != "succeeded":
             print(
-                f'Fine-tune job {fine_tune_id} has the status "{status}" and will not be logged'
+                f'Fine-tune {fine_tune_id} has the status "{status}" and will not be logged'
             )
             return
 
@@ -107,7 +109,7 @@ def _log_fine_tune(
             if show_warnings:
                 if wandb_status == "succeeded":
                     print(
-                        f"Fine-tune job {fine_tune_id} has already been logged successfully at {wandb_run.url}"
+                        f"Fine-tune {fine_tune_id} has already been logged successfully at {wandb_run.url}"
                     )
                     if not force:
                         print(
@@ -115,11 +117,11 @@ def _log_fine_tune(
                         )
                 else:
                     print(
-                        f"A run for fine-tune job {fine_tune_id} was previously created but didn't end successfully"
+                        f"A run for fine-tune {fine_tune_id} was previously created but didn't end successfully"
                     )
                 if wandb_status != "succeeded" or force:
                     print(
-                        f"A new wandb run will be created for fine-tune job {fine_tune_id} and previous run will be overwritten"
+                        f"A new wandb run will be created for fine-tune {fine_tune_id} and previous run will be overwritten"
                     )
             if wandb_status == "succeeded" and not force:
                 return
@@ -130,7 +132,7 @@ def _log_fine_tune(
 
         # start a wandb run
         wandb.init(
-            job_type="finetune",
+            job_type="fine-tune",
             config=cls._get_config(fine_tune),
             project=project,
             entity=entity,
@@ -151,7 +153,7 @@ def _log_fine_tune(
         if fine_tuned_model is not None:
             wandb.summary["fine_tuned_model"] = fine_tuned_model
 
-        # training/validation files and job details
+        # training/validation files and fine-tune details
         cls._log_artifacts(fine_tune, project, entity)
 
         # mark run as complete
@@ -218,14 +220,14 @@ def _log_artifacts(cls, fine_tune, project, entity):
             if file is not None:
                 cls._log_artifact_inputs(file, prefix, artifact_type, project, entity)
 
-        # job details
+        # fine-tune details
         fine_tune_id = fine_tune.get("id")
         artifact = wandb.Artifact(
-            "job_details",
-            type="job_details",
+            "fine_tune_details",
+            type="fine_tune_details",
             metadata=fine_tune,
         )
-        with artifact.new_file("job_details.json") as f:
+        with artifact.new_file("fine_tune_details.json") as f:
             json.dump(fine_tune, f, indent=2)
         wandb.run.log_artifact(
             artifact,

From 0b1751de68c67bf1c18fd9886926e2f71a75c460 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 19 Jan 2022 10:34:38 -0600
Subject: [PATCH 23/26] refactor(wandb): use show_individual_warnings

---
 openai/logger.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/openai/logger.py b/openai/logger.py
index 8b4f318ebf..78bdcaf0ab 100644
--- a/openai/logger.py
+++ b/openai/logger.py
@@ -67,33 +67,41 @@ def sync(
             ]
 
         # log starting from oldest fine_tune
-        show_warnings = False if id is None and n_fine_tunes is None else True
+        show_individual_warnings = (
+            False if id is None and n_fine_tunes is None else True
+        )
         fine_tune_logged = [
             cls._log_fine_tune(
                 fine_tune,
                 project,
                 entity,
                 force,
-                show_warnings,
+                show_individual_warnings,
                 **kwargs_wandb_init,
             )
             for fine_tune in fine_tunes
         ]
 
-        if not show_warnings and not any(fine_tune_logged):
+        if not show_individual_warnings and not any(fine_tune_logged):
             print("No new successful fine-tunes were found")
 
         return "🎉 wandb sync completed successfully"
 
     @classmethod
     def _log_fine_tune(
-        cls, fine_tune, project, entity, force, show_warnings, **kwargs_wandb_init
+        cls,
+        fine_tune,
+        project,
+        entity,
+        force,
+        show_individual_warnings,
+        **kwargs_wandb_init,
     ):
         fine_tune_id = fine_tune.get("id")
         status = fine_tune.get("status")
 
         # check run completed successfully
-        if show_warnings and status != "succeeded":
+        if show_individual_warnings and status != "succeeded":
             print(
                 f'Fine-tune {fine_tune_id} has the status "{status}" and will not be logged'
             )
@@ -106,7 +114,7 @@ def _log_fine_tune(
         wandb_run = cls._get_wandb_run(run_path)
         if wandb_run:
             wandb_status = wandb_run.summary.get("status")
-            if show_warnings:
+            if show_individual_warnings:
                 if wandb_status == "succeeded":
                     print(
                         f"Fine-tune {fine_tune_id} has already been logged successfully at {wandb_run.url}"

From 81db437ff96d7d0330ce1175cef0caaf6db7ddf4 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 19 Jan 2022 10:50:11 -0600
Subject: [PATCH 24/26] feat(wandb): Logger -> WandbLogger

---
 openai/cli.py                         | 8 ++++----
 openai/{logger.py => wandb_logger.py} | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)
 rename openai/{logger.py => wandb_logger.py} (99%)

diff --git a/openai/cli.py b/openai/cli.py
index 75d4f4e8a6..c57d4c973e 100644
--- a/openai/cli.py
+++ b/openai/cli.py
@@ -9,7 +9,6 @@
 import requests
 
 import openai
-import openai.logger
 from openai.upload_progress import BufferReader
 from openai.validators import (
     apply_necessary_remediation,
@@ -20,6 +19,7 @@
     write_out_file,
     write_out_search_file,
 )
+import openai.wandb_logger
 
 
 class bcolors:
@@ -536,10 +536,10 @@ def prepare_data(cls, args):
         )
 
 
-class Logger:
+class WandbLogger:
     @classmethod
     def sync(cls, args):
-        resp = openai.logger.Logger.sync(
+        resp = openai.wandb_logger.WandbLogger.sync(
             id=args.id,
             n_fine_tunes=args.n_fine_tunes,
             project=args.project,
@@ -1004,4 +1004,4 @@ def help(args):
         help="Forces logging and overwrite existing wandb run of the same fine-tune.",
     )
     sub.set_defaults(force=False)
-    sub.set_defaults(func=Logger.sync)
+    sub.set_defaults(func=WandbLogger.sync)
diff --git a/openai/logger.py b/openai/wandb_logger.py
similarity index 99%
rename from openai/logger.py
rename to openai/wandb_logger.py
index 78bdcaf0ab..9740098e02 100644
--- a/openai/logger.py
+++ b/openai/wandb_logger.py
@@ -18,7 +18,7 @@
     from openai import File, FineTune
 
 
-class Logger:
+class WandbLogger:
     """
     Log fine-tunes to Weights & Biases
     """

From e6f154f7c81012ef86292c75bcc98179fe4fe014 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Wed, 19 Jan 2022 16:02:21 -0600
Subject: [PATCH 25/26] feat(wandb): retrieve number of items from artifact

---
 openai/wandb_logger.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/openai/wandb_logger.py b/openai/wandb_logger.py
index 9740098e02..228d4b6431 100644
--- a/openai/wandb_logger.py
+++ b/openai/wandb_logger.py
@@ -278,6 +278,9 @@ def _log_artifact_inputs(cls, file, prefix, artifact_type, project, entity):
                 artifact.metadata["items"] = n_items
             except:
                 print(f"File {file_id} could not be read as a valid JSON file")
+        else:
+            # log number of items
+            wandb.config.update({f"n_{prefix}": artifact.metadata.get("items")})
 
         wandb.run.use_artifact(artifact, aliases=["latest", artifact_alias])
 

From 90c802bca79b2b6989e97ca9f0b972a7e9538671 Mon Sep 17 00:00:00 2001
From: Boris Dayma <boris.dayma@gmail.com>
Date: Thu, 27 Jan 2022 14:15:30 -0600
Subject: [PATCH 26/26] doc(wandb): add link to documentation

---
 README.md              | 7 +++++++
 openai/wandb_logger.py | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1aad1ff16b..9da57aaed8 100644
--- a/README.md
+++ b/README.md
@@ -76,6 +76,7 @@ search = openai.Engine(id="deployment-namme").search(documents=["White House", "
 # print the search
 print(search)
 ```
+
 Please note that for the moment, the Microsoft Azure endpoints can only be used for completion and search operations.
 
 ### Command-line interface
@@ -142,6 +143,12 @@ Examples of fine tuning are shared in the following Jupyter notebooks:
   - [Step 2: Creating a synthetic Q&A dataset](https://github.com/openai/openai-python/blob/main/examples/finetuning/olympics-2-create-qa.ipynb)
   - [Step 3: Train a fine-tuning model specialized for Q&A](https://github.com/openai/openai-python/blob/main/examples/finetuning/olympics-3-train-qa.ipynb)
 
+Sync your fine-tunes to [Weights & Biases](https://wandb.me/openai-docs) to track experiments, models, and datasets in your central dashboard with:
+
+```bash
+openai wandb sync
+```
+
 For more information on fine tuning, read the [fine-tuning guide](https://beta.openai.com/docs/guides/fine-tuning) in the OpenAI documentation.
 
 ## Requirements
diff --git a/openai/wandb_logger.py b/openai/wandb_logger.py
index 228d4b6431..7bdacd711c 100644
--- a/openai/wandb_logger.py
+++ b/openai/wandb_logger.py
@@ -20,7 +20,7 @@
 
 class WandbLogger:
     """
-    Log fine-tunes to Weights & Biases
+    Log fine-tunes to [Weights & Biases](https://wandb.me/openai-docs)
     """
 
     if not WANDB_AVAILABLE: