From 9a6d1689364a47942b292a1cf2419d818cc2fdf3 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 3 Nov 2021 18:19:48 -0500 Subject: [PATCH 01/26] feat: log fine_tune with wandb --- openai/cli.py | 30 +++++++++++++ openai/logger.py | 110 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 openai/logger.py diff --git a/openai/cli.py b/openai/cli.py index 872209f5bb..4102cc5b0e 100644 --- a/openai/cli.py +++ b/openai/cli.py @@ -13,6 +13,7 @@ read_any_format, write_out_file, ) +import openai.logger class bcolors: @@ -478,6 +479,15 @@ def prepare_data(cls, args): write_out_file(df, fname, any_optional_or_necessary_applied, auto_accept) +class Logger: + @classmethod + def log(cls, args): + resp = openai.logger.Logger.log( + id=args.id, n_jobs=args.n_jobs, project=args.project, entity=args.entity + ) + print(resp) + + def tools_register(parser): subparsers = parser.add_subparsers( title="Tools", help="Convenience client side tools" @@ -863,3 +873,23 @@ def help(args): sub = subparsers.add_parser("fine_tunes.cancel") sub.add_argument("-i", "--id", required=True, help="The id of the fine-tune job") sub.set_defaults(func=FineTune.cancel) + + sub = subparsers.add_parser("fine_tunes.wandb") + sub.add_argument("-i", "--id", help="The id of the fine-tune job") + sub.add_argument( + "-n", + "--n_jobs", + type=int, + default=10, + help="Number of most recent fine-tune jobs to log when an id is not provided", + ) + sub.add_argument( + "--project", + default="GPT-3", + help="""Name of the project where you're sending runs. By default, it is "GPT-3".""", + ) + sub.add_argument( + "--entity", + help="Username or team name where you're sending runs. 
By default, your default entity is used, which is usually your username.", + ) + sub.set_defaults(func=Logger.log) diff --git a/openai/logger.py b/openai/logger.py new file mode 100644 index 0000000000..bb2dd49c3e --- /dev/null +++ b/openai/logger.py @@ -0,0 +1,110 @@ +try: + import wandb + + WANDB_AVAILABLE = True +except: + WANDB_AVAILABLE = False + + +if WANDB_AVAILABLE: + from openai import FineTune, File + import io + import numpy as np + import pandas as pd + + +class Logger: + if not WANDB_AVAILABLE: + print("WandbLogger requires wandb to be installed. Run `pip install wandb`.") + else: + _wandb_api = wandb.Api() + + @classmethod + def log( + cls, + id=None, + n_jobs=10, + project="GPT-3", + entity=None, + **kwargs_wandb_init, + ): + # TODO: add docstring + + if not WANDB_AVAILABLE: + return + + if id: + fine_tune = FineTune.retrieve(id=id) + fine_tune.pop("events", None) + fine_tunes = [fine_tune] + + else: + # get list of fine_tune to log + fine_tunes = FineTune.list() + if not fine_tunes or fine_tunes.get("data") is None: + print("No fine-tune jobs have been retrieved") + return + fine_tunes = fine_tunes["data"][-n_jobs:] + + # log starting from oldest fine_tune + for fine_tune in fine_tunes: + cls._log_fine_tune(fine_tune, project, entity, **kwargs_wandb_init) + return "Command completed successfully" + + @classmethod + def _log_fine_tune(cls, fine_tune, project, entity, **kwargs_wandb_init): + fine_tune_id = fine_tune.get("id") + status = fine_tune.get("status") + + # check run completed successfully + if status != "succeeded": + print( + f'Fine-tune job {fine_tune_id} has the status "{status}" and will not be logged' + ) + + # check run has not been logged already + run_path = f"{project}/{fine_tune_id}" + if entity is not None: + run_path = f"{entity}/{run_path}" + wandb_run = cls._get_wandb_run(run_path) + if cls._get_wandb_run(run_path): + print( + f"Fine-tune job {fine_tune_id} has already been logged at {wandb_run.url}" + ) + return + # TODO: add 
a "force" argument + + # retrieve results + results_id = fine_tune["result_files"][0]["id"] + results = File.download(id=results_id).decode("utf-8") + + # start a wandb run + wandb.init( + job_type="finetune", + config=fine_tune, + project=project, + entity=entity, + name=fine_tune_id, + id=fine_tune_id, + **kwargs_wandb_init, + ) + + # log results + df_results = pd.read_csv(io.StringIO(results)) + for _, row in df_results.iterrows(): + metrics = {k: v for k, v in row.items() if not np.isnan(v)} + step = metrics.pop("step") + if step is not None: + step = int(step) + wandb.log(metrics, step=step) + + # TODO: retrieve training/validation files if not already present + # TODO: mark the run as successful so we can overwrite it in case it did not log properly + wandb.finish() + + @classmethod + def _get_wandb_run(cls, run_path): + try: + return cls._wandb_api.run(run_path) + except Exception as e: + return False From a24678575915fd957e6672f74e99de05da1e06a8 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 3 Nov 2021 20:00:44 -0500 Subject: [PATCH 02/26] feat: ensure we are logged in --- openai/logger.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/openai/logger.py b/openai/logger.py index bb2dd49c3e..12ca3e5fbc 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -15,9 +15,10 @@ class Logger: if not WANDB_AVAILABLE: - print("WandbLogger requires wandb to be installed. Run `pip install wandb`.") + print("Logging requires wandb to be installed. 
Run `pip install wandb`.") else: _wandb_api = wandb.Api() + _logged_in = False @classmethod def log( @@ -102,8 +103,17 @@ def _log_fine_tune(cls, fine_tune, project, entity, **kwargs_wandb_init): # TODO: mark the run as successful so we can overwrite it in case it did not log properly wandb.finish() + @classmethod + def _ensure_logged_in(cls): + if not cls._logged_in: + if wandb.login(): + cls._logged_in = True + else: + raise Exception("You need to log in to wandb") + @classmethod def _get_wandb_run(cls, run_path): + cls._ensure_logged_in() try: return cls._wandb_api.run(run_path) except Exception as e: From b24d1b6d21149974cf1c01c82a6feb73f6803cd2 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 3 Nov 2021 20:08:52 -0500 Subject: [PATCH 03/26] feat: cli wandb namespace --- bin/openai | 4 +++- openai/cli.py | 13 ++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/bin/openai b/bin/openai index 3c34b69347..d234256c62 100755 --- a/bin/openai +++ b/bin/openai @@ -4,7 +4,7 @@ import logging import sys import openai -from openai.cli import api_register, display_error, tools_register +from openai.cli import api_register, display_error, tools_register, wandb_register logger = logging.getLogger() formatter = logging.Formatter("[%(asctime)s] %(message)s") @@ -39,9 +39,11 @@ def main(): subparsers = parser.add_subparsers() sub_api = subparsers.add_parser("api", help="Direct API calls") sub_tools = subparsers.add_parser("tools", help="Client side tools for convenience") + sub_wandb = subparsers.add_parser("wandb", help="Logging with Weights & Biases") api_register(sub_api) tools_register(sub_tools) + wandb_register(sub_wandb) args = parser.parse_args() if args.verbosity == 1: diff --git a/openai/cli.py b/openai/cli.py index 4102cc5b0e..6cc637baaf 100644 --- a/openai/cli.py +++ b/openai/cli.py @@ -874,7 +874,18 @@ def help(args): sub.add_argument("-i", "--id", required=True, help="The id of the fine-tune job") 
sub.set_defaults(func=FineTune.cancel) - sub = subparsers.add_parser("fine_tunes.wandb") + +def wandb_register(parser): + subparsers = parser.add_subparsers( + title="wandb", help="Logging with Weights & Biases" + ) + + def help(args): + parser.print_help() + + parser.set_defaults(func=help) + + sub = subparsers.add_parser("log") sub.add_argument("-i", "--id", help="The id of the fine-tune job") sub.add_argument( "-n", From dfb67fa67e82ec93dba9929fe4aecace0eb0580a Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 3 Nov 2021 20:17:37 -0500 Subject: [PATCH 04/26] =?UTF-8?q?feat:=C2=A0add=20fine=5Ftuned=5Fmodel=20t?= =?UTF-8?q?o=20summary?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- openai/logger.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openai/logger.py b/openai/logger.py index 12ca3e5fbc..46185c3d26 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -98,6 +98,9 @@ def _log_fine_tune(cls, fine_tune, project, entity, **kwargs_wandb_init): if step is not None: step = int(step) wandb.log(metrics, step=step) + fine_tuned_model = fine_tune.get("fine_tuned_model") + if fine_tuned_model is not None: + wandb.summary["fine_tuned_model"] = fine_tuned_model # TODO: retrieve training/validation files if not already present # TODO: mark the run as successful so we can overwrite it in case it did not log properly From 1036d6d7e5bd7a8f4d0415a008536e3469b3607b Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 3 Nov 2021 21:21:39 -0500 Subject: [PATCH 05/26] feat: log training & validation files --- openai/logger.py | 50 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/openai/logger.py b/openai/logger.py index 46185c3d26..6488f1b600 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -11,6 +11,7 @@ import io import numpy as np import pandas as pd + from pathlib import Path class Logger: @@ -102,7 +103,9 @@ def _log_fine_tune(cls, fine_tune, 
project, entity, **kwargs_wandb_init): if fine_tuned_model is not None: wandb.summary["fine_tuned_model"] = fine_tuned_model - # TODO: retrieve training/validation files if not already present + # training/validation files + cls._log_artifacts(fine_tune) + # TODO: mark the run as successful so we can overwrite it in case it did not log properly wandb.finish() @@ -121,3 +124,48 @@ def _get_wandb_run(cls, run_path): return cls._wandb_api.run(run_path) except Exception as e: return False + + @classmethod + def _log_artifacts(cls, fine_tune): + training_file = ( + fine_tune["training_files"][0] if fine_tune.get("training_files") else None + ) + validation_file = ( + fine_tune["validation_files"][0] + if fine_tune.get("validation_files") + else None + ) + for file, prefix in ((training_file, "train"), (validation_file, "valid")): + cls._log_artifact(file, prefix) + + @classmethod + def _log_artifact(cls, file, prefix): + file_id = file["id"] + filename = Path(file["filename"]).name + stem = Path(file["filename"]).stem + + # get file content + try: + file_content = File.download(id=file_id).decode("utf-8") + except: + print( + f"File {file_id} could not be retrieved. 
Make sure you are allowed to download training/validation files" + ) + return + artifact = wandb.Artifact(f"{prefix}-{filename}", type=prefix, metadata=file) + with artifact.new_file(filename, mode="w") as f: + f.write(file_content) + + # create a Table + try: + table = cls._make_table(file_content) + artifact.add(table, stem) + except: + print(f"File {file_id} could not be read as a valid JSON file") + + wandb.run.log_artifact(artifact, aliases=[file_id, "latest"]) + + @classmethod + def _make_table(cls, file_content): + df = pd.read_json(io.StringIO(file_content), orient="records", lines=True) + return wandb.Table(dataframe=df) From bb3def60c0c10dbf00857edad78da565be304832 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 3 Nov 2021 21:49:28 -0500 Subject: [PATCH 06/26] feat: re-log if was not successful or force --- openai/cli.py | 12 +++++++++++- openai/logger.py | 31 ++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/openai/cli.py b/openai/cli.py index 6cc637baaf..54f57ef478 100644 --- a/openai/cli.py +++ b/openai/cli.py @@ -483,7 +483,11 @@ class Logger: @classmethod def log(cls, args): resp = openai.logger.Logger.log( - id=args.id, n_jobs=args.n_jobs, project=args.project, entity=args.entity + id=args.id, + n_jobs=args.n_jobs, + project=args.project, + entity=args.entity, + force=args.force, ) print(resp) @@ -903,4 +907,10 @@ def help(args): "--entity", help="Username or team name where you're sending runs. 
By default, your default entity is used, which is usually your username.", ) + sub.add_argument( + "--force", + action="store_true", + help="Forces logging and overwrite existing wandb run of the same finetune job.", + ) + sub.set_defaults(force=False) sub.set_defaults(func=Logger.log) diff --git a/openai/logger.py b/openai/logger.py index 6488f1b600..1dda17197b 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -28,6 +28,7 @@ def log( n_jobs=10, project="GPT-3", entity=None, + force=False, **kwargs_wandb_init, ): # TODO: add docstring @@ -50,11 +51,11 @@ def log( # log starting from oldest fine_tune for fine_tune in fine_tunes: - cls._log_fine_tune(fine_tune, project, entity, **kwargs_wandb_init) + cls._log_fine_tune(fine_tune, project, entity, force, **kwargs_wandb_init) return "Command completed successfully" @classmethod - def _log_fine_tune(cls, fine_tune, project, entity, **kwargs_wandb_init): + def _log_fine_tune(cls, fine_tune, project, entity, force, **kwargs_wandb_init): fine_tune_id = fine_tune.get("id") status = fine_tune.get("status") @@ -69,12 +70,22 @@ def _log_fine_tune(cls, fine_tune, project, entity, **kwargs_wandb_init): if entity is not None: run_path = f"{entity}/{run_path}" wandb_run = cls._get_wandb_run(run_path) - if cls._get_wandb_run(run_path): - print( - f"Fine-tune job {fine_tune_id} has already been logged at {wandb_run.url}" - ) - return - # TODO: add a "force" argument + if wandb_run: + wandb_status = wandb_run.summary.get("status") + if wandb_status == "succeeded": + print( + f"Fine-tune job {fine_tune_id} has already been logged successfully at {wandb_run.url}" + ) + if not force: + print( + 'Use "--force" in the CLI or "force=True" in python if you want to overwrite previous run' + ) + if wandb_status != "succeeded" or force: + print( + f"A new wandb run will be created for fine-tune job {fine_tune_id} and previous run will be overwritten" + ) + else: + return # retrieve results results_id = fine_tune["result_files"][0]["id"] 
@@ -106,7 +117,9 @@ def _log_fine_tune(cls, fine_tune, project, entity, **kwargs_wandb_init): # training/validation files cls._log_artifacts(fine_tune) - # TODO: mark the run as successful so we can overwrite it in case it did not log properly + # mark run as complete + wandb.summary["status"] = "succeeded" + wandb.finish() @classmethod From 2d34eb28a407591aa9cc684e29ccd3712de5df1b Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 3 Nov 2021 21:55:55 -0500 Subject: [PATCH 07/26] doc: add docstring --- openai/logger.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/openai/logger.py b/openai/logger.py index 1dda17197b..fe26debdb2 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -15,6 +15,10 @@ class Logger: + """ + Log fine-tune jobs to Weights & Biases + """ + if not WANDB_AVAILABLE: print("Logging requires wandb to be installed. Run `pip install wandb`.") else: @@ -31,7 +35,14 @@ def log( force=False, **kwargs_wandb_init, ): - # TODO: add docstring + """ + Log a fine-tune jobs to Weights & Biases. + :param id: The id of the fine-tune job (optional) + :param n_jobs: Number of most recent fine-tune jobs to log when an id is not provided + :param project: Name of the project where you're sending runs. By default, it is "GPT-3". + :param entity: Username or team name where you're sending runs. By default, your default entity is used, which is usually your username. + :param force: Forces logging and overwrite existing wandb run of the same finetune job. 
+ """ if not WANDB_AVAILABLE: return From 0b0f4566cdc62d4142c04fb02556b9915805f185 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 3 Nov 2021 22:21:12 -0500 Subject: [PATCH 08/26] feat: set wandb api only when needed --- openai/logger.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openai/logger.py b/openai/logger.py index fe26debdb2..9910386b2e 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -22,7 +22,7 @@ class Logger: if not WANDB_AVAILABLE: print("Logging requires wandb to be installed. Run `pip install wandb`.") else: - _wandb_api = wandb.Api() + _wandb_api = False _logged_in = False @classmethod @@ -145,6 +145,8 @@ def _ensure_logged_in(cls): def _get_wandb_run(cls, run_path): cls._ensure_logged_in() try: + if cls._wandb_api is None: + cls._wandb_api = wandb.Api() return cls._wandb_api.run(run_path) except Exception as e: return False From c948a2e7534fbf05b2a33a3a119996420bd44e2c Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 3 Nov 2021 22:38:40 -0500 Subject: [PATCH 09/26] fix: train/validation files are inputs --- openai/logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openai/logger.py b/openai/logger.py index 9910386b2e..768392a61a 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -189,7 +189,7 @@ def _log_artifact(cls, file, prefix): except: print(f"File {file_id} could not be read as a valid JSON file") - wandb.run.log_artifact(artifact, aliases=[file_id, "latest"]) + wandb.run.use_artifact(artifact, aliases=[file_id, "latest"]) @classmethod def _make_table(cls, file_content): From 1d102353910b9df706cc6dff118ce794488dc5a1 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 3 Nov 2021 22:43:04 -0500 Subject: [PATCH 10/26] feat: rename artifact type --- openai/logger.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/openai/logger.py b/openai/logger.py index 768392a61a..0a5013dc19 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -161,11 
+161,14 @@ def _log_artifacts(cls, fine_tune): if fine_tune.get("validation_files") else None ) - for file, prefix in ((training_file, "train"), (validation_file, "valid")): - cls._log_artifact(file, prefix) + for file, prefix, artifact_type in ( + (training_file, "train", "training_files"), + (validation_file, "valid", "validation_files"), + ): + cls._log_artifact(file, prefix, artifact_type) @classmethod - def _log_artifact(cls, file, prefix): + def _log_artifact(cls, file, prefix, artifact_type): file_id = file["id"] filename = Path(file["filename"]).name stem = Path(file["filename"]).stem @@ -178,7 +181,9 @@ def _log_artifact(cls, file, prefix): f"File {file_id} could not be retrieved. Make sure you are allowed to download training/validation files" ) return - artifact = wandb.Artifact(f"{prefix}-{filename}", type=prefix, metadata=file) + artifact = wandb.Artifact( + f"{prefix}-{filename}", type=artifact_type, metadata=file + ) with artifact.new_file(filename, mode="w") as f: f.write(file_content) From 146cdaa39793e1a70e80839e5870a549ef7c948a Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 3 Nov 2021 23:29:24 -0500 Subject: [PATCH 11/26] =?UTF-8?q?feat:=C2=A0improve=20config=20logging?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- openai/logger.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/openai/logger.py b/openai/logger.py index 0a5013dc19..4f6e5f1507 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -105,7 +105,7 @@ def _log_fine_tune(cls, fine_tune, project, entity, force, **kwargs_wandb_init): # start a wandb run wandb.init( job_type="finetune", - config=fine_tune, + config=cls._get_config(fine_tune), project=project, entity=entity, name=fine_tune_id, @@ -151,14 +151,24 @@ def _get_wandb_run(cls, run_path): except Exception as e: return False + @classmethod + def _get_config(cls, fine_tune): + config = dict(fine_tune) + for key in ("training_files", 
"validation_files", "result_files"): + if config.get(key) and len(config[key]): + config[key] = config[key][0] + return config + @classmethod def _log_artifacts(cls, fine_tune): training_file = ( - fine_tune["training_files"][0] if fine_tune.get("training_files") else None + fine_tune["training_files"][0] + if fine_tune.get("training_files") and len(fine_tune["training_files"]) + else None ) validation_file = ( fine_tune["validation_files"][0] - if fine_tune.get("validation_files") + if fine_tune.get("validation_files") and len(fine_tune["validation_files"]) else None ) for file, prefix, artifact_type in ( From ce7352e35b06b995e8defff96fd06c1ada0ef95b Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Fri, 5 Nov 2021 11:20:47 -0500 Subject: [PATCH 12/26] =?UTF-8?q?feat:=C2=A0log=20all=20jobs=20by=20defaul?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- openai/cli.py | 2 +- openai/logger.py | 63 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/openai/cli.py b/openai/cli.py index 54f57ef478..71a5bd7b1d 100644 --- a/openai/cli.py +++ b/openai/cli.py @@ -895,7 +895,7 @@ def help(args): "-n", "--n_jobs", type=int, - default=10, + default=None, help="Number of most recent fine-tune jobs to log when an id is not provided", ) sub.add_argument( diff --git a/openai/logger.py b/openai/logger.py index 4f6e5f1507..fc22790ce3 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -22,14 +22,14 @@ class Logger: if not WANDB_AVAILABLE: print("Logging requires wandb to be installed. 
Run `pip install wandb`.") else: - _wandb_api = False + _wandb_api = None _logged_in = False @classmethod def log( cls, id=None, - n_jobs=10, + n_jobs=None, project="GPT-3", entity=None, force=False, @@ -58,23 +58,40 @@ def log( if not fine_tunes or fine_tunes.get("data") is None: print("No fine-tune jobs have been retrieved") return - fine_tunes = fine_tunes["data"][-n_jobs:] + fine_tunes = fine_tunes["data"][-n_jobs if n_jobs is not None else None :] # log starting from oldest fine_tune - for fine_tune in fine_tunes: - cls._log_fine_tune(fine_tune, project, entity, force, **kwargs_wandb_init) - return "Command completed successfully" + show_warnings = False if id is None and n_jobs is None else True + fine_tune_logged = [ + cls._log_fine_tune( + fine_tune, + project, + entity, + force, + show_warnings, + **kwargs_wandb_init, + ) + for fine_tune in fine_tunes + ] + + if not show_warnings and not any(fine_tune_logged): + print("No new successful fine-tune were found") + + return "🎉 wandb log completed successfully" @classmethod - def _log_fine_tune(cls, fine_tune, project, entity, force, **kwargs_wandb_init): + def _log_fine_tune( + cls, fine_tune, project, entity, force, show_warnings, **kwargs_wandb_init + ): fine_tune_id = fine_tune.get("id") status = fine_tune.get("status") # check run completed successfully - if status != "succeeded": + if show_warnings and status != "succeeded": print( f'Fine-tune job {fine_tune_id} has the status "{status}" and will not be logged' ) + return # check run has not been logged already run_path = f"{project}/{fine_tune_id}" @@ -83,19 +100,24 @@ def _log_fine_tune(cls, fine_tune, project, entity, force, **kwargs_wandb_init): wandb_run = cls._get_wandb_run(run_path) if wandb_run: wandb_status = wandb_run.summary.get("status") - if wandb_status == "succeeded": - print( - f"Fine-tune job {fine_tune_id} has already been logged successfully at {wandb_run.url}" - ) - if not force: + if show_warnings: + if wandb_status == "succeeded": 
print( - 'Use "--force" in the CLI or "force=True" in python if you want to overwrite previous run' + f"Fine-tune job {fine_tune_id} has already been logged successfully at {wandb_run.url}" ) - if wandb_status != "succeeded" or force: - print( - f"A new wandb run will be created for fine-tune job {fine_tune_id} and previous run will be overwritten" - ) - else: + if not force: + print( + 'Use "--force" in the CLI or "force=True" in python if you want to overwrite previous run' + ) + else: + print( + f"A run for fine-tune job {fine_tune_id} was previously created but didn't end successfully" + ) + if wandb_status != "succeeded" or force: + print( + f"A new wandb run will be created for fine-tune job {fine_tune_id} and previous run will be overwritten" + ) + if wandb_status == "succeeded": return # retrieve results @@ -132,6 +154,7 @@ def _log_fine_tune(cls, fine_tune, project, entity, force, **kwargs_wandb_init): wandb.summary["status"] = "succeeded" wandb.finish() + return True @classmethod def _ensure_logged_in(cls): @@ -148,7 +171,7 @@ def _get_wandb_run(cls, run_path): if cls._wandb_api is None: cls._wandb_api = wandb.Api() return cls._wandb_api.run(run_path) - except Exception as e: + except Exception: return False @classmethod From 5e6dbe94910b9281e250fafbd50a311a78216d11 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Fri, 5 Nov 2021 11:40:15 -0500 Subject: [PATCH 13/26] feat: log job details --- openai/logger.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/openai/logger.py b/openai/logger.py index fc22790ce3..2fb44accf7 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -9,6 +9,7 @@ if WANDB_AVAILABLE: from openai import FineTune, File import io + import json import numpy as np import pandas as pd from pathlib import Path @@ -147,7 +148,7 @@ def _log_fine_tune( if fine_tuned_model is not None: wandb.summary["fine_tuned_model"] = fine_tuned_model - # training/validation files + # training/validation files and 
job details cls._log_artifacts(fine_tune) # mark run as complete @@ -184,6 +185,7 @@ def _get_config(cls, fine_tune): @classmethod def _log_artifacts(cls, fine_tune): + # training/validation files training_file = ( fine_tune["training_files"][0] if fine_tune.get("training_files") and len(fine_tune["training_files"]) @@ -198,10 +200,24 @@ def _log_artifacts(cls, fine_tune): (training_file, "train", "training_files"), (validation_file, "valid", "validation_files"), ): - cls._log_artifact(file, prefix, artifact_type) + cls._log_artifact_inputs(file, prefix, artifact_type) + + # job details + fine_tune_id = fine_tune.get("id") + artifact = wandb.Artifact( + "job_details", + type="job_details", + metadata=fine_tune, + ) + with artifact.new_file("job_details.json") as f: + json.dump(fine_tune, f, indent=2) + wandb.run.log_artifact( + artifact, + aliases=[fine_tune_id, "latest"], + ) @classmethod - def _log_artifact(cls, file, prefix, artifact_type): + def _log_artifact_inputs(cls, file, prefix, artifact_type): file_id = file["id"] filename = Path(file["filename"]).name stem = Path(file["filename"]).stem From 00111baeb67cfe6c27bd111dee2ae0f4c22b1a59 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Sat, 6 Nov 2021 10:03:56 -0500 Subject: [PATCH 14/26] feat: log -> sync --- openai/cli.py | 6 +++--- openai/logger.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/openai/cli.py b/openai/cli.py index 71a5bd7b1d..803e9f780f 100644 --- a/openai/cli.py +++ b/openai/cli.py @@ -889,14 +889,14 @@ def help(args): parser.set_defaults(func=help) - sub = subparsers.add_parser("log") - sub.add_argument("-i", "--id", help="The id of the fine-tune job") + sub = subparsers.add_parser("sync") + sub.add_argument("-i", "--id", help="The id of the fine-tune job (optional)") sub.add_argument( "-n", "--n_jobs", type=int, default=None, - help="Number of most recent fine-tune jobs to log when an id is not provided", + help="Number of most recent fine-tune jobs to log when an 
id is not provided. By default, every fine-tune is synced.", ) sub.add_argument( "--project", diff --git a/openai/logger.py b/openai/logger.py index 2fb44accf7..1ea10845fc 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -27,7 +27,7 @@ class Logger: _logged_in = False @classmethod - def log( + def sync( cls, id=None, n_jobs=None, @@ -37,7 +37,7 @@ def log( **kwargs_wandb_init, ): """ - Log a fine-tune jobs to Weights & Biases. + Sync fine-tune job to Weights & Biases. :param id: The id of the fine-tune job (optional) :param n_jobs: Number of most recent fine-tune jobs to log when an id is not provided :param project: Name of the project where you're sending runs. By default, it is "GPT-3". From 9a3edcba0fcfbf54c8ae95d7f96589ef47752ad1 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Mon, 8 Nov 2021 21:01:10 -0600 Subject: [PATCH 15/26] feat: cli wandb log -> sync --- openai/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openai/cli.py b/openai/cli.py index 803e9f780f..ad6120a1bb 100644 --- a/openai/cli.py +++ b/openai/cli.py @@ -481,8 +481,8 @@ def prepare_data(cls, args): class Logger: @classmethod - def log(cls, args): - resp = openai.logger.Logger.log( + def sync(cls, args): + resp = openai.logger.Logger.sync( id=args.id, n_jobs=args.n_jobs, project=args.project, @@ -913,4 +913,4 @@ def help(args): help="Forces logging and overwrite existing wandb run of the same finetune job.", ) sub.set_defaults(force=False) - sub.set_defaults(func=Logger.log) + sub.set_defaults(func=Logger.sync) From 2c151f3ae5ff7a6de3d80eb4a0914f29e79beecc Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Thu, 2 Dec 2021 16:05:23 -0600 Subject: [PATCH 16/26] fix: validation_files not always present --- openai/logger.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openai/logger.py b/openai/logger.py index 1ea10845fc..7ed92c6ae7 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -199,8 +199,9 @@ def _log_artifacts(cls, 
fine_tune): for file, prefix, artifact_type in ( (training_file, "train", "training_files"), (validation_file, "valid", "validation_files"), - ): - cls._log_artifact_inputs(file, prefix, artifact_type) + ): + if file is not None: + cls._log_artifact_inputs(file, prefix, artifact_type) # job details fine_tune_id = fine_tune.get("id") From 9eccf8409fa753648ff1d0ff8584d04786c5d0e0 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Thu, 2 Dec 2021 21:00:10 -0600 Subject: [PATCH 17/26] =?UTF-8?q?feat:=E2=80=AFformat=20created=5Fat=20+?= =?UTF-8?q?=20style?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- openai/cli.py | 2 +- openai/logger.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/openai/cli.py b/openai/cli.py index ad6120a1bb..211d2591f0 100644 --- a/openai/cli.py +++ b/openai/cli.py @@ -5,6 +5,7 @@ import warnings import openai +import openai.logger from openai.upload_progress import BufferReader from openai.validators import ( apply_necessary_remediation, @@ -13,7 +14,6 @@ read_any_format, write_out_file, ) -import openai.logger class bcolors: diff --git a/openai/logger.py b/openai/logger.py index 7ed92c6ae7..434a0ccbce 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -7,12 +7,15 @@ if WANDB_AVAILABLE: - from openai import FineTune, File + import datetime import io import json + from pathlib import Path + import numpy as np import pandas as pd - from pathlib import Path + + from openai import File, FineTune class Logger: @@ -181,6 +184,8 @@ def _get_config(cls, fine_tune): for key in ("training_files", "validation_files", "result_files"): if config.get(key) and len(config[key]): config[key] = config[key][0] + if config.get("created_at"): + config["created_at"] = datetime.datetime.fromtimestamp(config["created_at"]) return config @classmethod @@ -199,7 +204,7 @@ def _log_artifacts(cls, fine_tune): for file, prefix, artifact_type in ( (training_file, "train", "training_files"), 
(validation_file, "valid", "validation_files"), - ): + ): if file is not None: cls._log_artifact_inputs(file, prefix, artifact_type) From 84c2cbdf83912f9e3be9cb8c9066140b9b8dc6ab Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Thu, 2 Dec 2021 21:11:40 -0600 Subject: [PATCH 18/26] feat: log number of training/validation samples --- openai/logger.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openai/logger.py b/openai/logger.py index 434a0ccbce..c5b11f45d2 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -244,8 +244,9 @@ def _log_artifact_inputs(cls, file, prefix, artifact_type): # create a Table try: - table = cls._make_table(file_content) + table, n_items = cls._make_table(file_content) artifact.add(table, stem) + wandb.config.update({f"n_{prefix}": n_items}) except: print(f"File {file_id} could not be read as a valid JSON file") @@ -254,4 +255,4 @@ def _log_artifact_inputs(cls, file, prefix, artifact_type): @classmethod def _make_table(cls, file_content): df = pd.read_json(io.StringIO(file_content), orient="records", lines=True) - return wandb.Table(dataframe=df) + return wandb.Table(dataframe=df), len(df) From 8ac09d3c4331bb574d95cbca04deb68eef88d55b Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Tue, 18 Jan 2022 14:25:05 -0600 Subject: [PATCH 19/26] feat(wandb): avoid download if file already synced --- openai/logger.py | 78 +++++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/openai/logger.py b/openai/logger.py index c5b11f45d2..8d5be6a459 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -81,7 +81,7 @@ def sync( if not show_warnings and not any(fine_tune_logged): print("No new successful fine-tune were found") - return "🎉 wandb log completed successfully" + return "🎉 wandb sync completed successfully" @classmethod def _log_fine_tune( @@ -152,7 +152,7 @@ def _log_fine_tune( wandb.summary["fine_tuned_model"] = fine_tuned_model # training/validation files and job 
details - cls._log_artifacts(fine_tune) + cls._log_artifacts(fine_tune, project, entity) # mark run as complete wandb.summary["status"] = "succeeded" @@ -176,7 +176,17 @@ def _get_wandb_run(cls, run_path): cls._wandb_api = wandb.Api() return cls._wandb_api.run(run_path) except Exception: - return False + return None + + @classmethod + def _get_wandb_artifact(cls, artifact_path): + cls._ensure_logged_in() + try: + if cls._wandb_api is None: + cls._wandb_api = wandb.Api() + return cls._wandb_api.artifact(artifact_path) + except Exception: + return None @classmethod def _get_config(cls, fine_tune): @@ -189,7 +199,7 @@ def _get_config(cls, fine_tune): return config @classmethod - def _log_artifacts(cls, fine_tune): + def _log_artifacts(cls, fine_tune, project, entity): # training/validation files training_file = ( fine_tune["training_files"][0] @@ -206,7 +216,7 @@ def _log_artifacts(cls, fine_tune): (validation_file, "valid", "validation_files"), ): if file is not None: - cls._log_artifact_inputs(file, prefix, artifact_type) + cls._log_artifact_inputs(file, prefix, artifact_type, project, entity) # job details fine_tune_id = fine_tune.get("id") @@ -219,38 +229,46 @@ def _log_artifacts(cls, fine_tune): json.dump(fine_tune, f, indent=2) wandb.run.log_artifact( artifact, - aliases=[fine_tune_id, "latest"], + aliases=["latest", fine_tune_id], ) @classmethod - def _log_artifact_inputs(cls, file, prefix, artifact_type): + def _log_artifact_inputs(cls, file, prefix, artifact_type, project, entity): file_id = file["id"] filename = Path(file["filename"]).name stem = Path(file["filename"]).stem - # get file content - try: - file_content = File.download(id=file_id).decode("utf-8") - except: - print( - f"File {file_id} could not be retrieved. 
Make sure you are allowed to download training/validation files" - ) - return - artifact = wandb.Artifact( - f"{prefix}-{filename}", type=artifact_type, metadata=file - ) - with artifact.new_file(filename, mode="w") as f: - f.write(file_content) - - # create a Table - try: - table, n_items = cls._make_table(file_content) - artifact.add(table, stem) - wandb.config.update({f"n_{prefix}": n_items}) - except: - print(f"File {file_id} could not be read as a valid JSON file") - - wandb.run.use_artifact(artifact, aliases=[file_id, "latest"]) + # get input artifact + artifact_name = f"{prefix}-{filename}" + artifact_alias = file_id + artifact_path = f"{project}/{artifact_name}:{artifact_alias}" + if entity is not None: + artifact_path = f"{entity}/{artifact_path}" + artifact = cls._get_wandb_artifact(artifact_path) + + # create artifact if file not already logged previously + if artifact is None: + # get file content + try: + file_content = File.download(id=file_id).decode("utf-8") + except: + print( + f"File {file_id} could not be retrieved. 
Make sure you are allowed to download training/validation files" + ) + return + artifact = wandb.Artifact(artifact_name, type=artifact_type, metadata=file) + with artifact.new_file(filename, mode="w") as f: + f.write(file_content) + + # create a Table + try: + table, n_items = cls._make_table(file_content) + artifact.add(table, stem) + wandb.config.update({f"n_{prefix}": n_items}) + except: + print(f"File {file_id} could not be read as a valid JSON file") + + wandb.run.use_artifact(artifact, aliases=["latest", artifact_alias]) @classmethod def _make_table(cls, file_content): From 08ef4de2531acf67e4830e44b0994109211a15d6 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Tue, 18 Jan 2022 14:31:31 -0600 Subject: [PATCH 20/26] feat(wandb): add number of items to metadata --- openai/logger.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openai/logger.py b/openai/logger.py index 8d5be6a459..6c51bb72cd 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -265,6 +265,7 @@ def _log_artifact_inputs(cls, file, prefix, artifact_type, project, entity): table, n_items = cls._make_table(file_content) artifact.add(table, stem) wandb.config.update({f"n_{prefix}": n_items}) + artifact.metadata["items"] = n_items except: print(f"File {file_id} could not be read as a valid JSON file") From 9c3738c7a7832e9d063a90c9fd18c7a61f41bf20 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Tue, 18 Jan 2022 14:51:12 -0600 Subject: [PATCH 21/26] fix(wandb): allow force sync --- openai/logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openai/logger.py b/openai/logger.py index 6c51bb72cd..33747adcaf 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -121,7 +121,7 @@ def _log_fine_tune( print( f"A new wandb run will be created for fine-tune job {fine_tune_id} and previous run will be overwritten" ) - if wandb_status == "succeeded": + if wandb_status == "succeeded" and not force: return # retrieve results From 39e747c981fdbb99824e444f3f3a9a56688d0912 Mon Sep 17 
00:00:00 2001 From: Boris Dayma Date: Wed, 19 Jan 2022 10:30:06 -0600 Subject: [PATCH 22/26] feat(wandb): job -> fine-tune --- openai/cli.py | 8 ++++---- openai/logger.py | 42 ++++++++++++++++++++++-------------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/openai/cli.py b/openai/cli.py index c3a3de19ea..75d4f4e8a6 100644 --- a/openai/cli.py +++ b/openai/cli.py @@ -541,7 +541,7 @@ class Logger: def sync(cls, args): resp = openai.logger.Logger.sync( id=args.id, - n_jobs=args.n_jobs, + n_fine_tunes=args.n_fine_tunes, project=args.project, entity=args.entity, force=args.force, @@ -984,10 +984,10 @@ def help(args): sub.add_argument("-i", "--id", help="The id of the fine-tune job (optional)") sub.add_argument( "-n", - "--n_jobs", + "--n_fine_tunes", type=int, default=None, - help="Number of most recent fine-tune jobs to log when an id is not provided. By default, every fine-tune is synced.", + help="Number of most recent fine-tunes to log when an id is not provided. By default, every fine-tune is synced.", ) sub.add_argument( "--project", @@ -1001,7 +1001,7 @@ def help(args): sub.add_argument( "--force", action="store_true", - help="Forces logging and overwrite existing wandb run of the same finetune job.", + help="Forces logging and overwrite existing wandb run of the same fine-tune.", ) sub.set_defaults(force=False) sub.set_defaults(func=Logger.sync) diff --git a/openai/logger.py b/openai/logger.py index 33747adcaf..8b4f318ebf 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -20,7 +20,7 @@ class Logger: """ - Log fine-tune jobs to Weights & Biases + Log fine-tunes to Weights & Biases """ if not WANDB_AVAILABLE: @@ -33,19 +33,19 @@ class Logger: def sync( cls, id=None, - n_jobs=None, + n_fine_tunes=None, project="GPT-3", entity=None, force=False, **kwargs_wandb_init, ): """ - Sync fine-tune job to Weights & Biases. 
- :param id: The id of the fine-tune job (optional) - :param n_jobs: Number of most recent fine-tune jobs to log when an id is not provided + Sync fine-tunes to Weights & Biases. + :param id: The id of the fine-tune (optional) + :param n_fine_tunes: Number of most recent fine-tunes to log when an id is not provided. By default, every fine-tune is synced. :param project: Name of the project where you're sending runs. By default, it is "GPT-3". :param entity: Username or team name where you're sending runs. By default, your default entity is used, which is usually your username. - :param force: Forces logging and overwrite existing wandb run of the same finetune job. + :param force: Forces logging and overwrite existing wandb run of the same fine-tune. """ if not WANDB_AVAILABLE: @@ -60,12 +60,14 @@ def sync( # get list of fine_tune to log fine_tunes = FineTune.list() if not fine_tunes or fine_tunes.get("data") is None: - print("No fine-tune jobs have been retrieved") + print("No fine-tune has been retrieved") return - fine_tunes = fine_tunes["data"][-n_jobs if n_jobs is not None else None :] + fine_tunes = fine_tunes["data"][ + -n_fine_tunes if n_fine_tunes is not None else None : + ] # log starting from oldest fine_tune - show_warnings = False if id is None and n_jobs is None else True + show_warnings = False if id is None and n_fine_tunes is None else True fine_tune_logged = [ cls._log_fine_tune( fine_tune, @@ -79,7 +81,7 @@ def sync( ] if not show_warnings and not any(fine_tune_logged): - print("No new successful fine-tune were found") + print("No new successful fine-tunes were found") return "🎉 wandb sync completed successfully" @@ -93,7 +95,7 @@ def _log_fine_tune( # check run completed successfully if show_warnings and status != "succeeded": print( - f'Fine-tune job {fine_tune_id} has the status "{status}" and will not be logged' + f'Fine-tune {fine_tune_id} has the status "{status}" and will not be logged' ) return @@ -107,7 +109,7 @@ def _log_fine_tune( if 
show_warnings: if wandb_status == "succeeded": print( - f"Fine-tune job {fine_tune_id} has already been logged successfully at {wandb_run.url}" + f"Fine-tune {fine_tune_id} has already been logged successfully at {wandb_run.url}" ) if not force: print( @@ -115,11 +117,11 @@ def _log_fine_tune( ) else: print( - f"A run for fine-tune job {fine_tune_id} was previously created but didn't end successfully" + f"A run for fine-tune {fine_tune_id} was previously created but didn't end successfully" ) if wandb_status != "succeeded" or force: print( - f"A new wandb run will be created for fine-tune job {fine_tune_id} and previous run will be overwritten" + f"A new wandb run will be created for fine-tune {fine_tune_id} and previous run will be overwritten" ) if wandb_status == "succeeded" and not force: return @@ -130,7 +132,7 @@ def _log_fine_tune( # start a wandb run wandb.init( - job_type="finetune", + job_type="fine-tune", config=cls._get_config(fine_tune), project=project, entity=entity, @@ -151,7 +153,7 @@ def _log_fine_tune( if fine_tuned_model is not None: wandb.summary["fine_tuned_model"] = fine_tuned_model - # training/validation files and job details + # training/validation files and fine-tune details cls._log_artifacts(fine_tune, project, entity) # mark run as complete @@ -218,14 +220,14 @@ def _log_artifacts(cls, fine_tune, project, entity): if file is not None: cls._log_artifact_inputs(file, prefix, artifact_type, project, entity) - # job details + # fine-tune details fine_tune_id = fine_tune.get("id") artifact = wandb.Artifact( - "job_details", - type="job_details", + "fine_tune_details", + type="fine_tune_details", metadata=fine_tune, ) - with artifact.new_file("job_details.json") as f: + with artifact.new_file("fine_tune_details.json") as f: json.dump(fine_tune, f, indent=2) wandb.run.log_artifact( artifact, From 0b1751de68c67bf1c18fd9886926e2f71a75c460 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 19 Jan 2022 10:34:38 -0600 Subject: [PATCH 23/26] 
refactor(wandb): use show_individual_warnings --- openai/logger.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/openai/logger.py b/openai/logger.py index 8b4f318ebf..78bdcaf0ab 100644 --- a/openai/logger.py +++ b/openai/logger.py @@ -67,33 +67,41 @@ def sync( ] # log starting from oldest fine_tune - show_warnings = False if id is None and n_fine_tunes is None else True + show_individual_warnings = ( + False if id is None and n_fine_tunes is None else True + ) fine_tune_logged = [ cls._log_fine_tune( fine_tune, project, entity, force, - show_warnings, + show_individual_warnings, **kwargs_wandb_init, ) for fine_tune in fine_tunes ] - if not show_warnings and not any(fine_tune_logged): + if not show_individual_warnings and not any(fine_tune_logged): print("No new successful fine-tunes were found") return "🎉 wandb sync completed successfully" @classmethod def _log_fine_tune( - cls, fine_tune, project, entity, force, show_warnings, **kwargs_wandb_init + cls, + fine_tune, + project, + entity, + force, + show_individual_warnings, + **kwargs_wandb_init, ): fine_tune_id = fine_tune.get("id") status = fine_tune.get("status") # check run completed successfully - if show_warnings and status != "succeeded": + if show_individual_warnings and status != "succeeded": print( f'Fine-tune {fine_tune_id} has the status "{status}" and will not be logged' ) @@ -106,7 +114,7 @@ def _log_fine_tune( wandb_run = cls._get_wandb_run(run_path) if wandb_run: wandb_status = wandb_run.summary.get("status") - if show_warnings: + if show_individual_warnings: if wandb_status == "succeeded": print( f"Fine-tune {fine_tune_id} has already been logged successfully at {wandb_run.url}" From 81db437ff96d7d0330ce1175cef0caaf6db7ddf4 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 19 Jan 2022 10:50:11 -0600 Subject: [PATCH 24/26] feat(wandb): Logger -> WandbLogger --- openai/cli.py | 8 ++++---- openai/{logger.py => wandb_logger.py} | 2 +- 2 files changed, 5 
insertions(+), 5 deletions(-) rename openai/{logger.py => wandb_logger.py} (99%) diff --git a/openai/cli.py b/openai/cli.py index 75d4f4e8a6..c57d4c973e 100644 --- a/openai/cli.py +++ b/openai/cli.py @@ -9,7 +9,6 @@ import requests import openai -import openai.logger from openai.upload_progress import BufferReader from openai.validators import ( apply_necessary_remediation, @@ -20,6 +19,7 @@ write_out_file, write_out_search_file, ) +import openai.wandb_logger class bcolors: @@ -536,10 +536,10 @@ def prepare_data(cls, args): ) -class Logger: +class WandbLogger: @classmethod def sync(cls, args): - resp = openai.logger.Logger.sync( + resp = openai.wandb_logger.WandbLogger.sync( id=args.id, n_fine_tunes=args.n_fine_tunes, project=args.project, @@ -1004,4 +1004,4 @@ def help(args): help="Forces logging and overwrite existing wandb run of the same fine-tune.", ) sub.set_defaults(force=False) - sub.set_defaults(func=Logger.sync) + sub.set_defaults(func=WandbLogger.sync) diff --git a/openai/logger.py b/openai/wandb_logger.py similarity index 99% rename from openai/logger.py rename to openai/wandb_logger.py index 78bdcaf0ab..9740098e02 100644 --- a/openai/logger.py +++ b/openai/wandb_logger.py @@ -18,7 +18,7 @@ from openai import File, FineTune -class Logger: +class WandbLogger: """ Log fine-tunes to Weights & Biases """ From e6f154f7c81012ef86292c75bcc98179fe4fe014 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Wed, 19 Jan 2022 16:02:21 -0600 Subject: [PATCH 25/26] feat(wandb): retrieve number of items from artifact --- openai/wandb_logger.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openai/wandb_logger.py b/openai/wandb_logger.py index 9740098e02..228d4b6431 100644 --- a/openai/wandb_logger.py +++ b/openai/wandb_logger.py @@ -278,6 +278,9 @@ def _log_artifact_inputs(cls, file, prefix, artifact_type, project, entity): artifact.metadata["items"] = n_items except: print(f"File {file_id} could not be read as a valid JSON file") + else: + # log number of items +
wandb.config.update({f"n_{prefix}": artifact.metadata.get("items")}) wandb.run.use_artifact(artifact, aliases=["latest", artifact_alias]) From 90c802bca79b2b6989e97ca9f0b972a7e9538671 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Thu, 27 Jan 2022 14:15:30 -0600 Subject: [PATCH 26/26] doc(wandb): add link to documentation --- README.md | 7 +++++++ openai/wandb_logger.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1aad1ff16b..9da57aaed8 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ search = openai.Engine(id="deployment-namme").search(documents=["White House", " # print the search print(search) ``` + Please note that for the moment, the Microsoft Azure endpoints can only be used for completion and search operations. ### Command-line interface @@ -142,6 +143,12 @@ Examples of fine tuning are shared in the following Jupyter notebooks: - [Step 2: Creating a synthetic Q&A dataset](https://github.com/openai/openai-python/blob/main/examples/finetuning/olympics-2-create-qa.ipynb) - [Step 3: Train a fine-tuning model specialized for Q&A](https://github.com/openai/openai-python/blob/main/examples/finetuning/olympics-3-train-qa.ipynb) +Sync your fine-tunes to [Weights & Biases](https://wandb.me/openai-docs) to track experiments, models, and datasets in your central dashboard with: + +```bash +openai wandb sync +``` + For more information on fine tuning, read the [fine-tuning guide](https://beta.openai.com/docs/guides/fine-tuning) in the OpenAI documentation. ## Requirements diff --git a/openai/wandb_logger.py b/openai/wandb_logger.py index 228d4b6431..7bdacd711c 100644 --- a/openai/wandb_logger.py +++ b/openai/wandb_logger.py @@ -20,7 +20,7 @@ class WandbLogger: """ - Log fine-tunes to Weights & Biases + Log fine-tunes to [Weights & Biases](https://wandb.me/openai-docs) """ if not WANDB_AVAILABLE: