@@ -606,6 +606,201 @@ def prepare_data(cls, args):
        )


+class FineTuningJob:
+    @classmethod
+    def list(cls, args):
+        has_ft_jobs = False
+        for fine_tune_job in openai.FineTuningJob.auto_paging_iter():
+            has_ft_jobs = True
+            print(fine_tune_job)
+        if not has_ft_jobs:
+            print("No fine-tuning jobs found.")
+
+    @classmethod
+    def _is_url(cls, file: str):
+        return file.lower().startswith("http")
+
+    @classmethod
+    def _download_file_from_public_url(cls, url: str) -> Optional[bytes]:
+        resp = requests.get(url)
+        if resp.status_code == 200:
+            return resp.content
+        else:
+            return None
+
+    @classmethod
+    def _maybe_upload_file(
+        cls,
+        file: Optional[str] = None,
+        content: Optional[bytes] = None,
+        user_provided_file: Optional[str] = None,
+        check_if_file_exists: bool = True,
+    ):
+        # Exactly one of `file` or `content` must be provided
+        if (file is None) == (content is None):
+            raise ValueError("Exactly one of `file` or `content` must be provided")
+
+        if content is None:
+            assert file is not None
+            with open(file, "rb") as f:
+                content = f.read()
+
+        if check_if_file_exists:
+            bytes = len(content)
+            matching_files = openai.File.find_matching_files(
+                name=user_provided_file or f.name,
+                bytes=bytes,
+                purpose="fine-tune",
+            )
+            if len(matching_files) > 0:
+                file_ids = [f["id"] for f in matching_files]
+                sys.stdout.write(
+                    "Found potentially duplicated files with name '{name}', purpose 'fine-tune', and size {size} bytes\n".format(
+                        name=os.path.basename(matching_files[0]["filename"]),
+                        size=matching_files[0]["bytes"]
+                        if "bytes" in matching_files[0]
+                        else matching_files[0]["size"],
+                    )
+                )
+                sys.stdout.write("\n".join(file_ids))
+                while True:
+                    sys.stdout.write(
+                        "\nEnter file ID to reuse an already uploaded file, or an empty string to upload this file anyway: "
+                    )
+                    inp = sys.stdin.readline().strip()
+                    if inp in file_ids:
+                        sys.stdout.write(
+                            "Reusing already uploaded file: {id}\n".format(id=inp)
+                        )
+                        return inp
+                    elif inp == "":
+                        break
+                    else:
+                        sys.stdout.write(
+                            "File id '{id}' is not among the IDs of the potentially duplicated files\n".format(
+                                id=inp
+                            )
+                        )
+
+        buffer_reader = BufferReader(content, desc="Upload progress")
+        resp = openai.File.create(
+            file=buffer_reader,
+            purpose="fine-tune",
+            user_provided_filename=user_provided_file or file,
+        )
+        sys.stdout.write(
+            "Uploaded file from {file}: {id}\n".format(
+                file=user_provided_file or file, id=resp["id"]
+            )
+        )
+        sys.stdout.write("Waiting for file to finish processing before proceeding..\n")
+        sys.stdout.flush()
+        status = openai.File.wait_for_processing(resp["id"])
+        if status != "processed":
+            raise openai.error.OpenAIError(
+                "File {id} failed to process, status={status}.".format(
+                    id=resp["id"], status=status
+                )
+            )
+
+        sys.stdout.write(
+            "File {id} finished processing and is ready for use in fine-tuning".format(
+                id=resp["id"]
+            )
+        )
+        sys.stdout.flush()
+        return resp["id"]
+
+    @classmethod
+    def _get_or_upload(cls, file, check_if_file_exists=True):
+        try:
+            # 1. If it's a valid file ID, use it
+            openai.File.retrieve(file)
+            return file
+        except openai.error.InvalidRequestError:
+            pass
+        if os.path.isfile(file):
+            # 2. If it's a file on the filesystem, upload it
+            return cls._maybe_upload_file(
+                file=file, check_if_file_exists=check_if_file_exists
+            )
+        if cls._is_url(file):
+            # 3. If it's a URL, download it temporarily
+            content = cls._download_file_from_public_url(file)
+            if content is not None:
+                return cls._maybe_upload_file(
+                    content=content,
+                    check_if_file_exists=check_if_file_exists,
+                    user_provided_file=file,
+                )
+        return file
+
+    @classmethod
+    def create(cls, args):
+        create_args = {
+            "training_file": cls._get_or_upload(
+                args.training_file, args.check_if_files_exist
+            ),
+        }
+        if args.validation_file:
+            create_args["validation_file"] = cls._get_or_upload(
+                args.validation_file, args.check_if_files_exist
+            )
+
+        for param in ("model", "suffix"):
+            attr = getattr(args, param)
+            if attr is not None:
+                create_args[param] = attr
+
+        if getattr(args, "n_epochs"):
+            create_args["hyperparameters"] = {
+                "n_epochs": args.n_epochs,
+            }
+
+        resp = openai.FineTuningJob.create(**create_args)
+        print(resp)
+        return
+
+    @classmethod
+    def get(cls, args):
+        resp = openai.FineTuningJob.retrieve(id=args.id)
+        print(resp)
+
+    @classmethod
+    def results(cls, args):
+        fine_tune = openai.FineTuningJob.retrieve(id=args.id)
+        if "result_files" not in fine_tune or len(fine_tune["result_files"]) == 0:
+            raise openai.error.InvalidRequestError(
+                f"No results file available for fine-tune {args.id}", "id"
+            )
+        result_file = openai.FineTuningJob.retrieve(id=args.id)["result_files"][0]
+        resp = openai.File.download(id=result_file)
+        print(resp.decode("utf-8"))
+
+    @classmethod
+    def events(cls, args):
+        seen, has_more = 0, True
+        while has_more:
+            resp = openai.FineTuningJob.list_events(id=args.id)  # type: ignore
+            for event in resp["data"]:
+                print(event)
+                seen += 1
+                if args.limit is not None and seen >= args.limit:
+                    return
+            has_more = resp["has_more"]
+
+    @classmethod
+    def follow(cls, args):
+        raise openai.error.OpenAIError(
+            message="Event streaming is not yet supported for `fine_tuning.job` events"
+        )
+
+    @classmethod
+    def cancel(cls, args):
+        resp = openai.FineTuningJob.cancel(id=args.id)
+        print(resp)
+
+
class WandbLogger:
    @classmethod
    def sync(cls, args):
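(Aside, not part of the diff.) The `create` classmethod above resolves `--training_file`/`--validation_file` to uploaded file IDs and then issues a single SDK call. A minimal sketch of the equivalent programmatic call with the pre-1.0 `openai` Python package follows; the file ID, model, suffix, and epoch count are illustrative placeholders, not values from this commit.

import openai

# Assumed placeholders: a previously uploaded JSONL file ID and a base model name.
create_args = {
    "training_file": "file-abc123",
    "model": "gpt-3.5-turbo-0613",
    "suffix": "first-finetune",
    "hyperparameters": {"n_epochs": 3},  # sent only when --n_epochs is passed to the CLI
}
resp = openai.FineTuningJob.create(**create_args)  # the same call the CLI's create() issues
print(resp)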
@@ -1098,6 +1293,83 @@ def help(args):
    sub.add_argument("--prompt", type=str)
    sub.set_defaults(func=Audio.translate)

+    # FineTuning Jobs
+    sub = subparsers.add_parser("fine_tuning.job.list")
+    sub.set_defaults(func=FineTuningJob.list)
+
+    sub = subparsers.add_parser("fine_tuning.job.create")
+    sub.add_argument(
+        "-t",
+        "--training_file",
+        required=True,
+        help="JSONL file containing either chat-completion or prompt-completion examples for training. "
+        "This can be the ID of a file uploaded through the OpenAI API (e.g. file-abcde12345), "
+        'a local file path, or a URL that starts with "http".',
+    )
+    sub.add_argument(
+        "-v",
+        "--validation_file",
+        help="JSONL file containing either chat-completion or prompt-completion examples for validation. "
+        "This can be the ID of a file uploaded through the OpenAI API (e.g. file-abcde12345), "
+        'a local file path, or a URL that starts with "http".',
+    )
+    sub.add_argument(
+        "--no_check_if_files_exist",
+        dest="check_if_files_exist",
+        action="store_false",
+        help="If this argument is set and training_file or validation_file are file paths, immediately upload them. If this argument is not set, check if they may be duplicates of already uploaded files before uploading, based on file name and file size.",
+    )
+    sub.add_argument(
+        "-m",
+        "--model",
+        help="The model to start fine-tuning from",
+    )
+    sub.add_argument(
+        "--suffix",
+        help="If set, this argument can be used to customize the generated fine-tuned model name."
+        "All punctuation and whitespace in `suffix` will be replaced with a "
+        "single dash, and the string will be lower cased. The max "
+        "length of `suffix` is 18 chars. "
+        "The generated name will match the form `ft:{base_model}:{org-title}:{suffix}:{rstring}` where `rstring` "
+        "is a random string sortable as a timestamp. "
+        'For example, `openai api fine_tuning.job.create -t test.jsonl -m gpt-3.5-turbo-0613 --suffix "first finetune!" '
+        "could generate a model with the name "
+        "ft:gpt-3.5-turbo-0613:your-org:first-finetune:7p4PqAoY",
+    )
+    sub.add_argument(
+        "--n_epochs",
+        type=int,
+        help="The number of epochs to train the model for. An epoch refers to one "
+        "full cycle through the training dataset.",
+    )
+    sub.set_defaults(func=FineTuningJob.create)
+
+    sub = subparsers.add_parser("fine_tuning.job.get")
+    sub.add_argument("-i", "--id", required=True, help="The id of the fine-tune job")
+    sub.set_defaults(func=FineTuningJob.get)
+
+    sub = subparsers.add_parser("fine_tuning.job.results")
+    sub.add_argument("-i", "--id", required=True, help="The id of the fine-tune job")
+    sub.set_defaults(func=FineTuningJob.results)
+
+    sub = subparsers.add_parser("fine_tuning.job.events")
+    sub.add_argument("-i", "--id", required=True, help="The id of the fine-tune job")
+    sub.add_argument(
+        "--limit",
+        type=int,
+        required=False,
+        help="The number of events to return, starting from most recent. If not specified, all events will be returned.",
+    )
+    sub.set_defaults(func=FineTuningJob.events)
+
+    sub = subparsers.add_parser("fine_tuning.job.follow")
+    sub.add_argument("-i", "--id", required=True, help="The id of the fine-tune job")
+    sub.set_defaults(func=FineTuningJob.follow)
+
+    sub = subparsers.add_parser("fine_tuning.job.cancel")
+    sub.add_argument("-i", "--id", required=True, help="The id of the fine-tune job")
+    sub.set_defaults(func=FineTuningJob.cancel)
+

def wandb_register(parser):
    subparsers = parser.add_subparsers(
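(Aside, not part of the diff.) Each registration above stores its handler with `set_defaults(func=...)`, so after parsing, the CLI entry point can dispatch with `args.func(args)`. A minimal, self-contained sketch of that assumed dispatch pattern, using one representative subcommand, is shown below; the program name and the import path are assumptions for illustration only.

import argparse

from openai.cli import FineTuningJob  # assumed module path for the class added in this diff


def main():
    parser = argparse.ArgumentParser(prog="openai-api-sketch")
    subparsers = parser.add_subparsers(dest="command", required=True)

    # One representative subcommand, wired the same way as the registrations above.
    sub = subparsers.add_parser("fine_tuning.job.get")
    sub.add_argument("-i", "--id", required=True, help="The id of the fine-tune job")
    sub.set_defaults(func=FineTuningJob.get)

    args = parser.parse_args()
    args.func(args)  # invokes the classmethod stored via set_defaults


if __name__ == "__main__":
    main()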