44
55
def main(args):
    """Quantize a model file through ``llama_cpp.llama_model_quantize``.

    Args:
        args: Parsed command-line namespace providing ``fname_inp`` (path of
            the model to read), ``fname_out`` (path to write; must not exist
            yet) and ``type`` (integer quantization type, stored in the
            ``ftype`` field of the quantize params).

    Raises:
        RuntimeError: If the input file is missing, the output file already
            exists, or the quantize call returns a non-zero code.
    """
    # Validate using the original str paths so the error messages show the
    # path itself rather than a bytes repr such as b'model.bin'.
    if not os.path.exists(args.fname_inp):
        raise RuntimeError(f"Input file does not exist ({args.fname_inp})")
    if os.path.exists(args.fname_out):
        raise RuntimeError(f"Output file already exists ({args.fname_out})")

    # Start from the library defaults and override only the target type. A
    # separate name keeps the CLI namespace from being shadowed.
    params = llama_cpp.llama_model_quantize_default_params()
    params.ftype = args.type

    # Paths are handed to the binding as UTF-8 bytes, encoded once at the
    # call boundary.
    return_code = llama_cpp.llama_model_quantize(
        args.fname_inp.encode("utf-8"),
        args.fname_out.encode("utf-8"),
        params,
    )
    if return_code != 0:
        raise RuntimeError("Failed to quantize model")
1719
@@ -20,6 +22,7 @@ def main(args):
2022 parser = argparse .ArgumentParser ()
2123 parser .add_argument ("fname_inp" , type = str , help = "Path to input model" )
2224 parser .add_argument ("fname_out" , type = str , help = "Path to output model" )
23- parser .add_argument ("type" , type = int , help = "Type of quantization (2: q4_0, 3: q4_1)" )
25+ parser .add_argument ("type" , type = int , help = "Type of quantization (2: q4_0, 3: q4_1), see llama_cpp.py for enum " )
2426 args = parser .parse_args ()
2527 main (args )
28+
0 commit comments