4
4
5
5
6
6
def main(args):
    """Quantize a model file via ``llama_cpp.llama_model_quantize``.

    Args:
        args: Parsed command-line namespace providing ``fname_inp`` (input
            model path), ``fname_out`` (output model path) and ``type``
            (integer quantization type, stored in the params' ``ftype``).

    Raises:
        RuntimeError: If the input file does not exist, the output file
            already exists, or ``llama_model_quantize`` returns non-zero.
    """
    # The quantize call below is given the paths as UTF-8 encoded bytes.
    fname_inp = args.fname_inp.encode("utf-8")
    fname_out = args.fname_out.encode("utf-8")
    if not os.path.exists(fname_inp):
        # Report the original str path; formatting the bytes would show b'...'.
        raise RuntimeError(f"Input file does not exist ({args.fname_inp})")
    if os.path.exists(fname_out):
        raise RuntimeError(f"Output file already exists ({args.fname_out})")
    # Start from the library's default params and override only the
    # quantization type. Kept under its own name instead of rebinding ``args``.
    params = llama_cpp.llama_model_quantize_default_params()
    params.ftype = args.type
    return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, params)
    if return_code != 0:
        raise RuntimeError("Failed to quantize model")
17
19
@@ -20,6 +22,7 @@ def main(args):
20
22
# Command-line interface: three positional arguments, handed to main().
parser = argparse.ArgumentParser()
parser.add_argument(
    "fname_inp",
    type=str,
    help="Path to input model",
)
parser.add_argument(
    "fname_out",
    type=str,
    help="Path to output model",
)
parser.add_argument(
    "type",
    type=int,
    help="Type of quantization (2: q4_0, 3: q4_1), see llama_cpp.py for enum ",
)
args = parser.parse_args()
main(args)
28
+
0 commit comments