@@ -208,7 +208,7 @@ def tokenize(self, text: bytes, add_bos: bool, special: bool):
208
208
def token_to_piece(self, token: int, special: bool = False) -> bytes:
    """Convert a single token id to its raw byte piece.

    Args:
        token: Token id to convert.
        special: Whether special/control tokens are rendered as text.

    Returns:
        The piece as bytes, sliced to the length reported by the C API
        (no trailing NUL padding from the fixed-size buffer).
    """
    assert self.model is not None
    # llama_token_to_piece(model, token, buf, length, lstrip, special)
    # returns the number of bytes written; a negative return's magnitude
    # is the required buffer size when `length` is too small (the same
    # protocol the module-level _token_to_piece helper handles).
    size = 32
    buf = ctypes.create_string_buffer(size)
    n = llama_cpp.llama_token_to_piece(self.model, token, buf, size, 0, special)
    if n < 0:
        # Piece did not fit in 32 bytes: retry with the exact required size
        # instead of silently truncating.
        size = -n
        buf = ctypes.create_string_buffer(size)
        n = llama_cpp.llama_token_to_piece(self.model, token, buf, size, 0, special)
    # Slice to the reported length, mirroring detokenize()'s bytes(buffer[:n]);
    # bytes(buf) alone would return the whole NUL-padded buffer.
    return bytes(buf[:n])
213
213
214
214
def detokenize (self , tokens : List [int ], special : bool = False ) -> bytes :
@@ -218,7 +218,7 @@ def detokenize(self, tokens: List[int], special: bool = False) -> bytes:
218
218
buffer = (ctypes .c_char * size )()
219
219
for token in tokens :
220
220
n = llama_cpp .llama_token_to_piece (
221
- self .model , llama_cpp .llama_token (token ), buffer , size , special
221
+ self .model , llama_cpp .llama_token (token ), buffer , size , 0 , special
222
222
)
223
223
assert n <= size
224
224
output += bytes (buffer [:n ])
@@ -635,10 +635,10 @@ def _tokenize(model: _LlamaModel, text: str, add_bos: bool, special: bool) -> li
635
635
def _token_to_piece (model : _LlamaModel , token : int , special : bool = False ) -> str :
636
636
assert model .model is not None
637
637
result = (ctypes .c_char * 8 )(0 )
638
- n_tokens = llama_cpp .llama_token_to_piece (model .model , token , result , len (result ), special )
638
+ n_tokens = llama_cpp .llama_token_to_piece (model .model , token , result , 0 , len (result ), special )
639
639
if n_tokens < 0 :
640
640
result = (ctypes .c_char * - n_tokens )(0 )
641
- check = llama_cpp .llama_token_to_piece (model .model , token , result , len (result ), special )
641
+ check = llama_cpp .llama_token_to_piece (model .model , token , result , 0 , len (result ), special )
642
642
if check != - n_tokens :
643
643
raise RuntimeError (f"Failed to get piece: token={ token } " )
644
644
else :
0 commit comments