From 1d0c5ae81e41e2ca03ed5b73fa575dfe4601ec38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Thu, 29 Aug 2024 19:32:50 +0200 Subject: [PATCH] Clip: Fix tokenizer --- clip.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clip.hpp b/clip.hpp index 664da58c6..2a4df9de7 100644 --- a/clip.hpp +++ b/clip.hpp @@ -388,7 +388,7 @@ class CLIPTokenizer { std::string token_str = token.str(); std::u32string utf32_token; for (int i = 0; i < token_str.length(); i++) { - char b = token_str[i]; + unsigned char b = token_str[i]; utf32_token += byte_encoder[b]; } auto bpe_strs = bpe(utf32_token);