
Commit 6689d71

Merge branch 'main' into configurable-chat-templates

2 parents: 45a4188 + 6e167a2

6 files changed, +27 -7 lines


CHANGELOG.md  (+4)

@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- Update llama.cpp to 8781013ef654270cbead3e0011e33a6d690fb168
+
+## [0.2.6]
+
 - Update llama.cpp to 80291a1d02a07f7f66666fb576c5b1e75aa48b46
 
 ## [0.2.5]

CMakeLists.txt  (+10 -1)

@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.4...3.22)
+cmake_minimum_required(VERSION 3.21)
 
 project(llama_cpp)
 
@@ -33,4 +33,13 @@ if (LLAMA_BUILD)
         FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
         RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
     )
+    # Workaround for Windows + CUDA https://github.com/abetlen/llama-cpp-python/issues/563
+    install(
+        FILES $<TARGET_RUNTIME_DLLS:llama>
+        DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
+    )
+    install(
+        FILES $<TARGET_RUNTIME_DLLS:llama>
+        DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
+    )
 endif()

llama_cpp/llama.py  (+2)

@@ -437,6 +437,7 @@ def tokenize(self, text: bytes, add_bos: bool = True) -> List[int]:
         n_tokens = llama_cpp.llama_tokenize_with_model(
             self.model,
             text,
+            len(text),
             tokens,
             n_ctx,
             add_bos,
@@ -447,6 +448,7 @@ def tokenize(self, text: bytes, add_bos: bool = True) -> List[int]:
             n_tokens = llama_cpp.llama_tokenize_with_model(
                 self.model,
                 text,
+                len(text),
                 tokens,
                 n_tokens,
                 add_bos,
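A minimal usage sketch of the high-level API whose internals changed above; the model path is a placeholder. Llama.tokenize() still takes bytes and returns a list of token ids, and only the internal llama_tokenize_with_model call gains the explicit len(text) argument.

from llama_cpp import Llama

# Hypothetical model path; tokenize() accepts bytes and returns List[int].
llm = Llama(model_path="./models/7B/model.gguf")
tokens = llm.tokenize(b"Hello, world!")
print(tokens)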

llama_cpp/llama_cpp.py  (+9 -4)

@@ -58,7 +58,7 @@ def _load_shared_library(lib_base_name: str):
         if "CUDA_PATH" in os.environ:
             os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin"))
             os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "lib"))
-        cdll_args["winmode"] = 0
+        cdll_args["winmode"] = ctypes.RTLD_GLOBAL
 
     # Try to load the shared library, handling potential errors
     for _lib_path in _lib_paths:
@@ -950,42 +950,47 @@ def llama_token_nl(ctx: llama_context_p) -> llama_token:
 # LLAMA_API int llama_tokenize(
 #     struct llama_context * ctx,
 #     const char * text,
+#     int text_len,
 #     llama_token * tokens,
 #     int n_max_tokens,
 #     bool add_bos);
 def llama_tokenize(
     ctx: llama_context_p,
     text: bytes,
+    text_len: Union[c_int, int],
     tokens,  # type: Array[llama_token]
     n_max_tokens: Union[c_int, int],
     add_bos: Union[c_bool, int],
 ) -> int:
-    return _lib.llama_tokenize(ctx, text, tokens, n_max_tokens, add_bos)
+    return _lib.llama_tokenize(ctx, text, text_len, tokens, n_max_tokens, add_bos)
 
 
-_lib.llama_tokenize.argtypes = [llama_context_p, c_char_p, llama_token_p, c_int, c_bool]
+_lib.llama_tokenize.argtypes = [llama_context_p, c_char_p, c_int, llama_token_p, c_int, c_bool]
 _lib.llama_tokenize.restype = c_int
 
 
 # LLAMA_API int llama_tokenize_with_model(
 #     const struct llama_model * model,
 #     const char * text,
+#     int text_len,
 #     llama_token * tokens,
 #     int n_max_tokens,
 #     bool add_bos);
 def llama_tokenize_with_model(
     model: llama_model_p,
     text: bytes,
+    text_len: Union[c_int, int],
     tokens,  # type: Array[llama_token]
     n_max_tokens: Union[c_int, int],
     add_bos: Union[c_bool, bool],
 ) -> int:
-    return _lib.llama_tokenize_with_model(model, text, tokens, n_max_tokens, add_bos)
+    return _lib.llama_tokenize_with_model(model, text, text_len, tokens, n_max_tokens, add_bos)
 
 
 _lib.llama_tokenize_with_model.argtypes = [
     llama_model_p,
     c_char_p,
+    c_int,
     llama_token_p,
     c_int,
     c_bool,
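A sketch of calling the updated low-level binding directly, assuming an already-initialized ctx (a llama_context_p created elsewhere): the new text_len argument sits between text and the token buffer, matching the argtypes above.

import llama_cpp

# Sketch only: ctx is assumed to be a valid llama_context_p created elsewhere.
text = b"Hello, world!"
n_max_tokens = 64
tokens = (llama_cpp.llama_token * n_max_tokens)()  # ctypes array used as the output buffer
n = llama_cpp.llama_tokenize(ctx, text, len(text), tokens, n_max_tokens, True)
# llama.cpp reports a negative count when the buffer is too small.
if n < 0:
    raise RuntimeError(f"token buffer too small, need {-n} slots")
token_ids = list(tokens[:n])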

pyproject.toml  (+1 -1)

@@ -54,7 +54,7 @@ all = [
 [tool.scikit-build]
 wheel.packages = ["llama_cpp"]
 cmake.verbose = true
-cmake.minimum-version = "3.12"
+cmake.minimum-version = "3.21"
 minimum-version = "0.5"
 sdist.exclude = [".git", "vendor/llama.cpp/.git"]
 

vendor/llama.cpp  (submodule updated)
