diff --git a/Makefile b/Makefile index b0b2aed..9d385a2 100644 --- a/Makefile +++ b/Makefile @@ -23,6 +23,9 @@ build.cublas: ${submodules} update-pip ## Build ggml-python with cublas / cuda s build.clblast: ${submodules} update-pip ## Build ggml-python with clblast / opencl support CMAKE_ARGS="-DGGML_CLBLAST=On" python3 -m pip install --verbose --editable . +build.vulkan: ${submodules} update-pip ## Build ggml-python with vulkan support + CMAKE_ARGS="-DGGML_VULKAN=On" python3 -m pip install --verbose --editable . + sdist: ## Build source distribution python3 -m build --sdist diff --git a/ggml/ggml.py b/ggml/ggml.py index 0ce9850..570ae4b 100644 --- a/ggml/ggml.py +++ b/ggml/ggml.py @@ -10753,7 +10753,7 @@ def ggml_backend_opencl_host_buffer_type() -> ggml_backend_buffer_type_t: # source: src/ggml-vulkan.h ##################################################### -GGML_HAS_VULKAN = hasattr(lib, "ggml_vk_init_cpu_assist") +GGML_USE_VULKAN = hasattr(lib, "ggml_vk_init_cpu_assist") # #define GGML_VK_NAME "Vulkan" # #define GGML_VK_MAX_DEVICES 16 @@ -10766,7 +10766,7 @@ def ggml_vk_init_cpu_assist(): return lib.ggml_vk_init_cpu_assist() -if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_vk_init_cpu_assist.argtypes = [] lib.ggml_vk_init_cpu_assist.restype = None @@ -10776,7 +10776,7 @@ def ggml_vk_preallocate_buffers_graph_cpu_assist(node: ggml_tensor_p): return lib.ggml_vk_preallocate_buffers_graph_cpu_assist(node) -if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_vk_preallocate_buffers_graph_cpu_assist.argtypes = [ ctypes.POINTER(ggml_tensor) ] @@ -10788,7 +10788,7 @@ def ggml_vk_preallocate_buffers_cpu_assist(): return lib.ggml_vk_preallocate_buffers_cpu_assist() -if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_vk_preallocate_buffers_cpu_assist.argtypes = [] lib.ggml_vk_preallocate_buffers_cpu_assist.restype = None @@ -10798,7 +10798,7 @@ def ggml_vk_build_graph_cpu_assist(node: ggml_tensor_p, last_node: bool): return lib.ggml_vk_build_graph_cpu_assist(node, 
last_node) -if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_vk_build_graph_cpu_assist.argtypes = [ ctypes.POINTER(ggml_tensor), ctypes.c_bool, @@ -10813,7 +10813,7 @@ def ggml_vk_compute_forward_cpu_assist( return lib.ggml_vk_compute_forward_cpu_assist(params, tensor) -if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_vk_compute_forward_cpu_assist.argtypes = [ ctypes.POINTER(ggml_compute_params), ctypes.POINTER(ggml_tensor), @@ -10830,7 +10830,7 @@ def ggml_vk_graph_cleanup_cpu_assist(): return lib.ggml_vk_graph_cleanup_cpu_assist() -if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_vk_graph_cleanup_cpu_assist.argtypes = [] lib.ggml_vk_graph_cleanup_cpu_assist.restype = None @@ -10840,7 +10840,7 @@ def ggml_vk_free_cpu_assist(): return lib.ggml_vk_free_cpu_assist() -if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_vk_free_cpu_assist.argtypes = [] lib.ggml_vk_free_cpu_assist.restype = None @@ -10853,7 +10853,7 @@ def ggml_backend_vk_init( return lib.ggml_backend_vk_init(dev_num) -if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_backend_vk_init.argtypes = [ctypes.c_size_t] lib.ggml_backend_vk_init.restype = ggml_backend_t @@ -10863,7 +10863,7 @@ def ggml_backend_is_vk(backend: ggml_backend_t) -> bool: return lib.ggml_backend_is_vk(backend) -if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_backend_is_vk.argtypes = [ggml_backend_t] lib.ggml_backend_is_vk.restype = ctypes.c_bool @@ -10873,7 +10873,7 @@ def ggml_backend_vk_get_device_count() -> int: return lib.ggml_backend_vk_get_device_count() -if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_backend_vk_get_device_count.argtypes = [] lib.ggml_backend_vk_get_device_count.restype = ctypes.c_int @@ -10889,7 +10889,7 @@ def ggml_backend_vk_get_device_description( ) -if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_backend_vk_get_device_description.argtypes = [ ctypes.c_int, ctypes.c_char_p, @@ -10907,7 +10907,7 @@ def ggml_backend_vk_get_device_memory( return lib.ggml_backend_vk_get_device_memory(device, free, total) 
-if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_backend_vk_get_device_memory.argtypes = [ ctypes.c_int, ctypes.POINTER(ctypes.c_size_t), @@ -10923,7 +10923,7 @@ def ggml_backend_vk_buffer_type( return lib.ggml_backend_vk_buffer_type(dev_num) -if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_backend_vk_buffer_type.argtypes = [ctypes.c_size_t] lib.ggml_backend_vk_buffer_type.restype = ggml_backend_buffer_type_t @@ -10934,7 +10934,7 @@ def ggml_backend_vk_host_buffer_type() -> ggml_backend_buffer_type_t: return lib.ggml_backend_vk_host_buffer_type() -if GGML_HAS_VULKAN: +if GGML_USE_VULKAN: lib.ggml_backend_vk_host_buffer_type.argtypes = [] lib.ggml_backend_vk_host_buffer_type.restype = ggml_backend_buffer_type_t diff --git a/tests/test_ggml_vulkan.py b/tests/test_ggml_vulkan.py new file mode 100644 index 0000000..3084d5d --- /dev/null +++ b/tests/test_ggml_vulkan.py @@ -0,0 +1,121 @@ +import ggml +import ggml.utils +import ctypes +import pytest +import numpy as np + +from ggml.utils import setup_sigabrt_handler + +setup_sigabrt_handler() + +ggml_vulkan_available = ggml.GGML_USE_VULKAN + +run_if_ggml_vulkan_available = pytest.mark.skipif( + not ggml_vulkan_available, + reason="Vulkan not available", +) + +@run_if_ggml_vulkan_available +def test_vulkan(): + n_tensors = 1 + 2 # input (x) and weights (a, b) + params = ggml.ggml_init_params( + mem_size=ggml.ggml_tensor_overhead() * n_tensors, mem_buffer=None, no_alloc=True + ) + ctx = ggml.ggml_init(params=params) + assert ctx is not None + + backend = ggml.ggml_backend_vk_init(0) + + assert backend is not None + + # create the tensors for input and weights + x = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1) + + a = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1) + b = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1) + + # allocate the tensors in the backend + buffer = ggml.ggml_backend_alloc_ctx_tensors(ctx, backend) + + # set the values of the weights + ggml.ggml_backend_tensor_set( + a, + 
ctypes.cast(np.array([3.0], dtype=np.single).ctypes.data, ctypes.c_void_p), + 0, + ggml.ggml_nbytes(a), + ) + ggml.ggml_backend_tensor_set( + b, + ctypes.cast(np.array([4.0], dtype=np.single).ctypes.data, ctypes.c_void_p), + 0, + ggml.ggml_nbytes(a), + ) + + max_nodes = 4096 + + buf_size = ( + ggml.ggml_tensor_overhead() * max_nodes + + ggml.ggml_graph_overhead_custom(max_nodes, False) + ) + buf = (ctypes.c_uint8 * buf_size)() + + def build_graph( + x: ggml.ggml_tensor_p, a: ggml.ggml_tensor_p, b: ggml.ggml_tensor_p + ): + params = ggml.ggml_init_params( + mem_size=buf_size, + mem_buffer=ctypes.cast(buf, ctypes.c_void_p), + no_alloc=True, + ) + ctx0 = ggml.ggml_init(params=params) + + assert ctx0 is not None + + gf = ggml.ggml_new_graph_custom(ctx0, max_nodes, False) + + x2 = ggml.ggml_mul(ctx0, x, x) + ax2 = ggml.ggml_mul(ctx0, a, x2) + f = ggml.ggml_add(ctx0, ax2, b) + + ggml.ggml_set_name(x2, b"x2") + ggml.ggml_set_name(ax2, b"ax2") + ggml.ggml_set_name(f, b"f") + + ggml.ggml_build_forward_expand(gf, f) + + ggml.ggml_free(ctx0) + + return gf + + allocr = ggml.ggml_gallocr_new(ggml.ggml_backend_get_default_buffer_type(backend)) + + gf = build_graph(x, a, b) + + ggml.ggml_gallocr_reserve(allocr, gf) + + gf = build_graph(x, a, b) + + ggml.ggml_gallocr_alloc_graph(allocr, gf) + + ggml.ggml_backend_tensor_set( + x, + ctypes.cast(np.array([2.0], dtype=np.single).ctypes.data, ctypes.c_void_p), + 0, + ggml.ggml_nbytes(x), + ) + + ggml.ggml_backend_graph_compute(backend, gf) + + f = ggml.ggml_graph_get_tensor(gf, b"f") + + output = np.zeros(1, dtype=np.single) + ggml.ggml_backend_tensor_get( + f, ctypes.cast(output.ctypes.data, ctypes.c_void_p), 0, ggml.ggml_nbytes(x) + ) + + assert output[0] == 16.0 + + ggml.ggml_gallocr_free(allocr) + ggml.ggml_backend_buffer_free(buffer) + ggml.ggml_backend_free(backend) + ggml.ggml_free(ctx)