From 35bff171afbb6866f3a7a6896c716d01d06330d3 Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Sun, 15 Dec 2024 11:45:43 +0530
Subject: [PATCH 01/11] Migrate to tensor->buffer for checking backend buffer type: 1

---
 ggml/src/ggml-sycl/ggml-sycl.cpp | 40 +++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
index 6b9f0b0d9a1c8..9710688f8802c 100644
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -287,11 +287,9 @@ static void ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                                  ggml_tensor *tensor) try {
     ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
-
-    if (tensor->view_src != NULL && tensor->view_offs == 0) {
+    
+    if (tensor->view_src != NULL) {
         assert(tensor->view_src->buffer->buft == buffer->buft);
-        tensor->backend = tensor->view_src->backend;
-        tensor->extra = tensor->view_src->extra;
         return;
     }
@@ -746,7 +744,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
             size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
         }
 
-        // FIXME: do not crash if cudaMalloc fails
+        // FIXME: do not crash if SYCL Buffer alloc fails
        // currently, init_tensor cannot fail, it needs to be fixed in ggml-backend first
         ggml_sycl_set_device(i);
         const queue_ptr stream = ctx->streams[i];
@@ -788,7 +786,6 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
                 CHECK_TRY_ERROR(extra->events[i][is] = new sycl::event()));
         }
     }
-    tensor->backend = GGML_BACKEND_TYPE_GPU_SPLIT;
     tensor->extra = extra;
 }
 catch (sycl::exception const &exc) {
@@ -928,7 +925,7 @@ static const char * ggml_backend_sycl_split_buffer_type_get_name(ggml_backend_bu
     GGML_UNUSED(buft);
 }
 
-static bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer) {
+bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer) {
     return buffer->buft->iface.get_name == ggml_backend_sycl_split_buffer_type_get_name;
 }
 
@@ -2349,12 +2346,22 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
     dpct::memcpy_direction kind;
     char * src_ptr;
-    if (src->backend == GGML_BACKEND_TYPE_CPU) {
+    if (ggml_backend_buffer_is_host(src->buffer)) {
         kind = dpct::host_to_device;
         src_ptr = (char *) src->data;
         // GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr);
-    } else if (src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
-        GGML_ASSERT(src->backend != GGML_BACKEND_TYPE_GPU_SPLIT || (i1_low == 0 && i1_high == src->ne[1]));
+    } else if (ggml_backend_buffer_is_sycl(src->buffer) || ggml_backend_buffer_is_sycl_split(src->buffer)) {
+        if (!ggml_backend_buffer_is_sycl_split(src->buffer)){
+            // Tensor is already on the device, what kind to choose here?
+            kind = dpct::device_to_device;
+            src_ptr = (char *) src->data;
+        }
+        else {
+            /*
+            This assertion seems to me that split buffers aren't supported in SYCL
+            Use ggml_abort()?
+            */
+            GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src->buffer) || (i1_low == 0 && i1_high == src->ne[1]));
         kind = dpct::device_to_device;
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
         int id;
@@ -2362,6 +2369,7 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
             id = get_current_device_id()));
         // GGML_SYCL_DEBUG("current device index %d\n", id);
         src_ptr = (char *) extra->data_device[id];
+        }
     } else {
         // GGML_SYCL_DEBUG("GGML_ABORT("fatal error")\n");
         GGML_ABORT("fatal error");
@@ -2857,8 +2865,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
     const int nb2 = dst->nb[2];
     const int nb3 = dst->nb[3];
 
-    GGML_ASSERT(dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
-    GGML_ASSERT(src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src1->buffer));
     GGML_ASSERT(src1->type == GGML_TYPE_F32 || (src1->ne[2] == 1 && src1->ne[3] == 1));
 
     GGML_ASSERT(ne12 >= ne02 && ne12 % ne02 == 0);
@@ -2878,7 +2886,7 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
     int64_t src1_padded_col_size = GGML_PAD(ne10, MATRIX_ROW_PADDING);
 
-    const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
+    const bool split = ggml_backend_buffer_is_sycl_split(src0->buffer);
     GGML_ASSERT(!(split && ne02 > 1));
     GGML_ASSERT(!(split && ne03 > 1));
     GGML_ASSERT(!(split && ne02 < ne12));
@@ -3198,7 +3206,7 @@ static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const gg
                                        const ggml_tensor *src1,
                                        ggml_tensor *dst) try {
     GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
-    GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
     GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // 0213 permutation
     GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // 0213 permutation
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
@@ -3231,7 +3239,7 @@ static void ggml_sycl_mul_mat_vec_nc(ggml_backend_sycl_context & ctx, const ggml
     GGML_ASSERT(!ggml_is_transposed(src0));
     GGML_ASSERT(!ggml_is_transposed(src1));
     GGML_ASSERT(!ggml_is_permuted(src0));
-    GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
@@ -3293,7 +3301,7 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx,
                                            ggml_tensor *dst) try {
     GGML_ASSERT(!ggml_is_transposed(src0));
     GGML_ASSERT(!ggml_is_transposed(src1));
-    GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
 
     GGML_TENSOR_BINARY_OP_LOCALS

From da40c42062688964d43d0a53558bf9b006e89f79 Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Sun, 15 Dec 2024 18:41:58 +0530
Subject: [PATCH 02/11] SYCL: common.cpp try to migrate away from tensor->backend

---
 ggml/src/ggml-sycl/common.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ggml/src/ggml-sycl/common.cpp b/ggml/src/ggml-sycl/common.cpp
index a9ee404911460..13e86437cc398 100644
--- a/ggml/src/ggml-sycl/common.cpp
+++ b/ggml/src/ggml-sycl/common.cpp
@@ -65,9 +65,9 @@ void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
                           const ggml_sycl_op_flatten_t op) try {
     const bool use_src1 = src1 != nullptr;
-
-    GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
-    GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    if(use_src1)
+        GGML_ASSERT(strcmp(src1->buffer->buft->iface.get_name(src1->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
+    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
 
     // dd = data device
     float * src0_ddf = (float *) src0->data;

From f8603b0cc0cdf29b64a02bfd877bd5e32bb0651f Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Sun, 15 Dec 2024 19:11:43 +0530
Subject: [PATCH 03/11] SYCL: fix assertions and add proper comments

---
 ggml/src/ggml-sycl/common.cpp    |  2 ++
 ggml/src/ggml-sycl/ggml-sycl.cpp | 19 +++++++++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/ggml/src/ggml-sycl/common.cpp b/ggml/src/ggml-sycl/common.cpp
index 13e86437cc398..88314a5cd73af 100644
--- a/ggml/src/ggml-sycl/common.cpp
+++ b/ggml/src/ggml-sycl/common.cpp
@@ -11,6 +11,8 @@
 //
 
 #include "common.hpp"
+
+#include "ggml-backend-impl.h"
 #include "ggml-impl.h"
 
 int get_current_device_id() {
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
index 9710688f8802c..5f6169647ec33 100644
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -2352,16 +2352,27 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
         // GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr);
     } else if (ggml_backend_buffer_is_sycl(src->buffer) || ggml_backend_buffer_is_sycl_split(src->buffer)) {
         if (!ggml_backend_buffer_is_sycl_split(src->buffer)){
-            // Tensor is already on the device, what kind to choose here?
+            // If buffer is not a SYCL split buffer
+            /*
+            What memcpy_direction kind we need here?
+            Refer: dpct/helper.hpp:
+            enum memcpy_direction
+            {
+                host_to_host,
+                host_to_device,
+                device_to_host,
+                device_to_device,
+                automatic
+            };
+            */
             kind = dpct::device_to_device;
             src_ptr = (char *) src->data;
         }
         else {
             /*
-            This assertion seems to me that split buffers aren't supported in SYCL
-            Use ggml_abort()?
+            If buffer is a SYCL split buffer
             */
-            GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src->buffer) || (i1_low == 0 && i1_high == src->ne[1]));
+            GGML_ASSERT(i1_low == 0 && i1_high == src->ne[1]);
         kind = dpct::device_to_device;
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
         int id;

From 0662a86809348d8ef61878d4a32dcee70619b7aa Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Sun, 15 Dec 2024 19:18:52 +0530
Subject: [PATCH 04/11] SYCL: remove extra space

---
 ggml/src/ggml-sycl/ggml-sycl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
index 5f6169647ec33..7598c5fa57bf9 100644
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -287,7 +287,7 @@ static void ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                                  ggml_tensor *tensor) try {
     ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
-    
+
     if (tensor->view_src != NULL) {
         assert(tensor->view_src->buffer->buft == buffer->buft);
         return;

From 5ed4403558d540df07573abace249f6ab51f43af Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Sun, 15 Dec 2024 19:22:52 +0530
Subject: [PATCH 05/11] SYCL: Add back static to ggml_backend_buffer_is_sycl_split function

---
 ggml/src/ggml-sycl/ggml-sycl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
index 7598c5fa57bf9..089701550b8b2 100644
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -925,7 +925,7 @@ static const char * ggml_backend_sycl_split_buffer_type_get_name(ggml_backend_bu
     GGML_UNUSED(buft);
 }
 
-bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer) {
+static bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer) {
     return buffer->buft->iface.get_name == ggml_backend_sycl_split_buffer_type_get_name;
 }

From 19ce4b64b75cbbcb9c2b7b88d7d4a07c56a82551 Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Sun, 15 Dec 2024 21:02:43 +0530
Subject: [PATCH 06/11] SYCL: Add pragma directive to suppress warning spam

---
 ggml/src/ggml-sycl/common.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp
index c1582f610e5f4..62b4cea3ada85 100644
--- a/ggml/src/ggml-sycl/common.hpp
+++ b/ggml/src/ggml-sycl/common.hpp
@@ -26,7 +26,11 @@
 #define GGML_COMMON_DECL_SYCL
 #define GGML_COMMON_IMPL_SYCL
 
+/* suppress warning spam */
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnested-anon-types"
 #include "ggml-common.h"
+#pragma clang diagnostic pop
 
 void* ggml_sycl_host_malloc(size_t size);
 void ggml_sycl_host_free(void* ptr);

From 2607b7de0f0d2f4f1f690226f86fa861aa39cb97 Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Mon, 16 Dec 2024 11:27:38 +0530
Subject: [PATCH 07/11] SYCL: Integrate debug logs with GGML_LOG and other fixes

---
 ggml/src/ggml-sycl/common.cpp       |   4 +-
 ggml/src/ggml-sycl/common.hpp       |  11 +-
 ggml/src/ggml-sycl/element_wise.cpp |  97 ++++++++---------
 ggml/src/ggml-sycl/ggml-sycl.cpp    | 160 ++++++++++++++--------------
 4 files changed, 131 insertions(+), 141 deletions(-)

diff --git a/ggml/src/ggml-sycl/common.cpp b/ggml/src/ggml-sycl/common.cpp
index 88314a5cd73af..4175fd6b93212 100644
--- a/ggml/src/ggml-sycl/common.cpp
+++ b/ggml/src/ggml-sycl/common.cpp
@@ -82,8 +82,8 @@ void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
     ggml_sycl_set_device(ctx.device);
     queue_ptr main_stream = ctx.stream();
-    // GGML_SYCL_DEBUG("ctx.device=%d, main_stream=%p src0_on_device=%d, src1_on_device=%d, dst_on_device=%d\n",
-    //     ctx.device, main_stream, src0_on_device, src1_on_device, dst_on_device);
+    // GGML_LOG_DEBUG("ctx.device=%d, main_stream=%p src0_on_device=%d, src1_on_device=%d, dst_on_device=%d\n",
+    //     ctx.device, main_stream, src0_on_device, src1_on_device, dst_on_device);
 
     // do the computation
     op(ctx, src0, src1, dst, src0_ddf, src1_ddf, dst_ddf, main_stream);
diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp
index 62b4cea3ada85..c18d2acc76db8 100644
--- a/ggml/src/ggml-sycl/common.hpp
+++ b/ggml/src/ggml-sycl/common.hpp
@@ -31,17 +31,11 @@
 #pragma clang diagnostic ignored "-Wnested-anon-types"
 #include "ggml-common.h"
 #pragma clang diagnostic pop
+#include "ggml-impl.h"
 
 void* ggml_sycl_host_malloc(size_t size);
 void ggml_sycl_host_free(void* ptr);
 
-static int g_ggml_sycl_debug = 0;
-#define GGML_SYCL_DEBUG(...)              \
-    do {                                  \
-        if (g_ggml_sycl_debug)            \
-            fprintf(stderr, __VA_ARGS__); \
-    } while (0)
-
 #define CHECK_TRY_ERROR(expr)                                            \
     [&]() {                                                              \
         try {                                                            \
@@ -167,8 +161,7 @@ inline dpct::err0 ggml_sycl_set_device(const int device) try {
     int current_device_id;
     SYCL_CHECK(CHECK_TRY_ERROR(current_device_id = get_current_device_id()));
-    // GGML_SYCL_DEBUG("ggml_sycl_set_device device_id=%d,
-    // current_device_id=%d\n", device, current_device);
+    GGML_LOG_DEBUG("ggml_sycl_set_device device_id=%d,current_device_id=%d\n", device, current_device_id);
 
     if (device == current_device_id) {
         return 0;
     }
diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp
index d05a51f807c20..d64f7bba99ce2 100644
--- a/ggml/src/ggml-sycl/element_wise.cpp
+++ b/ggml/src/ggml-sycl/element_wise.cpp
@@ -1,5 +1,6 @@
 #include "common.hpp"
 #include "element_wise.hpp"
+#include "ggml-impl.h"
 
 void acc_f32(const float * x, const float * y, float * dst, const int ne,
     const int ne10, const int ne11, const int ne12,
@@ -883,148 +884,148 @@ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, const ggml_tensor
 
 void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqrt);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sin);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_cos);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_acc);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_silu);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu_quick);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_tanh);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_relu);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sigmoid);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardsigmoid);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardswish);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_exp);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_log);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_neg);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_step);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_leaky_relu);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqr);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_upscale);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pad);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_add);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sub);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_mul);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
 
 void
ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_SYCL_DEBUG("call %s\n", __func__); + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_div); - GGML_SYCL_DEBUG("call %s done\n", __func__); + GGML_LOG_DEBUG("call %s done\n", __func__); } diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 089701550b8b2..6312374d691c9 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -64,7 +64,6 @@ static ggml_sycl_device_info ggml_sycl_init() { #else GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__); #endif - GGML_LOG_INFO("%s: found %d %s devices:\n", __func__, info.device_count, GGML_SYCL_NAME); for (int i = 0; i < info.device_count; ++i) { info.devices[i].vmm = 0; @@ -117,7 +116,7 @@ void print_device_detail(int id, sycl::device &device, std::string device_type) } void ggml_backend_sycl_print_sycl_devices() { - GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n"); + GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n"); int device_count = dpct::dev_mgr::instance().device_count(); std::map DeviceNums; GGML_LOG_INFO("Found %d SYCL devices:\n", device_count); @@ -146,27 +145,11 @@ void ggml_backend_sycl_print_sycl_devices() { } } -static inline int get_sycl_env(const char *env_name, int default_val) { - char *user_device_string = getenv(env_name); - int user_number = default_val; - - unsigned n; - if (user_device_string != NULL && - sscanf(user_device_string, " %u", &n) == 1) { - user_number = (int)n; - } else { - user_number = default_val; - } - return user_number; -} - static void ggml_check_sycl() try { static bool initialized = false; if (!initialized) { - GGML_LOG_INFO("[SYCL] call ggml_check_sycl\n"); - g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0); - GGML_LOG_INFO("%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug); + GGML_LOG_DEBUG("[SYCL] call ggml_check_sycl\n"); #if defined(GGML_SYCL_F16) GGML_LOG_INFO("%s: GGML_SYCL_F16: yes\n", __func__); @@ -221,7 +204,7 @@ inline void check_allow_gpu_index(const int device_index) { } GGML_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len) try { - GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_gpu_list\n"); + GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_get_gpu_list\n"); for(int i=0;i lock(mutex); - GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n"); + GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n"); auto dev_count = ggml_backend_sycl_get_device_count(); if (device>=dev_count or device<0) { - printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n", + GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n", device, dev_count-1); GGML_ASSERT(devicedevice; if (device>=ggml_sycl_info().device_count or device<0) { - printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n", + GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n", device, ggml_sycl_info().device_count-1); GGML_ASSERT(device lock(mutex); - GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_split_buffer_type\n"); + GGML_LOG_DEBUG("[SYCL] call 
ggml_backend_sycl_split_buffer_type\n"); ggml_check_sycl(); // FIXME: this is not thread safe static std::map, struct ggml_backend_buffer_type> buft_map; @@ -1055,7 +1038,7 @@ static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggm } ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() { - GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n"); + GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n"); static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = { /* .iface = */ { /* .get_name = */ ggml_backend_sycl_host_buffer_type_name, @@ -1156,7 +1139,7 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool { (uint32_t)(max_size/1024/1024), (uint32_t)(g_sycl_pool_size[id]/1024/1024), (uint32_t)(size/1024/1024)); #endif - // GGML_SYCL_DEBUG("ggml_sycl_pool_malloc_leg look_ahead_size=%lu, return %p\n", look_ahead_size, ptr); + GGML_LOG_DEBUG("ggml_sycl_pool_malloc_leg look_ahead_size=%lu, return %p\n", look_ahead_size, ptr); return ptr; } @@ -2348,41 +2331,31 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst, char * src_ptr; if (ggml_backend_buffer_is_host(src->buffer)) { kind = dpct::host_to_device; + GGML_LOG_DEBUG("%s: Host buffer type src tensor: %p\n", __func__, src_ptr); src_ptr = (char *) src->data; - // GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr); } else if (ggml_backend_buffer_is_sycl(src->buffer) || ggml_backend_buffer_is_sycl_split(src->buffer)) { if (!ggml_backend_buffer_is_sycl_split(src->buffer)){ - // If buffer is not a SYCL split buffer - /* - What memcpy_direction kind we need here? - Refer: dpct/helper.hpp: - enum memcpy_direction - { - host_to_host, - host_to_device, - device_to_host, - device_to_device, - automatic - }; - */ - kind = dpct::device_to_device; + // If buffer is not single GPU SYCL buffer + GGML_LOG_DEBUG("%s: SYCL buffer type src tensor: %p\n", __func__, src->data); + kind = dpct::device_to_device; src_ptr = (char *) src->data; } else { /* If buffer is a SYCL split buffer */ - GGML_ASSERT(i1_low == 0 && i1_high == src->ne[1]); - kind = dpct::device_to_device; - ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra; - int id; - SYCL_CHECK(CHECK_TRY_ERROR( - id = get_current_device_id())); - // GGML_SYCL_DEBUG("current device index %d\n", id); - src_ptr = (char *) extra->data_device[id]; + GGML_LOG_DEBUG("%s: Split buffer type src tensor\n", __func__); + GGML_ASSERT(i1_low == 0 && i1_high == src->ne[1]); + kind = dpct::device_to_device; + ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra; + int id; + SYCL_CHECK(CHECK_TRY_ERROR( + id = get_current_device_id())); + GGML_LOG_DEBUG("current device index %d\n", id); + src_ptr = (char *) extra->data_device[id]; } } else { - // GGML_SYCL_DEBUG("GGML_ABORT("fatal error")\n"); + GGML_LOG_DEBUG("%s: GGML_ABORT(\"fatal error\")\n", __func__); GGML_ABORT("fatal error"); } char * dst_ptr = (char *) dst; @@ -2396,7 +2369,7 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst, const char * x = src_ptr + i1_low*nb1 + i2*nb2 + i3*nb3; if (nb0 == ts && nb1 == ts*ne0/bs) { - // GGML_SYCL_DEBUG("stream->memcpy: dst_ptr=%p, x=%p, size=%lu\n", dst_ptr, x, i1_diff * nb1); + GGML_LOG_DEBUG("stream->memcpy: dst_ptr=%p, x=%p, size=%lu\n", dst_ptr, x, i1_diff * nb1); // return CHECK_TRY_ERROR(stream->memcpy(dst_ptr, x, i1_diff * nb1)); return CHECK_TRY_ERROR(dpct::async_dpct_memcpy(dst_ptr, x, i1_diff * nb1, kind, *stream)); @@ -2526,7 +2499,7 @@ inline void 
ggml_sycl_op_mul_mat_sycl( use_fp16 && ggml_is_contiguous(src0) && row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT) { - // GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp16 path\n"); + GGML_LOG_DEBUG("ggml_sycl_op_mul_mat_sycl - fp16 path\n"); ggml_sycl_pool_alloc src0_as_f16(ctx.pool()); if (src0->type != GGML_TYPE_F16) { const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src0->type); @@ -2573,7 +2546,7 @@ inline void ggml_sycl_op_mul_mat_sycl( #endif } else { - // GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp32 path\n"); + GGML_LOG_DEBUG("ggml_sycl_op_mul_mat_sycl - fp32 path\n"); ggml_sycl_pool_alloc src0_ddq_as_f32(ctx.pool()); ggml_sycl_pool_alloc src1_ddq_as_f32(ctx.pool()); if (src0->type != GGML_TYPE_F32) { @@ -3184,33 +3157,33 @@ catch (sycl::exception const &exc) { static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_SYCL_DEBUG("call %s\n", __func__); + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_repeat); - GGML_SYCL_DEBUG("call %s done\n", __func__); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_SYCL_DEBUG("call %s\n", __func__); + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_get_rows); - GGML_SYCL_DEBUG("call %s done\n", __func__); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_SYCL_DEBUG("call %s\n", __func__); + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_norm); - GGML_SYCL_DEBUG("call %s done\n", __func__); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_SYCL_DEBUG("call %s\n", __func__); + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rms_norm); - GGML_SYCL_DEBUG("call %s done\n", __func__); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_SYCL_DEBUG("call %s\n", __func__); + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_group_norm); - GGML_SYCL_DEBUG("call %s done\n", __func__); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -3448,6 +3421,7 @@ bool ggml_sycl_supports_dmmv(enum ggml_type type) { } static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_LOG_DEBUG("[SYCL]: call %s\n", __func__); const bool split = ggml_backend_buffer_is_sycl_split(src0->buffer); int64_t min_compute_capability = INT_MAX; @@ -3587,6 +3561,7 @@ __dpct_inline__ static void k_copy_dst_from_contiguous( static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst) try { + GGML_LOG_DEBUG("SYCL call %s\n", __func__); GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer) && "mul_mat_id does not support split 
buffers"); const ggml_tensor *ids = dst->src[2]; @@ -3753,11 +3728,15 @@ catch (sycl::exception const &exc) { } static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_scale); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_clamp); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, @@ -3810,52 +3789,74 @@ catch (sycl::exception const &exc) { static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { // TODO: why do we pass dst as src1 here? + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_cpy(ctx, src0, dst, nullptr); + GGML_LOG_DEBUG("call %s done\n", __func__); GGML_UNUSED(src1); } static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_diag_mask_inf); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_soft_max(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_soft_max); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(src0)); // TODO: this restriction is temporary until non-cont support is implemented + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rope); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pool2d); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_im2col); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum_rows); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { 
GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argsort); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argmax); + GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_nop(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_LOG_DEBUG("call %s\n", __func__); + GGML_LOG_DEBUG("call %s done\n", __func__); GGML_UNUSED(src0); GGML_UNUSED(src1); GGML_UNUSED(dst); @@ -3869,13 +3870,11 @@ void ggml_sycl_set_main_device(const int main_device) try { check_allow_gpu_index(main_device); dpct::select_device(main_device); - if (g_ggml_sycl_debug) { - dpct::device_info prop; - SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info( - prop, dpct::dev_mgr::instance().get_device(main_device)))); - GGML_LOG_INFO("Using device %d (%s) as main device\n", - main_device, prop.get_name()); - } + dpct::device_info prop; + SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info( + prop, dpct::dev_mgr::instance().get_device(main_device)))); + GGML_LOG_DEBUG("Using device %d (%s) as main device\n", + main_device, prop.get_name()); } catch (sycl::exception const &exc) { std::cerr << exc.what() << "Exception caught at file:" << __FILE__ @@ -4073,7 +4072,7 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens GGML_API void ggml_backend_sycl_get_device_description(int device, char *description, size_t description_size) try { - GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_description\n"); + GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_get_device_description\n"); dpct::device_info prop; SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info( prop, dpct::dev_mgr::instance().get_device(device)))); @@ -4087,7 +4086,7 @@ catch (sycl::exception const &exc) { void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total) try { - GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_memory\n"); + GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_get_device_memory\n"); ggml_sycl_set_device(device); /* @@ -4289,7 +4288,7 @@ bool ggml_backend_is_sycl(ggml_backend_t backend) { } int ggml_backend_sycl_get_device_count() { - GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n"); + GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n"); return ggml_sycl_info().device_count; } @@ -4646,17 +4645,14 @@ static ggml_backend_dev_t ggml_backend_sycl_reg_get_device(ggml_backend_reg_t re } static void *ggml_backend_sycl_reg_get_proc_address(ggml_backend_reg_t reg, const char *name) { - GGML_UNUSED(reg); - - // TODO: update to the current function signature - //if (strcmp(name, "ggml_backend_split_buffer_type") == 0) { - // return (void *)ggml_backend_sycl_split_buffer_type; - //} + if (strcmp(name, "ggml_backend_split_buffer_type") == 0) { + return (void *)ggml_backend_sycl_split_buffer_type; + } // SYCL doesn't support registering host memory, left here for reference // "ggml_backend_register_host_buffer" // "ggml_backend_unregister_host_buffer" - GGML_UNUSED(name); + GGML_UNUSED(reg); return nullptr; } @@ -4715,7 +4711,7 @@ ggml_backend_reg_t ggml_backend_sycl_reg() { } ggml_backend_t ggml_backend_sycl_init(int device) { - GGML_SYCL_DEBUG("[SYCL] 
call ggml_backend_sycl_init\n"); + GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_init\n"); ggml_check_sycl(); check_allow_gpu_index(device); From eeb04751d9749668403aaf78d00482dbe55f1bcd Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Wed, 18 Dec 2024 09:11:17 +0530 Subject: [PATCH 08/11] Revert "SYCL: Integrate debug logs with GGML_LOG and other fixes" This reverts commit 2607b7de0f0d2f4f1f690226f86fa861aa39cb97. Let's keep the current SYCL specific logging mechanism for now --- ggml/src/ggml-sycl/common.cpp | 4 +- ggml/src/ggml-sycl/common.hpp | 11 +- ggml/src/ggml-sycl/element_wise.cpp | 97 +++++++++-------- ggml/src/ggml-sycl/ggml-sycl.cpp | 160 ++++++++++++++-------------- 4 files changed, 141 insertions(+), 131 deletions(-) diff --git a/ggml/src/ggml-sycl/common.cpp b/ggml/src/ggml-sycl/common.cpp index 4175fd6b93212..88314a5cd73af 100644 --- a/ggml/src/ggml-sycl/common.cpp +++ b/ggml/src/ggml-sycl/common.cpp @@ -82,8 +82,8 @@ void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *sr ggml_sycl_set_device(ctx.device); queue_ptr main_stream = ctx.stream(); - // GGML_LOG_DEBUG("ctx.device=%d, main_stream=%p src0_on_device=%d, src1_on_device=%d, dst_on_device=%d\n", - // ctx.device, main_stream, src0_on_device, src1_on_device, dst_on_device); + // GGML_SYCL_DEBUG("ctx.device=%d, main_stream=%p src0_on_device=%d, src1_on_device=%d, dst_on_device=%d\n", + // ctx.device, main_stream, src0_on_device, src1_on_device, dst_on_device); // do the computation op(ctx, src0, src1, dst, src0_ddf, src1_ddf, dst_ddf, main_stream); diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index c18d2acc76db8..62b4cea3ada85 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -31,11 +31,17 @@ #pragma clang diagnostic ignored "-Wnested-anon-types" #include "ggml-common.h" #pragma clang diagnostic pop -#include "ggml-impl.h" void* ggml_sycl_host_malloc(size_t size); void ggml_sycl_host_free(void* ptr); +static int g_ggml_sycl_debug = 0; +#define GGML_SYCL_DEBUG(...) 
\ + do { \ + if (g_ggml_sycl_debug) \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) + #define CHECK_TRY_ERROR(expr) \ [&]() { \ try { \ @@ -161,7 +167,8 @@ inline dpct::err0 ggml_sycl_set_device(const int device) try { int current_device_id; SYCL_CHECK(CHECK_TRY_ERROR(current_device_id = get_current_device_id())); - GGML_LOG_DEBUG("ggml_sycl_set_device device_id=%d,current_device_id=%d\n", device, current_device_id); + // GGML_SYCL_DEBUG("ggml_sycl_set_device device_id=%d, + // current_device_id=%d\n", device, current_device); if (device == current_device_id) { return 0; } diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index d64f7bba99ce2..d05a51f807c20 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -1,6 +1,5 @@ #include "common.hpp" #include "element_wise.hpp" -#include "ggml-impl.h" void acc_f32(const float * x, const float * y, float * dst, const int ne, const int ne10, const int ne11, const int ne12, @@ -884,148 +883,148 @@ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, const ggml_tensor void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqrt); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sin); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_cos); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_acc); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_silu); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, 
src1, dst, ggml_sycl_op_gelu_quick); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_tanh); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_relu); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sigmoid); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardsigmoid); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardswish); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_exp); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_log); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_neg); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_step); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const 
ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_leaky_relu); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqr); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_upscale); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pad); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_add); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sub); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_mul); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_div); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 6312374d691c9..089701550b8b2 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -64,6 +64,7 @@ static ggml_sycl_device_info ggml_sycl_init() { #else GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__); #endif + GGML_LOG_INFO("%s: found %d %s devices:\n", __func__, info.device_count, GGML_SYCL_NAME); for (int i = 0; i < info.device_count; ++i) { info.devices[i].vmm = 0; @@ -116,7 +117,7 @@ void print_device_detail(int id, sycl::device &device, std::string device_type) } void ggml_backend_sycl_print_sycl_devices() { - GGML_LOG_DEBUG("[SYCL] call 
ggml_backend_sycl_print_sycl_devices\n"); + GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n"); int device_count = dpct::dev_mgr::instance().device_count(); std::map DeviceNums; GGML_LOG_INFO("Found %d SYCL devices:\n", device_count); @@ -145,11 +146,27 @@ void ggml_backend_sycl_print_sycl_devices() { } } +static inline int get_sycl_env(const char *env_name, int default_val) { + char *user_device_string = getenv(env_name); + int user_number = default_val; + + unsigned n; + if (user_device_string != NULL && + sscanf(user_device_string, " %u", &n) == 1) { + user_number = (int)n; + } else { + user_number = default_val; + } + return user_number; +} + static void ggml_check_sycl() try { static bool initialized = false; if (!initialized) { - GGML_LOG_DEBUG("[SYCL] call ggml_check_sycl\n"); + GGML_LOG_INFO("[SYCL] call ggml_check_sycl\n"); + g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0); + GGML_LOG_INFO("%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug); #if defined(GGML_SYCL_F16) GGML_LOG_INFO("%s: GGML_SYCL_F16: yes\n", __func__); @@ -204,7 +221,7 @@ inline void check_allow_gpu_index(const int device_index) { } GGML_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len) try { - GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_get_gpu_list\n"); + GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_gpu_list\n"); for(int i=0;i lock(mutex); - GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n"); + GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n"); auto dev_count = ggml_backend_sycl_get_device_count(); if (device>=dev_count or device<0) { - GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n", + printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n", device, dev_count-1); GGML_ASSERT(devicedevice; if (device>=ggml_sycl_info().device_count or device<0) { - GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n", + printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n", device, ggml_sycl_info().device_count-1); GGML_ASSERT(device lock(mutex); - GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_split_buffer_type\n"); + GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_split_buffer_type\n"); ggml_check_sycl(); // FIXME: this is not thread safe static std::map, struct ggml_backend_buffer_type> buft_map; @@ -1038,7 +1055,7 @@ static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggm } ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() { - GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n"); + GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n"); static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = { /* .iface = */ { /* .get_name = */ ggml_backend_sycl_host_buffer_type_name, @@ -1139,7 +1156,7 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool { (uint32_t)(max_size/1024/1024), (uint32_t)(g_sycl_pool_size[id]/1024/1024), (uint32_t)(size/1024/1024)); #endif - GGML_LOG_DEBUG("ggml_sycl_pool_malloc_leg look_ahead_size=%lu, return %p\n", look_ahead_size, ptr); + // GGML_SYCL_DEBUG("ggml_sycl_pool_malloc_leg look_ahead_size=%lu, return %p\n", look_ahead_size, ptr); return ptr; } @@ -2331,31 +2348,41 @@ static 
dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst, char * src_ptr; if (ggml_backend_buffer_is_host(src->buffer)) { kind = dpct::host_to_device; - GGML_LOG_DEBUG("%s: Host buffer type src tensor: %p\n", __func__, src_ptr); src_ptr = (char *) src->data; + // GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr); } else if (ggml_backend_buffer_is_sycl(src->buffer) || ggml_backend_buffer_is_sycl_split(src->buffer)) { if (!ggml_backend_buffer_is_sycl_split(src->buffer)){ - // If buffer is not single GPU SYCL buffer - GGML_LOG_DEBUG("%s: SYCL buffer type src tensor: %p\n", __func__, src->data); - kind = dpct::device_to_device; + // If buffer is not a SYCL split buffer + /* + What memcpy_direction kind we need here? + Refer: dpct/helper.hpp: + enum memcpy_direction + { + host_to_host, + host_to_device, + device_to_host, + device_to_device, + automatic + }; + */ + kind = dpct::device_to_device; src_ptr = (char *) src->data; } else { /* If buffer is a SYCL split buffer */ - GGML_LOG_DEBUG("%s: Split buffer type src tensor\n", __func__); - GGML_ASSERT(i1_low == 0 && i1_high == src->ne[1]); - kind = dpct::device_to_device; - ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra; - int id; - SYCL_CHECK(CHECK_TRY_ERROR( - id = get_current_device_id())); - GGML_LOG_DEBUG("current device index %d\n", id); - src_ptr = (char *) extra->data_device[id]; + GGML_ASSERT(i1_low == 0 && i1_high == src->ne[1]); + kind = dpct::device_to_device; + ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra; + int id; + SYCL_CHECK(CHECK_TRY_ERROR( + id = get_current_device_id())); + // GGML_SYCL_DEBUG("current device index %d\n", id); + src_ptr = (char *) extra->data_device[id]; } } else { - GGML_LOG_DEBUG("%s: GGML_ABORT(\"fatal error\")\n", __func__); + // GGML_SYCL_DEBUG("GGML_ABORT("fatal error")\n"); GGML_ABORT("fatal error"); } char * dst_ptr = (char *) dst; @@ -2369,7 +2396,7 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst, const char * x = src_ptr + i1_low*nb1 + i2*nb2 + i3*nb3; if (nb0 == ts && nb1 == ts*ne0/bs) { - GGML_LOG_DEBUG("stream->memcpy: dst_ptr=%p, x=%p, size=%lu\n", dst_ptr, x, i1_diff * nb1); + // GGML_SYCL_DEBUG("stream->memcpy: dst_ptr=%p, x=%p, size=%lu\n", dst_ptr, x, i1_diff * nb1); // return CHECK_TRY_ERROR(stream->memcpy(dst_ptr, x, i1_diff * nb1)); return CHECK_TRY_ERROR(dpct::async_dpct_memcpy(dst_ptr, x, i1_diff * nb1, kind, *stream)); @@ -2499,7 +2526,7 @@ inline void ggml_sycl_op_mul_mat_sycl( use_fp16 && ggml_is_contiguous(src0) && row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT) { - GGML_LOG_DEBUG("ggml_sycl_op_mul_mat_sycl - fp16 path\n"); + // GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp16 path\n"); ggml_sycl_pool_alloc src0_as_f16(ctx.pool()); if (src0->type != GGML_TYPE_F16) { const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src0->type); @@ -2546,7 +2573,7 @@ inline void ggml_sycl_op_mul_mat_sycl( #endif } else { - GGML_LOG_DEBUG("ggml_sycl_op_mul_mat_sycl - fp32 path\n"); + // GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp32 path\n"); ggml_sycl_pool_alloc src0_ddq_as_f32(ctx.pool()); ggml_sycl_pool_alloc src1_ddq_as_f32(ctx.pool()); if (src0->type != GGML_TYPE_F32) { @@ -3157,33 +3184,33 @@ catch (sycl::exception const &exc) { static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, 
ggml_sycl_op_repeat); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_get_rows); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_norm); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rms_norm); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); + GGML_SYCL_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_group_norm); - GGML_LOG_DEBUG("call %s done\n", __func__); + GGML_SYCL_DEBUG("call %s done\n", __func__); } static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, @@ -3421,7 +3448,6 @@ bool ggml_sycl_supports_dmmv(enum ggml_type type) { } static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("[SYCL]: call %s\n", __func__); const bool split = ggml_backend_buffer_is_sycl_split(src0->buffer); int64_t min_compute_capability = INT_MAX; @@ -3561,7 +3587,6 @@ __dpct_inline__ static void k_copy_dst_from_contiguous( static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst) try { - GGML_LOG_DEBUG("SYCL call %s\n", __func__); GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer) && "mul_mat_id does not support split buffers"); const ggml_tensor *ids = dst->src[2]; @@ -3728,15 +3753,11 @@ catch (sycl::exception const &exc) { } static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_scale); - GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_clamp); - GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, @@ -3789,74 +3810,52 @@ catch (sycl::exception const &exc) { static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { // TODO: why do we pass dst as src1 here? 
- GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_cpy(ctx, src0, dst, nullptr); - GGML_LOG_DEBUG("call %s done\n", __func__); GGML_UNUSED(src1); } static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_diag_mask_inf); - GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_soft_max(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_soft_max); - GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(src0)); // TODO: this restriction is temporary until non-cont support is implemented - GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rope); - GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pool2d); - GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_im2col); - GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(src0)); - GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum); - GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(src0)); - GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum_rows); - GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(src0)); - GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argsort); - GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(src0)); - GGML_LOG_DEBUG("call %s\n", __func__); ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argmax); - GGML_LOG_DEBUG("call %s done\n", __func__); } static void ggml_sycl_nop(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - GGML_LOG_DEBUG("call %s\n", __func__); - GGML_LOG_DEBUG("call %s done\n", __func__); GGML_UNUSED(src0); GGML_UNUSED(src1); GGML_UNUSED(dst); @@ -3870,11 +3869,13 @@ void ggml_sycl_set_main_device(const int main_device) try { check_allow_gpu_index(main_device); dpct::select_device(main_device); - dpct::device_info prop; - SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info( - prop, 
dpct::dev_mgr::instance().get_device(main_device)))); - GGML_LOG_DEBUG("Using device %d (%s) as main device\n", - main_device, prop.get_name()); + if (g_ggml_sycl_debug) { + dpct::device_info prop; + SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info( + prop, dpct::dev_mgr::instance().get_device(main_device)))); + GGML_LOG_INFO("Using device %d (%s) as main device\n", + main_device, prop.get_name()); + } } catch (sycl::exception const &exc) { std::cerr << exc.what() << "Exception caught at file:" << __FILE__ @@ -4072,7 +4073,7 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens GGML_API void ggml_backend_sycl_get_device_description(int device, char *description, size_t description_size) try { - GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_get_device_description\n"); + GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_description\n"); dpct::device_info prop; SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info( prop, dpct::dev_mgr::instance().get_device(device)))); @@ -4086,7 +4087,7 @@ catch (sycl::exception const &exc) { void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total) try { - GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_get_device_memory\n"); + GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_memory\n"); ggml_sycl_set_device(device); /* @@ -4288,7 +4289,7 @@ bool ggml_backend_is_sycl(ggml_backend_t backend) { } int ggml_backend_sycl_get_device_count() { - GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n"); + GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n"); return ggml_sycl_info().device_count; } @@ -4645,14 +4646,17 @@ static ggml_backend_dev_t ggml_backend_sycl_reg_get_device(ggml_backend_reg_t re } static void *ggml_backend_sycl_reg_get_proc_address(ggml_backend_reg_t reg, const char *name) { - if (strcmp(name, "ggml_backend_split_buffer_type") == 0) { - return (void *)ggml_backend_sycl_split_buffer_type; - } + GGML_UNUSED(reg); + + // TODO: update to the current function signature + //if (strcmp(name, "ggml_backend_split_buffer_type") == 0) { + // return (void *)ggml_backend_sycl_split_buffer_type; + //} // SYCL doesn't support registering host memory, left here for reference // "ggml_backend_register_host_buffer" // "ggml_backend_unregister_host_buffer" - GGML_UNUSED(reg); + GGML_UNUSED(name); return nullptr; } @@ -4711,7 +4715,7 @@ ggml_backend_reg_t ggml_backend_sycl_reg() { } ggml_backend_t ggml_backend_sycl_init(int device) { - GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_init\n"); + GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_init\n"); ggml_check_sycl(); check_allow_gpu_index(device); From 82ce602ee794686223a834d1428111c13f70c0fa Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Wed, 18 Dec 2024 09:19:43 +0530 Subject: [PATCH 09/11] SYCL: Use GGML_SYCL_DEBUG after reverting --- ggml/src/ggml-sycl/ggml-sycl.cpp | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 089701550b8b2..c9d2c28f5e952 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -537,7 +537,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) { auto dev_count = ggml_backend_sycl_get_device_count(); if (device>=dev_count or device<0) { - printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n", + GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: 
device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n", device, dev_count-1); GGML_ASSERT(devicedevice; if (device>=ggml_sycl_info().device_count or device<0) { - printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n", + GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n", device, ggml_sycl_info().device_count-1); GGML_ASSERT(devicebuffer)) { kind = dpct::host_to_device; + GGML_SYCL_DEBUG("%s: Host buffer type src tensor\n"); src_ptr = (char *) src->data; // GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr); } else if (ggml_backend_buffer_is_sycl(src->buffer) || ggml_backend_buffer_is_sycl_split(src->buffer)) { if (!ggml_backend_buffer_is_sycl_split(src->buffer)){ - // If buffer is not a SYCL split buffer - /* - What memcpy_direction kind we need here? - Refer: dpct/helper.hpp: - enum memcpy_direction - { - host_to_host, - host_to_device, - device_to_host, - device_to_device, - automatic - }; - */ - kind = dpct::device_to_device; + // If buffer is a SYCL buffer + GGML_SYCL_DEBUG("%s: SYCL buffer type src tensor\n", __func__); + kind = dpct::device_to_device; src_ptr = (char *) src->data; } else { /* If buffer is a SYCL split buffer */ + GGML_SYCL_DEBUG("%s: Split buffer type src tensor\n", __func__); GGML_ASSERT(i1_low == 0 && i1_high == src->ne[1]); kind = dpct::device_to_device; ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra; From a20dde36ff587625bfd4ed99c1cfec0a36af4746 Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Wed, 18 Dec 2024 09:20:52 +0530 Subject: [PATCH 10/11] SYCL: reg_get_proc_address func, update to the current func signature --- ggml/src/ggml-sycl/ggml-sycl.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index c9d2c28f5e952..f058cde1eab8e 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -4639,10 +4639,9 @@ static ggml_backend_dev_t ggml_backend_sycl_reg_get_device(ggml_backend_reg_t re static void *ggml_backend_sycl_reg_get_proc_address(ggml_backend_reg_t reg, const char *name) { GGML_UNUSED(reg); - // TODO: update to the current function signature - //if (strcmp(name, "ggml_backend_split_buffer_type") == 0) { - // return (void *)ggml_backend_sycl_split_buffer_type; - //} + if (strcmp(name, "ggml_backend_split_buffer_type") == 0) { + return (void *)ggml_backend_sycl_split_buffer_type; + } // SYCL doesn't support registering host memory, left here for reference // "ggml_backend_register_host_buffer" From 6be041ae10897c38fdfd045d16b3b69bf9fa38d7 Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Thu, 19 Dec 2024 08:34:53 +0530 Subject: [PATCH 11/11] SYCL: Refactor SYCL buffer checks in ggml_sycl_cpy_tensor_2d --- ggml/src/ggml-sycl/ggml-sycl.cpp | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index f058cde1eab8e..ead040ebe7068 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -2348,21 +2348,19 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst, char * src_ptr; if (ggml_backend_buffer_is_host(src->buffer)) { kind = dpct::host_to_device; - GGML_SYCL_DEBUG("%s: Host buffer type src tensor\n"); + 
//GGML_SYCL_DEBUG("%s: Host buffer type src tensor\n", __func__); src_ptr = (char *) src->data; // GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr); - } else if (ggml_backend_buffer_is_sycl(src->buffer) || ggml_backend_buffer_is_sycl_split(src->buffer)) { - if (!ggml_backend_buffer_is_sycl_split(src->buffer)){ - // If buffer is a SYCL buffer - GGML_SYCL_DEBUG("%s: SYCL buffer type src tensor\n", __func__); - kind = dpct::device_to_device; - src_ptr = (char *) src->data; - } - else { - /* - If buffer is a SYCL split buffer - */ - GGML_SYCL_DEBUG("%s: Split buffer type src tensor\n", __func__); + } else if (ggml_backend_buffer_is_sycl(src->buffer)) { + // If buffer is a SYCL buffer + //GGML_SYCL_DEBUG("%s: SYCL buffer type src tensor\n", __func__); + kind = dpct::device_to_device; + src_ptr = (char *) src->data; + } else if (ggml_backend_buffer_is_sycl_split(src->buffer)) { + /* + If buffer is a SYCL split buffer + */ + //GGML_SYCL_DEBUG("%s: Split buffer type src tensor\n", __func__); GGML_ASSERT(i1_low == 0 && i1_high == src->ne[1]); kind = dpct::device_to_device; ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra; @@ -2371,7 +2369,6 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst, id = get_current_device_id())); // GGML_SYCL_DEBUG("current device index %d\n", id); src_ptr = (char *) extra->data_device[id]; - } } else { // GGML_SYCL_DEBUG("GGML_ABORT("fatal error")\n"); GGML_ABORT("fatal error");
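
// The refactor above flattens the nested split-buffer branch into a single
// if / else-if chain over three mutually exclusive buffer classes. Below is
// a minimal standalone sketch of that dispatch; buffer_class, toy_tensor and
// pick_src are simplified illustrative stand-ins, not the real ggml/dpct API.

#include <cassert>
#include <cstdio>

enum class buffer_class { host, sycl_device, sycl_split };
enum class copy_kind { host_to_device, device_to_device };

struct toy_tensor {
    buffer_class buf;
    char * data;           // contiguous storage (host or single device)
    char * per_device[8];  // stand-in for a split buffer's extra->data_device[]
};

// Mirrors the host -> sycl -> split -> abort chain of ggml_sycl_cpy_tensor_2d.
static char * pick_src(const toy_tensor & t, int current_device, copy_kind & kind) {
    if (t.buf == buffer_class::host) {
        kind = copy_kind::host_to_device;    // staging copy from host memory
        return t.data;
    } else if (t.buf == buffer_class::sycl_device) {
        kind = copy_kind::device_to_device;  // already resident on one device
        return t.data;
    } else if (t.buf == buffer_class::sycl_split) {
        kind = copy_kind::device_to_device;  // shard owned by the current device
        return t.per_device[current_device];
    }
    assert(false && "fatal error");          // unknown buffer class
    return nullptr;
}

int main() {
    char host_bytes[16] = {};
    toy_tensor t{buffer_class::host, host_bytes, {}};
    copy_kind kind;
    char * src = pick_src(t, /*current_device=*/0, kind);
    std::printf("host_to_device=%d src=%p\n",
                kind == copy_kind::host_to_device, (void *) src);
    return 0;
}

// Making the three checks mutually exclusive removes the double
// ggml_backend_buffer_is_sycl_split() test and the negated inner branch,
// so each buffer class is classified exactly once.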