Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit eb5c3dc

Browse files
authored
SYCL: Migrate away from deprecated ggml_tensor->backend (#10840)
* Migrate to tensor->buffer for checking backend buffer type: 1
* SYCL: common.cpp try to migrate away from tensor->backend
* SYCL: fix assertions and add proper comments
* SYCL: remove extra space
* SYCL: Add back static to ggml_backend_buffer_is_sycl_split function
* SYCL: Add pragma directive to suppress warning spam
* SYCL: Integrate debug logs with GGML_LOG and other fixes
* Revert "SYCL: Integrate debug logs with GGML_LOG and other fixes"

  This reverts commit 2607b7d. Let's keep the current SYCL-specific logging mechanism for now.
* SYCL: Use GGML_SYCL_DEBUG after reverting
* SYCL: reg_get_proc_address func, update to the current func signature
* SYCL: Refactor SYCL buffer checks in ggml_sycl_cpy_tensor_2d
1 parent 0ca416c commit eb5c3dc

File tree

3 files changed

+35
-23
lines changed

3 files changed

+35
-23
lines changed

ggml/src/ggml-sycl/common.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
//
1212

1313
#include "common.hpp"
14+
15+
#include "ggml-backend-impl.h"
1416
#include "ggml-impl.h"
1517

1618
int get_current_device_id() {
@@ -65,9 +67,9 @@ void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
6567
const ggml_sycl_op_flatten_t op) try {
6668

6769
const bool use_src1 = src1 != nullptr;
68-
69-
GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
70-
GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
70+
if(use_src1)
71+
GGML_ASSERT(strcmp(src1->buffer->buft->iface.get_name(src1->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
72+
GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
7173

7274
// dd = data device
7375
float * src0_ddf = (float *) src0->data;

ggml/src/ggml-sycl/common.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,11 @@
2626

2727
#define GGML_COMMON_DECL_SYCL
2828
#define GGML_COMMON_IMPL_SYCL
29+
/* suppress warning spam */
30+
#pragma clang diagnostic push
31+
#pragma clang diagnostic ignored "-Wnested-anon-types"
2932
#include "ggml-common.h"
33+
#pragma clang diagnostic pop
3034

3135
void* ggml_sycl_host_malloc(size_t size);
3236
void ggml_sycl_host_free(void* ptr);

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -288,10 +288,8 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
288288
ggml_tensor *tensor) try {
289289
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
290290

291-
if (tensor->view_src != NULL && tensor->view_offs == 0) {
291+
if (tensor->view_src != NULL) {
292292
assert(tensor->view_src->buffer->buft == buffer->buft);
293-
tensor->backend = tensor->view_src->backend;
294-
tensor->extra = tensor->view_src->extra;
295293
return;
296294
}
297295

@@ -539,7 +537,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
539537
auto dev_count = ggml_backend_sycl_get_device_count();
540538

541539
if (device>=dev_count or device<0) {
542-
printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
540+
GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
543541
device, dev_count-1);
544542
GGML_ASSERT(device<dev_count);
545543
}
@@ -567,7 +565,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(ggml_backend_sycl_conte
567565

568566
int device = ctx->device;
569567
if (device>=ggml_sycl_info().device_count or device<0) {
570-
printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
568+
GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
571569
device, ggml_sycl_info().device_count-1);
572570
GGML_ASSERT(device<ggml_sycl_info().device_count);
573571
}
@@ -746,7 +744,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
746744
size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
747745
}
748746

749-
// FIXME: do not crash if cudaMalloc fails
747+
// FIXME: do not crash if SYCL Buffer alloc fails
750748
// currently, init_tensor cannot fail, it needs to be fixed in ggml-backend first
751749
ggml_sycl_set_device(i);
752750
const queue_ptr stream = ctx->streams[i];
@@ -788,7 +786,6 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
788786
CHECK_TRY_ERROR(extra->events[i][is] = new sycl::event()));
789787
}
790788
}
791-
tensor->backend = GGML_BACKEND_TYPE_GPU_SPLIT;
792789
tensor->extra = extra;
793790
}
794791
catch (sycl::exception const &exc) {
@@ -2349,12 +2346,22 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
23492346

23502347
dpct::memcpy_direction kind;
23512348
char * src_ptr;
2352-
if (src->backend == GGML_BACKEND_TYPE_CPU) {
2349+
if (ggml_backend_buffer_is_host(src->buffer)) {
23532350
kind = dpct::host_to_device;
2351+
//GGML_SYCL_DEBUG("%s: Host buffer type src tensor\n", __func__);
23542352
src_ptr = (char *) src->data;
23552353
// GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr);
2356-
} else if (src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
2357-
GGML_ASSERT(src->backend != GGML_BACKEND_TYPE_GPU_SPLIT || (i1_low == 0 && i1_high == src->ne[1]));
2354+
} else if (ggml_backend_buffer_is_sycl(src->buffer)) {
2355+
// If buffer is a SYCL buffer
2356+
//GGML_SYCL_DEBUG("%s: SYCL buffer type src tensor\n", __func__);
2357+
kind = dpct::device_to_device;
2358+
src_ptr = (char *) src->data;
2359+
} else if (ggml_backend_buffer_is_sycl_split(src->buffer)) {
2360+
/*
2361+
If buffer is a SYCL split buffer
2362+
*/
2363+
//GGML_SYCL_DEBUG("%s: Split buffer type src tensor\n", __func__);
2364+
GGML_ASSERT(i1_low == 0 && i1_high == src->ne[1]);
23582365
kind = dpct::device_to_device;
23592366
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
23602367
int id;
@@ -2857,8 +2864,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
28572864
const int nb2 = dst->nb[2];
28582865
const int nb3 = dst->nb[3];
28592866

2860-
GGML_ASSERT(dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
2861-
GGML_ASSERT(src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
2867+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
2868+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src1->buffer));
28622869
GGML_ASSERT(src1->type == GGML_TYPE_F32 || (src1->ne[2] == 1 && src1->ne[3] == 1));
28632870

28642871
GGML_ASSERT(ne12 >= ne02 && ne12 % ne02 == 0);
@@ -2878,7 +2885,7 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
28782885

28792886
int64_t src1_padded_col_size = GGML_PAD(ne10, MATRIX_ROW_PADDING);
28802887

2881-
const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
2888+
const bool split = ggml_backend_buffer_is_sycl_split(src0->buffer);
28822889
GGML_ASSERT(!(split && ne02 > 1));
28832890
GGML_ASSERT(!(split && ne03 > 1));
28842891
GGML_ASSERT(!(split && ne02 < ne12));
@@ -3198,7 +3205,7 @@ static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const gg
31983205
const ggml_tensor *src1,
31993206
ggml_tensor *dst) try {
32003207
GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
3201-
GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
3208+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
32023209
GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // 0213 permutation
32033210
GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // 0213 permutation
32043211
GGML_ASSERT(src0->type == GGML_TYPE_F16);
@@ -3231,7 +3238,7 @@ static void ggml_sycl_mul_mat_vec_nc(ggml_backend_sycl_context & ctx, const ggml
32313238
GGML_ASSERT(!ggml_is_transposed(src0));
32323239
GGML_ASSERT(!ggml_is_transposed(src1));
32333240
GGML_ASSERT(!ggml_is_permuted(src0));
3234-
GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
3241+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
32353242
GGML_ASSERT(src0->type == GGML_TYPE_F16);
32363243
GGML_ASSERT(src1->type == GGML_TYPE_F32);
32373244

@@ -3293,7 +3300,7 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx,
32933300
ggml_tensor *dst) try {
32943301
GGML_ASSERT(!ggml_is_transposed(src0));
32953302
GGML_ASSERT(!ggml_is_transposed(src1));
3296-
GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
3303+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
32973304
GGML_ASSERT(src0->type == GGML_TYPE_F16);
32983305

32993306
GGML_TENSOR_BINARY_OP_LOCALS
@@ -4638,10 +4645,9 @@ static ggml_backend_dev_t ggml_backend_sycl_reg_get_device(ggml_backend_reg_t re
46384645
static void *ggml_backend_sycl_reg_get_proc_address(ggml_backend_reg_t reg, const char *name) {
46394646
GGML_UNUSED(reg);
46404647

4641-
// TODO: update to the current function signature
4642-
//if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
4643-
// return (void *)ggml_backend_sycl_split_buffer_type;
4644-
//}
4648+
if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
4649+
return (void *)ggml_backend_sycl_split_buffer_type;
4650+
}
46454651

46464652
// SYCL doesn't support registering host memory, left here for reference
46474653
// "ggml_backend_register_host_buffer"

0 commit comments

Comments (0)