@@ -288,10 +288,8 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                      ggml_tensor *tensor) try {
     ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
 
-    if (tensor->view_src != NULL && tensor->view_offs == 0) {
+    if (tensor->view_src != NULL) {
         assert(tensor->view_src->buffer->buft == buffer->buft);
-        tensor->backend = tensor->view_src->backend;
-        tensor->extra = tensor->view_src->extra;
         return;
     }
 
@@ -539,7 +537,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
     auto dev_count = ggml_backend_sycl_get_device_count();
 
     if (device>=dev_count or device<0) {
-        printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
+        GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
             device, dev_count-1);
         GGML_ASSERT(device<dev_count);
     }
@@ -567,7 +565,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(ggml_backend_sycl_conte
 
     int device = ctx->device;
     if (device>=ggml_sycl_info().device_count or device<0) {
-        printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
+        GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
             device, ggml_sycl_info().device_count-1);
         GGML_ASSERT(device<ggml_sycl_info().device_count);
     }
@@ -746,7 +744,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
             size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
         }
 
-        // FIXME: do not crash if cudaMalloc fails
+        // FIXME: do not crash if SYCL Buffer alloc fails
         // currently, init_tensor cannot fail, it needs to be fixed in ggml-backend first
         ggml_sycl_set_device(i);
         const queue_ptr stream = ctx->streams[i];
@@ -788,7 +786,6 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
                 CHECK_TRY_ERROR(extra->events[i][is] = new sycl::event()));
         }
     }
-    tensor->backend = GGML_BACKEND_TYPE_GPU_SPLIT;
     tensor->extra = extra;
 }
 catch (sycl::exception const &exc) {
@@ -2349,12 +2346,22 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
 
     dpct::memcpy_direction kind;
     char * src_ptr;
-    if (src->backend == GGML_BACKEND_TYPE_CPU) {
+    if (ggml_backend_buffer_is_host(src->buffer)) {
         kind = dpct::host_to_device;
+        // GGML_SYCL_DEBUG("%s: Host buffer type src tensor\n", __func__);
         src_ptr = (char *) src->data;
         // GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr);
-    } else if (src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
-        GGML_ASSERT(src->backend != GGML_BACKEND_TYPE_GPU_SPLIT || (i1_low == 0 && i1_high == src->ne[1]));
+    } else if (ggml_backend_buffer_is_sycl(src->buffer)) {
+        // If buffer is a SYCL buffer
+        // GGML_SYCL_DEBUG("%s: SYCL buffer type src tensor\n", __func__);
+        kind = dpct::device_to_device;
+        src_ptr = (char *) src->data;
+    } else if (ggml_backend_buffer_is_sycl_split(src->buffer)) {
+        /*
+        If buffer is a SYCL split buffer
+        */
+        // GGML_SYCL_DEBUG("%s: Split buffer type src tensor\n", __func__);
+        GGML_ASSERT(i1_low == 0 && i1_high == src->ne[1]);
         kind = dpct::device_to_device;
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
         int id;
@@ -2857,8 +2864,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
     const int nb2 = dst->nb[2];
     const int nb3 = dst->nb[3];
 
-    GGML_ASSERT(dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
-    GGML_ASSERT(src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src1->buffer));
     GGML_ASSERT(src1->type == GGML_TYPE_F32 || (src1->ne[2] == 1 && src1->ne[3] == 1));
 
     GGML_ASSERT(ne12 >= ne02 && ne12 % ne02 == 0);
@@ -2878,7 +2885,7 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
 
     int64_t src1_padded_col_size = GGML_PAD(ne10, MATRIX_ROW_PADDING);
 
-    const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
+    const bool split = ggml_backend_buffer_is_sycl_split(src0->buffer);
     GGML_ASSERT(!(split && ne02 > 1));
     GGML_ASSERT(!(split && ne03 > 1));
     GGML_ASSERT(!(split && ne02 < ne12));
@@ -3198,7 +3205,7 @@ static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const gg
                                         const ggml_tensor *src1,
                                         ggml_tensor *dst) try {
     GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
-    GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
     GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // 0213 permutation
     GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // 0213 permutation
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
@@ -3231,7 +3238,7 @@ static void ggml_sycl_mul_mat_vec_nc(ggml_backend_sycl_context & ctx, const ggml
     GGML_ASSERT(!ggml_is_transposed(src0));
     GGML_ASSERT(!ggml_is_transposed(src1));
     GGML_ASSERT(!ggml_is_permuted(src0));
-    GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
 
@@ -3293,7 +3300,7 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx,
                                            ggml_tensor *dst) try {
     GGML_ASSERT(!ggml_is_transposed(src0));
     GGML_ASSERT(!ggml_is_transposed(src1));
-    GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
 
     GGML_TENSOR_BINARY_OP_LOCALS
@@ -4638,10 +4645,9 @@ static ggml_backend_dev_t ggml_backend_sycl_reg_get_device(ggml_backend_reg_t re
 static void *ggml_backend_sycl_reg_get_proc_address(ggml_backend_reg_t reg, const char *name) {
     GGML_UNUSED(reg);
 
-    // TODO: update to the current function signature
-    // if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
-    //     return (void *)ggml_backend_sycl_split_buffer_type;
-    // }
+    if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
+        return (void *)ggml_backend_sycl_split_buffer_type;
+    }
 
     // SYCL doesn't support registering host memory, left here for reference
     // "ggml_backend_register_host_buffer"