@@ -309,12 +309,14 @@ void generic_copy(void* __restrict dst, const void* __restrict src, size_t n,
309309 if (di.size == is_cpu_owned) return d2h_copy (dst, src, n, si);
310310 if (si.size == is_cpu_owned) return h2d_copy (dst, src, n, di);
311311
312- throwing_result_check (hsa_amd_agents_allow_access (1u , &si.agentOwner ,
313- nullptr ,
314- di.agentBaseAddress ),
315- __FILE__, __func__, __LINE__);
316-
317- return do_copy (dst, src, n, di.agentOwner , si.agentOwner );
312+ hsa_status_t res = hsa_amd_agents_allow_access (1u , &si.agentOwner ,
313+ nullptr , di.agentBaseAddress );
314+ if (res == HSA_STATUS_SUCCESS ){
315+ return do_copy (dst, src, n, di.agentOwner , si.agentOwner );
316+ }
317+ // If access could not be granted (the call above did not return
318+ // HSA_STATUS_SUCCESS), fall back to hsa_memory_copy; this copy will be slower.
319+ throwing_result_check (hsa_memory_copy (dst,src,n), __FILE__, __func__, __LINE__);
318320}
319321
320322inline
@@ -341,11 +343,16 @@ void memcpy_impl(void* __restrict dst, const void* __restrict src, size_t n,
341343 case hipMemcpyHostToDevice: return h2d_copy (dst, src, n, di);
342344 case hipMemcpyDeviceToHost: return d2h_copy (dst, src, n, si);
343345 case hipMemcpyDeviceToDevice: {
344- throwing_result_check (hsa_amd_agents_allow_access (1u , &si.agentOwner ,
345- nullptr ,
346- di.agentBaseAddress ),
347- __FILE__, __func__, __LINE__);
348- return do_copy (dst, src, n, di.agentOwner , si.agentOwner );
346+ hsa_status_t res = hsa_amd_agents_allow_access (1u , &si.agentOwner ,
347+ nullptr , di.agentBaseAddress );
348+ if (res == HSA_STATUS_SUCCESS ){
349+ return do_copy (dst, src, n, di.agentOwner , si.agentOwner );
350+ }
351+
352+ // If access could not be granted (the call above did not return
353+ // HSA_STATUS_SUCCESS), fall back to hsa_memory_copy; this copy will be slower.
354+ throwing_result_check (hsa_memory_copy (dst,src,n), __FILE__, __func__, __LINE__);
355+ break ;
349356 }
350357 default : return generic_copy (dst, src, n, di, si);
351358 }
0 commit comments