Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 75f691e

Browse files
committed
Add hipHostMallocCoherent, hipHostMallocNonCoherent
Provide per-allocation control over coherent/non-coherent memory. These override the default HIP_COHERENT_HOST_ALLOC setting.
1 parent d0ef9d8 commit 75f691e

6 files changed

Lines changed: 104 additions & 26 deletions

File tree

include/hip/hcc_detail/hip_runtime_api.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,17 +111,21 @@ enum hipLimit_t
111111

112112
//! Flags that can be used with hipHostMalloc
113113
#define hipHostMallocDefault 0x0
114-
#define hipHostMallocPortable 0x1
115-
#define hipHostMallocMapped 0x2
114+
#define hipHostMallocPortable 0x1 ///< Memory is considered allocated by all contexts.
115+
#define hipHostMallocMapped 0x2 ///< Map the allocation into the address space for the current device. The device pointer can be obtained with #hipHostGetDevicePointer.
116116
#define hipHostMallocWriteCombined 0x4
117+
#define hipHostMallocCoherent 0x40000000 ///< Allocate coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific allocation.
118+
#define hipHostMallocNonCoherent 0x80000000 ///< Allocate non-coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific allocation.
119+
117120

118121
//! Flags that can be used with hipHostRegister
119122
#define hipHostRegisterDefault 0x0 ///< Memory is Mapped and Portable
120-
#define hipHostRegisterPortable 0x1 ///< Memory is considered registered by all contexts. HIP only supports one context so this is always assumed true.
123+
#define hipHostRegisterPortable 0x1 ///< Memory is considered registered by all contexts.
121124
#define hipHostRegisterMapped 0x2 ///< Map the allocation into the address space for the current device. The device pointer can be obtained with #hipHostGetDevicePointer.
122125
#define hipHostRegisterIoMemory 0x4 ///< Not supported.
123126

124127

128+
125129
#define hipDeviceScheduleAuto 0x0 ///< Automatically select between Spin and Yield
126130
#define hipDeviceScheduleSpin 0x1 ///< Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and may consume more power.
127131
#define hipDeviceScheduleYield 0x2 ///< Yield the CPU to the operating system when waiting. May increase latency, but lowers power and is friendlier to other threads in the system.

include/hip/nvcc_detail/hip_runtime_api.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ hipMemcpyHostToHost
6565
#define hipHostMallocPortable cudaHostAllocPortable
6666
#define hipHostMallocMapped cudaHostAllocMapped
6767
#define hipHostMallocWriteCombined cudaHostAllocWriteCombined
68+
#define hipHostMallocCoherent 0x0
69+
#define hipHostMallocNonCoherent 0x0
6870

6971
#define hipHostRegisterPortable cudaHostRegisterPortable
7072
#define hipHostRegisterMapped cudaHostRegisterMapped

src/hip_hcc.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ int HIP_PROFILE_API= 0;
7474
std::string HIP_DB_START_API;
7575
std::string HIP_DB_STOP_API;
7676
int HIP_DB= 0;
77-
int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU identifiers */
77+
int HIP_VISIBLE_DEVICES = 0;
7878
int HIP_NUM_KERNELS_INFLIGHT = 128;
7979
int HIP_WAIT_MODE = 0;
8080

src/hip_memory.cpp

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -267,17 +267,36 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
267267
trueFlags = hipHostMallocMapped | hipHostMallocPortable;
268268
}
269269

270-
const unsigned supportedFlags = hipHostMallocPortable | hipHostMallocMapped | hipHostMallocWriteCombined;
271270

272-
if (flags & ~supportedFlags) {
271+
const unsigned supportedFlags = hipHostMallocPortable
272+
| hipHostMallocMapped
273+
| hipHostMallocWriteCombined
274+
| hipHostMallocCoherent
275+
| hipHostMallocNonCoherent;
276+
277+
278+
const unsigned coherencyFlags = hipHostMallocCoherent | hipHostMallocNonCoherent;
279+
280+
if ((flags & ~supportedFlags) ||
281+
((flags & coherencyFlags) == coherencyFlags)) {
282+
*ptr = nullptr;
283+
// can't specify unsupported flags, can't specify both Coherent + NonCoherent
273284
hip_status = hipErrorInvalidValue;
274-
}
275-
else {
285+
} else {
276286
auto device = ctx->getWriteableDevice();
277-
unsigned amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostPinned;
287+
288+
unsigned amFlags = 0;
289+
if (flags & hipHostMallocCoherent) {
290+
amFlags = amHostCoherent;
291+
} else if (flags & hipHostMallocNonCoherent) {
292+
amFlags = amHostPinned;
293+
} else {
294+
// depends on env variables:
295+
amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostPinned;
296+
}
278297

279298

280-
*ptr = hip_internal::allocAndSharePtr(HIP_COHERENT_HOST_ALLOC ? "finegrained_host":"pinned_host",
299+
*ptr = hip_internal::allocAndSharePtr((amFlags & amHostCoherent) ? "finegrained_host":"pinned_host",
281300
sizeBytes, ctx, (trueFlags & hipHostMallocPortable) /*shareWithAll*/, amFlags, flags);
282301

283302
if(sizeBytes && (*ptr == NULL)){

tests/src/runtimeApi/memory/hipHostMalloc.cpp

Lines changed: 67 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,19 @@
3131
#define LEN 1024*1024
3232
#define SIZE LEN*sizeof(float)
3333

34-
__global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd){
34+
__global__ void Add(float *Ad, float *Bd, float *Cd){
3535
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
3636
Cd[tx] = Ad[tx] + Bd[tx];
3737
}
3838

39+
40+
__global__ void Set(int *Ad, int val){
41+
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
42+
Ad[tx] = val;
43+
}
44+
3945
int main(){
40-
float *A, *B, *C;
41-
float *Ad, *Bd, *Cd;
46+
4247

4348
hipDeviceProp_t prop;
4449
int device;
@@ -49,26 +54,72 @@ int main(){
4954
failed("Does support HostPinned Memory");
5055
}
5156

52-
HIPCHECK(hipHostMalloc((void**)&A, SIZE, hipHostMallocWriteCombined | hipHostMallocMapped));
53-
HIPCHECK(hipHostMalloc((void**)&B, SIZE, hipHostMallocDefault));
54-
HIPCHECK(hipHostMalloc((void**)&C, SIZE, hipHostMallocMapped));
5557

56-
HIPCHECK(hipHostGetDevicePointer((void**)&Ad, A, 0));
57-
HIPCHECK(hipHostGetDevicePointer((void**)&Cd, C, 0));
58+
{
59+
float *A, *B, *C;
60+
float *Ad, *Bd, *Cd;
61+
HIPCHECK(hipHostMalloc((void**)&A, SIZE, hipHostMallocWriteCombined | hipHostMallocMapped));
62+
HIPCHECK(hipHostMalloc((void**)&B, SIZE, hipHostMallocDefault));
63+
HIPCHECK(hipHostMalloc((void**)&C, SIZE, hipHostMallocMapped));
64+
65+
HIPCHECK(hipHostGetDevicePointer((void**)&Ad, A, 0));
66+
HIPCHECK(hipHostGetDevicePointer((void**)&Cd, C, 0));
67+
68+
for(int i=0;i<LEN;i++){
69+
A[i] = 1.0f;
70+
B[i] = 2.0f;
71+
}
72+
73+
HIPCHECK(hipMalloc((void**)&Bd, SIZE));
74+
HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));
75+
76+
dim3 dimGrid(LEN/512,1,1);
77+
dim3 dimBlock(512,1,1);
5878

59-
for(int i=0;i<LEN;i++){
60-
A[i] = 1.0f;
61-
B[i] = 2.0f;
79+
hipLaunchKernelGGL(Add, dimGrid, dimBlock, 0, 0, Ad, Bd, Cd);
80+
81+
HIPCHECK(hipDeviceSynchronize());
82+
83+
HIPCHECK(hipHostFree(A));
84+
HIPCHECK(hipHostFree(B));
85+
HIPCHECK(hipHostFree(C));
6286
}
6387

64-
HIPCHECK(hipMalloc((void**)&Bd, SIZE));
65-
HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));
88+
{
89+
int *A, *B;
90+
int numElements = 1024*16;
91+
size_t sizeBytes = numElements * sizeof (int);
92+
#ifdef __HIP_PLATFORM_HCC__
93+
HIPCHECK_API(hipHostMalloc((void**)&A, sizeBytes, hipHostMallocCoherent|hipHostMallocNonCoherent), hipErrorInvalidValue);
94+
95+
assert (A == 0);
96+
#endif
97+
98+
HIPCHECK(hipHostMalloc((void**)&A, sizeBytes, hipHostMallocCoherent));
99+
hipStream_t s;
100+
hipEvent_t e;
66101

67-
dim3 dimGrid(LEN/512,1,1);
68-
dim3 dimBlock(512,1,1);
102+
// Init:
103+
HIPCHECK(hipStreamCreate(&s));
104+
HIPCHECK(hipEventCreateWithFlags(&e, 0));
105+
dim3 dimBlock(64,1,1);
106+
dim3 dimGrid(numElements/dimBlock.x,1,1);
69107

70-
hipLaunchKernel(HIP_KERNEL_NAME(Add), dimGrid, dimBlock, 0, 0, Ad, Bd, Cd);
108+
// Init array to know state:
109+
hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, 0x0, A, -42);
110+
HIPCHECK(hipDeviceSynchronize());
71111

112+
hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, s, A, 13);
113+
HIPCHECK(hipEventRecord(e, s));
114+
115+
// Host waits for event :
116+
HIPCHECK(hipEventSynchronize(e));
117+
118+
// check result?
119+
120+
HIPCHECK(hipHostMalloc((void**)&B, sizeBytes, hipHostMallocNonCoherent));
121+
}
122+
72123
passed();
73124

74125
}

util/vim/hip.vim

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ syn keyword hipFlags hipHostMallocDefault
185185
syn keyword hipFlags hipHostMallocPortable
186186
syn keyword hipFlags hipHostMallocMapped
187187
syn keyword hipFlags hipHostMallocWriteCombined
188+
syn keyword hipFlags hipHostMallocCoherent
189+
syn keyword hipFlags hipHostMallocNonCoherent
188190

189191
syn keyword hipFlags hipHostRegisterDefault
190192
syn keyword hipFlags hipHostRegisterPortable

0 commit comments

Comments
 (0)