33#include " test_common.h"
44
55/* HIT_START
6- * BUILD: %t %s ../../test_common.cpp
6+ * BUILD: %t %s ../../test_common.cpp CLANG_OPTIONS -g -O0
77 * TEST: %t -N 256M
88 * HIT_END
99 */
@@ -20,32 +20,66 @@ void add(int n, float *x, float *y)
2020int main (int argc, char *argv[])
2121{
2222 HipTest::parseStandardArguments (argc, argv, true );
23- int numElements = N;
23+
24+ printf (" info: set device to %d\n " , p_gpuDevice);
25+ HIPCHECK (hipSetDevice (p_gpuDevice));
26+
27+ int numElements = (N < (64 * 1024 * 1024 )) ? 64 * 1024 * 1024 : N;
2428 bool testResult = true ;
2529 float *A, *B;
2630
27- hipMallocManaged (&A, numElements*sizeof (float ));
28- hipMallocManaged (&B, numElements*sizeof (float ));
31+ HIPCHECK ( hipMallocManaged (&A, numElements*sizeof (float ) ));
32+ HIPCHECK ( hipMallocManaged (&B, numElements*sizeof (float ) ));
2933
3034 for (int i = 0 ; i < numElements; i++) {
3135 A[i] = 1 .0f ;
3236 B[i] = 2 .0f ;
3337 }
3438
39+ hipDevice_t device = hipCpuDeviceId;
40+
41+ HIPCHECK (hipMemAdvise (A, numElements*sizeof (float ), hipMemAdviseSetReadMostly, device));
42+ HIPCHECK (hipMemPrefetchAsync (A, numElements*sizeof (float ), 0 ));
43+ HIPCHECK (hipMemPrefetchAsync (B, numElements*sizeof (float ), 0 ));
44+ HIPCHECK (hipDeviceSynchronize ());
45+ HIPCHECK (hipMemRangeGetAttribute (&device, sizeof (device), hipMemRangeAttributeLastPrefetchLocation, A, numElements*sizeof (float )));
46+ if (device != p_gpuDevice) {
47+ printf (" hipMemRangeGetAttribute error, device = %d!\n " , device);
48+ }
49+ uint32_t read_only = 0xf ;
50+ HIPCHECK (hipMemRangeGetAttribute (&read_only, sizeof (read_only), hipMemRangeAttributeReadMostly, A, numElements*sizeof (float )));
51+ if (read_only != 1 ) {
52+ printf (" hipMemRangeGetAttribute error, read_only = %d!\n " , read_only);
53+ }
54+
3555 int blockSize = 256 ;
3656 int numBlocks = (numElements + blockSize - 1 ) / blockSize;
3757 dim3 dimGrid (numBlocks, 1 , 1 );
3858 dim3 dimBlock (blockSize, 1 , 1 );
59+ hipEvent_t event0, event1;
60+ HIPCHECK (hipEventCreate (&event0));
61+ HIPCHECK (hipEventCreate (&event1));
62+ HIPCHECK (hipEventRecord (event0, 0 ));
3963 hipLaunchKernelGGL (add, dimGrid, dimBlock, 0 , 0 , numElements, A, B);
40-
41- hipDeviceSynchronize ();
64+ HIPCHECK (hipEventRecord (event1, 0 ));
65+ HIPCHECK (hipDeviceSynchronize ());
66+ float time = 0 .0f ;
67+ HIPCHECK (hipEventElapsedTime (&time, event0, event1));
68+ printf (" Time %.3f ms\n " , time);
4269
4370 float maxError = 0 .0f ;
71+ HIPCHECK (hipMemPrefetchAsync (B, numElements*sizeof (float ), hipCpuDeviceId));
72+ HIPCHECK (hipDeviceSynchronize ());
73+ device = p_gpuDevice;
74+ HIPCHECK (hipMemRangeGetAttribute (&device, sizeof (device), hipMemRangeAttributeLastPrefetchLocation, A, numElements*sizeof (float )));
75+ if (device != hipCpuDeviceId) {
76+ printf (" hipMemRangeGetAttribute error (CPU device is expected), device = %d!\n " , device);
77+ }
4478 for (int i = 0 ; i < numElements; i++)
4579 maxError = fmax (maxError, fabs (B[i]-3 .0f ));
4680
47- hipFree (A);
48- hipFree (B);
81+ HIPCHECK ( hipFree (A) );
82+ HIPCHECK ( hipFree (B) );
4983 if (maxError == 0 .0f )
5084 passed ();
5185 failed (" Output Mismatch\n " );
0 commit comments