Merge pull request PacktPublishing#16 from PacktPublishing/reduction-fix

haanjack · web-flow · commit 10bd9b59bfcc · 2023-12-30T17:15:53.000+09:00
Fixed the second reduction_kernel launch in each reduction() wrapper to pass g_outPtr instead of g_inPtr as its input argument
diff --git a/Chapter03/03_cuda_thread_programming/06_limiter_balancing/reduction_kernel.cu b/Chapter03/03_cuda_thread_programming/06_limiter_balancing/reduction_kernel.cu
@@ -45,7 +45,7 @@ int reduction(float *g_outPtr, float *g_inPtr, int size, int n_threads)
     int n_blocks = min(num_blocks_per_sm * num_sms, (size + n_threads - 1) / n_threads);
 
     reduction_kernel<<<n_blocks, n_threads, n_threads * sizeof(float), 0>>>(g_outPtr, g_inPtr, size);
-    reduction_kernel<<<1, n_threads, n_threads * sizeof(float), 0>>>(g_outPtr, g_inPtr, n_blocks);
+    reduction_kernel<<<1, n_threads, n_threads * sizeof(float), 0>>>(g_outPtr, g_outPtr, n_blocks);
 
     return 1;
-}
+}
diff --git a/Chapter03/03_cuda_thread_programming/06_limiter_balancing/reduction_kernel_opt.cu b/Chapter03/03_cuda_thread_programming/06_limiter_balancing/reduction_kernel_opt.cu
@@ -52,7 +52,7 @@ int reduction(float *g_outPtr, float *g_inPtr, int size, int n_threads)
     int n_blocks = min(num_blocks_per_sm * num_sms, (size + n_threads - 1) / n_threads);
 
     reduction_kernel<<<n_blocks, n_threads, n_threads * sizeof(float), 0>>>(g_outPtr, g_inPtr, size);
-    reduction_kernel<<<1, n_threads, n_threads * sizeof(float), 0>>>(g_outPtr, g_inPtr, n_blocks);
+    reduction_kernel<<<1, n_threads, n_threads * sizeof(float), 0>>>(g_outPtr, g_outPtr, n_blocks);
 
     return 1;
-}
+}
diff --git a/Chapter03/03_cuda_thread_programming/07_warp_synchronous_programming/reduction_wp_kernel.cu b/Chapter03/03_cuda_thread_programming/07_warp_synchronous_programming/reduction_wp_kernel.cu
@@ -79,5 +79,5 @@ void reduction(float *g_outPtr, float *g_inPtr, int size, int n_threads)
     int n_blocks = min(num_blocks_per_sm * num_sms, (size + n_threads - 1) / n_threads);
 
     reduction_kernel<<<n_blocks, n_threads>>>(g_outPtr, g_inPtr, size);
-    reduction_kernel<<< 1, n_threads, n_threads * sizeof(float), 0 >>>(g_outPtr, g_inPtr, n_blocks);
+    reduction_kernel<<< 1, n_threads, n_threads * sizeof(float), 0 >>>(g_outPtr, g_outPtr, n_blocks);
 }
diff --git a/Chapter03/03_cuda_thread_programming/08_cooperative_group/reduction_cg_kernel.cu b/Chapter03/03_cuda_thread_programming/08_cooperative_group/reduction_cg_kernel.cu
@@ -63,5 +63,5 @@ void reduction(float *g_outPtr, float *g_inPtr, int size, int n_threads)
     int n_blocks = min(num_blocks_per_sm * num_sms, (size + n_threads - 1) / n_threads);
 
     reduction_kernel<<< n_blocks, n_threads, n_threads * sizeof(float), 0 >>>(g_outPtr, g_inPtr, size);
-    reduction_kernel<<< 1, n_threads, n_threads * sizeof(float), 0 >>>(g_outPtr, g_inPtr, n_blocks);
+    reduction_kernel<<< 1, n_threads, n_threads * sizeof(float), 0 >>>(g_outPtr, g_outPtr, n_blocks);
 }
diff --git a/Chapter03/03_cuda_thread_programming/08_cooperative_group/reduction_cg_shift_kernel.cu b/Chapter03/03_cuda_thread_programming/08_cooperative_group/reduction_cg_shift_kernel.cu
@@ -85,5 +85,5 @@ void reduction(float *g_outPtr, float *g_inPtr, int size, int n_threads)
     int n_blocks = min(num_blocks_per_sm * num_sms, (size + n_threads - 1) / n_threads);
 
     reduction_kernel<<<n_blocks, n_threads>>>(g_outPtr, g_inPtr, size);
-    reduction_kernel<<< 1, n_threads >>>(g_outPtr, g_inPtr, n_blocks);
+    reduction_kernel<<< 1, n_threads >>>(g_outPtr, g_outPtr, n_blocks);
 }
diff --git a/Chapter03/03_cuda_thread_programming/09_loop_unrolling/reduction_cg_kernel.cu b/Chapter03/03_cuda_thread_programming/09_loop_unrolling/reduction_cg_kernel.cu
@@ -80,5 +80,5 @@ void reduction(float *g_outPtr, float *g_inPtr, int size, int n_threads)
     int n_blocks = min(num_blocks_per_sm * num_sms, (size + n_threads - 1) / n_threads);
 
     reduction_kernel<<< n_blocks, n_threads>>>(g_outPtr, g_inPtr, size);
-    reduction_kernel<<< 1, n_threads >>>(g_outPtr, g_inPtr, n_blocks);
+    reduction_kernel<<< 1, n_threads >>>(g_outPtr, g_outPtr, n_blocks);
 }
diff --git a/Chapter03/03_cuda_thread_programming/09_loop_unrolling/reduction_wp_kernel.cu b/Chapter03/03_cuda_thread_programming/09_loop_unrolling/reduction_wp_kernel.cu
@@ -85,5 +85,5 @@ void reduction(float *g_outPtr, float *g_inPtr, int size, int n_threads)
     int n_blocks = min(num_blocks_per_sm * num_sms, (size + n_threads - 1) / n_threads);
 
     reduction_kernel<<< n_blocks, n_threads>>>(g_outPtr, g_inPtr, size);
-    reduction_kernel<<< 1, n_threads >>>(g_outPtr, g_inPtr, n_blocks);
+    reduction_kernel<<< 1, n_threads >>>(g_outPtr, g_outPtr, n_blocks);
 }

Original file line number	Diff line number	Diff line change
`@@ -79,5 +79,5 @@ void reduction(float *g_outPtr, float *g_inPtr, int size, int n_threads)`
`79`	`79`	`int n_blocks = min(num_blocks_per_sm * num_sms, (size + n_threads - 1) / n_threads);`
`80`	`80`
`81`	`81`	`reduction_kernel<<<n_blocks, n_threads>>>(g_outPtr, g_inPtr, size);`
`82`		`- reduction_kernel<<< 1, n_threads, n_threads * sizeof(float), 0 >>>(g_outPtr, g_inPtr, n_blocks);`
	`82`	`+ reduction_kernel<<< 1, n_threads, n_threads * sizeof(float), 0 >>>(g_outPtr, g_outPtr, n_blocks);`
`83`	`83`	`}`
Original file line number	Diff line number	Diff line change
`@@ -63,5 +63,5 @@ void reduction(float *g_outPtr, float *g_inPtr, int size, int n_threads)`
`63`	`63`	`int n_blocks = min(num_blocks_per_sm * num_sms, (size + n_threads - 1) / n_threads);`
`64`	`64`
`65`	`65`	`reduction_kernel<<< n_blocks, n_threads, n_threads * sizeof(float), 0 >>>(g_outPtr, g_inPtr, size);`
`66`		`- reduction_kernel<<< 1, n_threads, n_threads * sizeof(float), 0 >>>(g_outPtr, g_inPtr, n_blocks);`
	`66`	`+ reduction_kernel<<< 1, n_threads, n_threads * sizeof(float), 0 >>>(g_outPtr, g_outPtr, n_blocks);`
`67`	`67`	`}`
Original file line number	Diff line number	Diff line change
`@@ -85,5 +85,5 @@ void reduction(float *g_outPtr, float *g_inPtr, int size, int n_threads)`
`85`	`85`	`int n_blocks = min(num_blocks_per_sm * num_sms, (size + n_threads - 1) / n_threads);`
`86`	`86`
`87`	`87`	`reduction_kernel<<<n_blocks, n_threads>>>(g_outPtr, g_inPtr, size);`
`88`		`- reduction_kernel<<< 1, n_threads >>>(g_outPtr, g_inPtr, n_blocks);`
	`88`	`+ reduction_kernel<<< 1, n_threads >>>(g_outPtr, g_outPtr, n_blocks);`
`89`	`89`	`}`
Original file line number	Diff line number	Diff line change
`@@ -80,5 +80,5 @@ void reduction(float *g_outPtr, float *g_inPtr, int size, int n_threads)`
`80`	`80`	`int n_blocks = min(num_blocks_per_sm * num_sms, (size + n_threads - 1) / n_threads);`
`81`	`81`
`82`	`82`	`reduction_kernel<<< n_blocks, n_threads>>>(g_outPtr, g_inPtr, size);`
`83`		`- reduction_kernel<<< 1, n_threads >>>(g_outPtr, g_inPtr, n_blocks);`
	`83`	`+ reduction_kernel<<< 1, n_threads >>>(g_outPtr, g_outPtr, n_blocks);`
`84`	`84`	`}`