@@ -644,39 +644,45 @@ gpu.module @test_module_29 {
644644
645645gpu.module @test_module_30 {
646646 // CHECK-LABEL: func @subgroup_reduce_add
647- gpu.func @subgroup_reduce_add (%arg0 : i32 ) {
647+ gpu.func @subgroup_reduce_add (%arg0 : i32 , %buf : memref < i32 > ) {
648648 // CHECK: nvvm.redux.sync add {{.*}}
649649 %result = gpu.subgroup_reduce add %arg0 uniform {} : (i32 ) -> (i32 )
650+ memref.store %result , %buf [] : memref <i32 >
650651 gpu.return
651652 }
652653 // CHECK-LABEL: @subgroup_reduce_minsi
653- gpu.func @subgroup_reduce_minsi (%arg0 : i32 ) {
654+ gpu.func @subgroup_reduce_minsi (%arg0 : i32 , %buf : memref < i32 > ) {
654655 // CHECK: nvvm.redux.sync min {{.*}}
655656 %result = gpu.subgroup_reduce minsi %arg0 uniform {} : (i32 ) -> (i32 )
657+ memref.store %result , %buf [] : memref <i32 >
656658 gpu.return
657659 }
658660 // CHECK-LABEL: @subgroup_reduce_maxsi
659- gpu.func @subgroup_reduce_maxsi (%arg0 : i32 ) {
661+ gpu.func @subgroup_reduce_maxsi (%arg0 : i32 , %buf : memref < i32 > ) {
660662 // CHECK: nvvm.redux.sync max {{.*}}
661663 %result = gpu.subgroup_reduce maxsi %arg0 uniform {} : (i32 ) -> (i32 )
664+ memref.store %result , %buf [] : memref <i32 >
662665 gpu.return
663666 }
664667 // CHECK-LABEL: func @subgroup_reduce_and
665- gpu.func @subgroup_reduce_and (%arg0 : i32 ) {
668+ gpu.func @subgroup_reduce_and (%arg0 : i32 , %buf : memref < i32 > ) {
666669 // CHECK: nvvm.redux.sync and {{.*}}
667670 %result = gpu.subgroup_reduce and %arg0 uniform {} : (i32 ) -> (i32 )
671+ memref.store %result , %buf [] : memref <i32 >
668672 gpu.return
669673 }
670674 // CHECK-LABEL: @subgroup_reduce_or
671- gpu.func @subgroup_reduce_or (%arg0 : i32 ) {
675+ gpu.func @subgroup_reduce_or (%arg0 : i32 , %buf : memref < i32 > ) {
672676 // CHECK: nvvm.redux.sync or {{.*}}
673677 %result = gpu.subgroup_reduce or %arg0 uniform {} : (i32 ) -> (i32 )
678+ memref.store %result , %buf [] : memref <i32 >
674679 gpu.return
675680 }
676681 // CHECK-LABEL: @subgroup_reduce_xor
677- gpu.func @subgroup_reduce_xor (%arg0 : i32 ) {
682+ gpu.func @subgroup_reduce_xor (%arg0 : i32 , %buf : memref < i32 > ) {
678683 // CHECK: nvvm.redux.sync xor {{.*}}
679684 %result = gpu.subgroup_reduce xor %arg0 uniform {} : (i32 ) -> (i32 )
685+ memref.store %result , %buf [] : memref <i32 >
680686 gpu.return
681687 }
682688}
0 commit comments