Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 368d7ea

Browse files
[mlir][gpu] Add NoMemoryEffect to gpu.subgroup_reduce op (#180227)
Add missing MemoryEffectOpInterface to gpu.subgroup_reduce.

Signed-off-by: Fabrizio Indirli <[email protected]>
1 parent 9f8f901 commit 368d7ea

3 files changed

Lines changed: 16 additions & 9 deletions

File tree

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1226,7 +1226,7 @@ def GPU_AllReduceOp : GPU_Op<"all_reduce",
12261226
def AnyIntegerOrFloatOr1DVector :
12271227
AnyTypeOf<[AnyIntegerOrFloat, FixedVectorOfRankAndType<[1], [AnyIntegerOrFloat]>]>;
12281228

1229-
def GPU_SubgroupReduceOp : GPU_Op<"subgroup_reduce", [SameOperandsAndResultType]> {
1229+
def GPU_SubgroupReduceOp : GPU_Op<"subgroup_reduce", [SameOperandsAndResultType, NoMemoryEffect]> {
12301230
let summary = "Reduce values among subgroup.";
12311231
let description = [{
12321232
The `subgroup_reduce` op reduces the values of lanes (work items) across a

mlir/test/Conversion/ConvertToSPIRV/gpu.mlir

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,11 +27,12 @@ module attributes {
2727

2828
gpu.module @kernels {
2929
// CHECK-LABEL: spirv.func @subgroup_reduce
30-
// CHECK-SAME: (%[[ARG0:.*]]: f32)
30+
// CHECK-SAME: (%[[ARG0:.*]]: f32, [[BUF:.*]]: !spirv.ptr{{[^)]*}})
3131
// CHECK: %{{.*}} = spirv.GroupNonUniformFAdd <Subgroup> <Reduce> %[[ARG0]] : f32 -> f32
32-
gpu.func @subgroup_reduce(%arg0 : f32) kernel
32+
gpu.func @subgroup_reduce(%arg0 : f32, %buf : memref<f32>) kernel
3333
attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
3434
%reduced = gpu.subgroup_reduce add %arg0 {} : (f32) -> (f32)
35+
memref.store %reduced, %buf[] : memref<f32>
3536
gpu.return
3637
}
3738
}

mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -644,39 +644,45 @@ gpu.module @test_module_29 {
644644

645645
gpu.module @test_module_30 {
646646
// CHECK-LABEL: func @subgroup_reduce_add
647-
gpu.func @subgroup_reduce_add(%arg0 : i32) {
647+
gpu.func @subgroup_reduce_add(%arg0 : i32, %buf : memref<i32>) {
648648
// CHECK: nvvm.redux.sync add {{.*}}
649649
%result = gpu.subgroup_reduce add %arg0 uniform {} : (i32) -> (i32)
650+
memref.store %result, %buf[] : memref<i32>
650651
gpu.return
651652
}
652653
// CHECK-LABEL: @subgroup_reduce_minsi
653-
gpu.func @subgroup_reduce_minsi(%arg0 : i32) {
654+
gpu.func @subgroup_reduce_minsi(%arg0 : i32, %buf : memref<i32>) {
654655
// CHECK: nvvm.redux.sync min {{.*}}
655656
%result = gpu.subgroup_reduce minsi %arg0 uniform {} : (i32) -> (i32)
657+
memref.store %result, %buf[] : memref<i32>
656658
gpu.return
657659
}
658660
// CHECK-LABEL: @subgroup_reduce_maxsi
659-
gpu.func @subgroup_reduce_maxsi(%arg0 : i32) {
661+
gpu.func @subgroup_reduce_maxsi(%arg0 : i32, %buf : memref<i32>) {
660662
// CHECK: nvvm.redux.sync max {{.*}}
661663
%result = gpu.subgroup_reduce maxsi %arg0 uniform {} : (i32) -> (i32)
664+
memref.store %result, %buf[] : memref<i32>
662665
gpu.return
663666
}
664667
// CHECK-LABEL: func @subgroup_reduce_and
665-
gpu.func @subgroup_reduce_and(%arg0 : i32) {
668+
gpu.func @subgroup_reduce_and(%arg0 : i32, %buf : memref<i32>) {
666669
// CHECK: nvvm.redux.sync and {{.*}}
667670
%result = gpu.subgroup_reduce and %arg0 uniform {} : (i32) -> (i32)
671+
memref.store %result, %buf[] : memref<i32>
668672
gpu.return
669673
}
670674
// CHECK-LABEL: @subgroup_reduce_or
671-
gpu.func @subgroup_reduce_or(%arg0 : i32) {
675+
gpu.func @subgroup_reduce_or(%arg0 : i32, %buf : memref<i32>) {
672676
// CHECK: nvvm.redux.sync or {{.*}}
673677
%result = gpu.subgroup_reduce or %arg0 uniform {} : (i32) -> (i32)
678+
memref.store %result, %buf[] : memref<i32>
674679
gpu.return
675680
}
676681
// CHECK-LABEL: @subgroup_reduce_xor
677-
gpu.func @subgroup_reduce_xor(%arg0 : i32) {
682+
gpu.func @subgroup_reduce_xor(%arg0 : i32, %buf : memref<i32>) {
678683
// CHECK: nvvm.redux.sync xor {{.*}}
679684
%result = gpu.subgroup_reduce xor %arg0 uniform {} : (i32) -> (i32)
685+
memref.store %result, %buf[] : memref<i32>
680686
gpu.return
681687
}
682688
}

0 commit comments

Comments
 (0)