Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 9a81d85

Browse files
authored
[flang][acc] Fix the indexing of the reduction combiner for multidimensional static arrays (#155536)
In the following example of reducing a static 2D array, we have incorrect coordinates for array access in the reduction combiner. This PR reverses the order of the induction variables used for such array indexing. For other cases of static arrays, we reverse the loop order as well so that the innermost loop can handle the innermost dimension.

```Fortran
program main
  implicit none
  integer, parameter :: m = 2
  integer, parameter :: n = 10
  integer :: r(n,m), i
  r = 0
  !$acc parallel loop reduction(+:r(:n,:m))
  do i = 1, n
    r(i, 1) = i
  enddo
  print *, r
end program main
```

Currently, we have:

```mlir
fir.do_loop %arg2 = %c0 to %c1 step %c1 {
  fir.do_loop %arg3 = %c0 to %c9 step %c1 {
    %0 = fir.coordinate_of %arg0, %arg2, %arg3 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
    %1 = fir.coordinate_of %arg1, %arg2, %arg3 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
```

We'll obtain:

```mlir
fir.do_loop %arg2 = %c0 to %c1 step %c1 {
  fir.do_loop %arg3 = %c0 to %c9 step %c1 {
    %0 = fir.coordinate_of %arg0, %arg3, %arg2 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
    %1 = fir.coordinate_of %arg1, %arg3, %arg2 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
```
1 parent f44eaf4 commit 9a81d85

File tree

3 files changed

+32
-30
lines changed

3 files changed

+32
-30
lines changed

flang/lib/Lower/OpenACC.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1575,7 +1575,7 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
15751575
if (bounds.empty()) {
15761576
llvm::SmallVector<mlir::Value> extents;
15771577
mlir::Type idxTy = builder.getIndexType();
1578-
for (auto extent : seqTy.getShape()) {
1578+
for (auto extent : llvm::reverse(seqTy.getShape())) {
15791579
mlir::Value lb = mlir::arith::ConstantOp::create(
15801580
builder, loc, idxTy, builder.getIntegerAttr(idxTy, 0));
15811581
mlir::Value ub = mlir::arith::ConstantOp::create(
@@ -1607,12 +1607,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
16071607
}
16081608
} else {
16091609
// Lowerbound, upperbound and step are passed as block arguments.
1610-
[[maybe_unused]] unsigned nbRangeArgs =
1610+
unsigned nbRangeArgs =
16111611
recipe.getCombinerRegion().getArguments().size() - 2;
16121612
assert((nbRangeArgs / 3 == seqTy.getDimension()) &&
16131613
"Expect 3 block arguments per dimension");
1614-
for (unsigned i = 2; i < recipe.getCombinerRegion().getArguments().size();
1615-
i += 3) {
1614+
for (int i = nbRangeArgs - 1; i >= 2; i -= 3) {
16161615
mlir::Value lb = recipe.getCombinerRegion().getArgument(i);
16171616
mlir::Value ub = recipe.getCombinerRegion().getArgument(i + 1);
16181617
mlir::Value step = recipe.getCombinerRegion().getArgument(i + 2);
@@ -1623,8 +1622,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
16231622
ivs.push_back(loop.getInductionVar());
16241623
}
16251624
}
1626-
auto addr1 = fir::CoordinateOp::create(builder, loc, refTy, value1, ivs);
1627-
auto addr2 = fir::CoordinateOp::create(builder, loc, refTy, value2, ivs);
1625+
llvm::SmallVector<mlir::Value> reversedIvs(ivs.rbegin(), ivs.rend());
1626+
auto addr1 =
1627+
fir::CoordinateOp::create(builder, loc, refTy, value1, reversedIvs);
1628+
auto addr2 =
1629+
fir::CoordinateOp::create(builder, loc, refTy, value2, reversedIvs);
16281630
auto load1 = fir::LoadOp::create(builder, loc, addr1);
16291631
auto load2 = fir::LoadOp::create(builder, loc, addr2);
16301632
mlir::Value res =

flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -381,8 +381,8 @@
381381
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
382382
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
383383
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
384-
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
385-
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
384+
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
385+
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
386386
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
387387
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
388388
! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD1]], %[[LOAD2]] : i32
@@ -427,8 +427,8 @@
427427
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
428428
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
429429
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
430-
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
431-
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
430+
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
431+
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
432432
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
433433
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
434434
! CHECK: %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] {{.*}} : f32
@@ -612,8 +612,8 @@
612612
! CHECK: %[[UB2:.*]] = arith.constant 99 : index
613613
! CHECK: %[[STEP2:.*]] = arith.constant 1 : index
614614
! CHECK: fir.do_loop %[[IV2:.*]] = %[[LB2]] to %[[UB2]] step %[[STEP2]] {
615-
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
616-
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
615+
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
616+
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
617617
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
618618
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
619619
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
@@ -641,8 +641,8 @@
641641
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
642642
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
643643
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
644-
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
645-
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
644+
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
645+
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
646646
! CHECK: %[[LOAD1]] = fir.load %[[COORD1]] : !fir.ref<i32>
647647
! CHECK: %[[LOAD2]] = fir.load %[[COORD2]] : !fir.ref<i32>
648648
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32

flang/test/Lower/OpenACC/acc-reduction.f90

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -423,15 +423,15 @@
423423
! CHECK: } combiner {
424424
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
425425
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
426-
! CHECK: %[[UB0:.*]] = arith.constant 99 : index
426+
! CHECK: %[[UB0:.*]] = arith.constant 9 : index
427427
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
428428
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
429429
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
430-
! CHECK: %[[UB1:.*]] = arith.constant 9 : index
430+
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
431431
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
432432
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
433-
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
434-
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
433+
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
434+
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
435435
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
436436
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
437437
! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD1]], %[[LOAD2]] : i32
@@ -469,15 +469,15 @@
469469
! CHECK: } combiner {
470470
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xf32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xf32>>):
471471
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
472-
! CHECK: %[[UB0:.*]] = arith.constant 99 : index
472+
! CHECK: %[[UB0:.*]] = arith.constant 9 : index
473473
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
474474
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
475475
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
476-
! CHECK: %[[UB1:.*]] = arith.constant 9 : index
476+
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
477477
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
478478
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
479-
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
480-
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
479+
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
480+
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
481481
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
482482
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
483483
! CHECK: %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] {{.*}} : f32
@@ -650,19 +650,19 @@
650650
! CHECK: } combiner {
651651
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10x2xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10x2xi32>>):
652652
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
653-
! CHECK: %[[UB0:.*]] = arith.constant 99 : index
653+
! CHECK: %[[UB0:.*]] = arith.constant 1 : index
654654
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
655655
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
656656
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
657657
! CHECK: %[[UB1:.*]] = arith.constant 9 : index
658658
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
659659
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
660660
! CHECK: %[[LB2:.*]] = arith.constant 0 : index
661-
! CHECK: %[[UB2:.*]] = arith.constant 1 : index
661+
! CHECK: %[[UB2:.*]] = arith.constant 99 : index
662662
! CHECK: %[[STEP2:.*]] = arith.constant 1 : index
663663
! CHECK: fir.do_loop %[[IV2:.*]] = %[[LB2]] to %[[UB2]] step %[[STEP2]] {
664-
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
665-
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
664+
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
665+
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
666666
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
667667
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
668668
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
@@ -683,15 +683,15 @@
683683
! CHECK: } combiner {
684684
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
685685
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
686-
! CHECK: %[[UB0:.*]] = arith.constant 99 : index
686+
! CHECK: %[[UB0:.*]] = arith.constant 9 : index
687687
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
688688
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
689689
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
690-
! CHECK: %[[UB1:.*]] = arith.constant 9 : index
690+
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
691691
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
692692
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
693-
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
694-
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
693+
! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
694+
! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
695695
! CHECK: %[[LOAD1]] = fir.load %[[COORD1]] : !fir.ref<i32>
696696
! CHECK: %[[LOAD2]] = fir.load %[[COORD2]] : !fir.ref<i32>
697697
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32

0 commit comments

Comments
 (0)