-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[Flang][MLIR] - Handle the mapping of subroutine arguments when they are subsequently used inside the region of an omp.target
Op
#134967
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
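@llvm/pr-subscribers-flang-fir-hlfir @llvm/pr-subscribers-flang-openmp Author: Pranav Bhandarkar (bhandarkar-pranav) Changes: This is a fix for #134912, which is a problem with mapping `fir.boxchar<k>` type values to the target, i.e. an `omp.target` op. There really are two problems. Fixing the first exposed the second. The first problem is that OpenMP lowering of maps in `omp.target` in Flang cannot handle the mapping of a value that doesn't have a defining operation. In other words, a value that is a block argument. This is a corner case and it happens when a value is used inside the region of an `omp.target` and that value is a block argument in the region enclosing the `omp.target` op. This patch fixes this by trying its best to not use a block argument inside the `omp.target` if it is possible. The second problem this fixes is that it adds bounds to `omp.map.info` ops that map `fir.char<k, ?>` types by extracting the length from the corresponding `fir.boxchar`. Full diff: https://github.com/llvm/llvm-project/pull/134967.diff 3 Files Affected: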
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 312557d5da07e..ae58bf9a5b052 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -219,12 +219,18 @@ static void bindEntryBlockArgs(lower::AbstractConverter &converter,
auto bindSingleMapLike = [&converter,
&firOpBuilder](const semantics::Symbol &sym,
+ const mlir::Value val,
const mlir::BlockArgument &arg) {
// Clones the `bounds` placing them inside the entry block and returns
// them.
auto cloneBound = [&](mlir::Value bound) {
if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
- mlir::Operation *clonedOp = firOpBuilder.clone(*bound.getDefiningOp());
+ mlir::Operation *definingOp = bound.getDefiningOp();
+ mlir::Operation *clonedOp = firOpBuilder.clone(*definingOp);
+ // Todo: Do we need to check for more operation types?
+ // For now, specializing only for fir::UnboxCharOp
+ if (auto unboxCharOp = mlir::dyn_cast<fir::UnboxCharOp>(definingOp))
+ return clonedOp->getResult(1);
return clonedOp->getResult(0);
}
TODO(converter.getCurrentLocation(),
@@ -268,8 +274,54 @@ static void bindEntryBlockArgs(lower::AbstractConverter &converter,
cloneBounds(v.getLBounds())));
},
[&](const fir::CharBoxValue &v) {
- converter.bindSymbol(
- sym, fir::CharBoxValue(arg, cloneBound(v.getLen())));
+ // In some cases, v.len could reference the input to the
+ // hlfir.declare which is the corresponding v.addr. While this isn't
+ // a big problem by itself, it is desirable to extract this out of
+ // v.addr itself since it's first result will be of type
+ // fir.boxchar<>. For example, consider the following
+ //
+ // func.func private @_QFPrealtest(%arg0: !fir.boxchar<1>)
+ // %2 = fir.dummy_scope : !fir.dscope
+ // %3:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) ->
+ // (!fir.ref<!fir.char<1,?>>, index)
+ // %4:2 = hlfir.declare (%3#0, %3#1, %2):(!fir.ref<!fir.char<1,?>>,
+ // index,!fir.dscope) ->
+ // (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+
+ // In the case above,
+ // v.addr is
+ // %4:2 = hlfir.declare (%3#0, %3#1, %2):(!fir.ref<!fir.char<1,?>>,
+ // index,!fir.dscope) ->
+ // (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+ // v.len is
+ // %3:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) ->
+ // (!fir.ref<!fir.char<1,?>>, index)
+
+ // Mapping this to the target will create a use of %arg0 on the
+ // target. Since omp.target is IsolatedFromAbove, %arg0 will have to
+ // be mapped. Presently, OpenMP lowering of target barfs when it has
+ // to map a value that doesnt have a defining op. This can be fixed.
+ // Or we ensure that v.len is fir.unboxchar %4#0 which will
+ // cause %4#1 to be used on the target and consequently be
+ // mapped to the target. As such then, there wont be any use of the
+ // block argument %arg0 on the target.
+
+ mlir::Value len = v.getLen();
+ if (auto declareOp = val.getDefiningOp<hlfir::DeclareOp>()) {
+ mlir::Value base = declareOp.getBase();
+ if (auto boxCharType =
+ mlir::dyn_cast<fir::BoxCharType>(base.getType())) {
+ mlir::Type lenType = firOpBuilder.getCharacterLengthType();
+ mlir::Type refType =
+ firOpBuilder.getRefType(boxCharType.getEleTy());
+ mlir::Location loc = converter.getCurrentLocation();
+ auto unboxed = firOpBuilder.create<fir::UnboxCharOp>(
+ loc, refType, lenType, base);
+ len = unboxed.getResult(1);
+ }
+ }
+ auto charBoxValue = fir::CharBoxValue(arg, cloneBound(len));
+ converter.bindSymbol(sym, charBoxValue);
},
[&](const fir::UnboxedValue &v) { converter.bindSymbol(sym, arg); },
[&](const auto &) {
@@ -281,6 +333,7 @@ static void bindEntryBlockArgs(lower::AbstractConverter &converter,
auto bindMapLike =
[&bindSingleMapLike](llvm::ArrayRef<const semantics::Symbol *> syms,
+ llvm::ArrayRef<mlir::Value> vars,
llvm::ArrayRef<mlir::BlockArgument> args) {
// Structure component symbols don't have bindings, and can only be
// explicitly mapped individually. If a member is captured implicitly
@@ -289,8 +342,8 @@ static void bindEntryBlockArgs(lower::AbstractConverter &converter,
llvm::copy_if(syms, std::back_inserter(processedSyms),
[](auto *sym) { return !sym->owner().IsDerivedType(); });
- for (auto [sym, arg] : llvm::zip_equal(processedSyms, args))
- bindSingleMapLike(*sym, arg);
+ for (auto [sym, var, arg] : llvm::zip_equal(processedSyms, vars, args))
+ bindSingleMapLike(*sym, var, arg);
};
auto bindPrivateLike = [&converter, &firOpBuilder](
@@ -321,17 +374,20 @@ static void bindEntryBlockArgs(lower::AbstractConverter &converter,
// Process in clause name alphabetical order to match block arguments order.
// Do not bind host_eval variables because they cannot be used inside of the
// corresponding region, except for very specific cases handled separately.
- bindMapLike(args.hasDeviceAddr.syms, op.getHasDeviceAddrBlockArgs());
+ bindMapLike(args.hasDeviceAddr.syms, args.hasDeviceAddr.vars,
+ op.getHasDeviceAddrBlockArgs());
bindPrivateLike(args.inReduction.syms, args.inReduction.vars,
op.getInReductionBlockArgs());
- bindMapLike(args.map.syms, op.getMapBlockArgs());
+ bindMapLike(args.map.syms, args.map.vars, op.getMapBlockArgs());
bindPrivateLike(args.priv.syms, args.priv.vars, op.getPrivateBlockArgs());
bindPrivateLike(args.reduction.syms, args.reduction.vars,
op.getReductionBlockArgs());
bindPrivateLike(args.taskReduction.syms, args.taskReduction.vars,
op.getTaskReductionBlockArgs());
- bindMapLike(args.useDeviceAddr.syms, op.getUseDeviceAddrBlockArgs());
- bindMapLike(args.useDevicePtr.syms, op.getUseDevicePtrBlockArgs());
+ bindMapLike(args.useDeviceAddr.syms, args.useDeviceAddr.vars,
+ op.getUseDeviceAddrBlockArgs());
+ bindMapLike(args.useDevicePtr.syms, args.useDevicePtr.vars,
+ op.getUseDevicePtrBlockArgs());
}
/// Get the list of base values that the specified map-like variables point to.
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 61f8713028a7f..5357aaa8043f2 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -542,7 +542,60 @@ class MapInfoFinalizationPass
// iterations from previous function scopes.
localBoxAllocas.clear();
- // First, walk `omp.map.info` ops to see if any record members should be
+ // First, walk `omp.map.info` ops to see if any of them have varPtrs
+ // with an underlying type of fir.char<k, ?>, i.e a character
+ // with dynamic length. If so, check if they need bounds added.
+ func->walk([&](mlir::omp::MapInfoOp op) {
+ mlir::Value varPtr = op.getVarPtr();
+ mlir::Type underlyingVarType = fir::unwrapRefType(varPtr.getType());
+ if (!mlir::isa<fir::CharacterType>(underlyingVarType))
+ return mlir::WalkResult::advance();
+
+ fir::CharacterType cType =
+ mlir::cast<fir::CharacterType>(underlyingVarType);
+ if (!cType.hasDynamicLen())
+ return mlir::WalkResult::advance();
+
+ if (!op.getBounds().empty())
+ return mlir::WalkResult::advance();
+ // This means varPtr is a BlockArgument. I do not know how to get to a
+ // fir.boxchar<> type of mlir::Value for varPtr. So, skipping this for
+ // now.
+ mlir::Operation *definingOp = varPtr.getDefiningOp();
+ if (!definingOp)
+ return mlir::WalkResult::advance();
+
+ if (auto declOp = mlir::dyn_cast<hlfir::DeclareOp>(definingOp)) {
+ mlir::Value base = declOp.getBase();
+ assert(mlir::isa<fir::BoxCharType>(base.getType()));
+ // mlir::value unboxChar
+ builder.setInsertionPoint(op);
+ fir::BoxCharType boxCharType =
+ mlir::cast<fir::BoxCharType>(base.getType());
+ mlir::Type idxTy = builder.getIndexType();
+ mlir::Type lenType = builder.getCharacterLengthType();
+ mlir::Type refType = builder.getRefType(boxCharType.getEleTy());
+ mlir::Location location = op.getLoc();
+ auto unboxed = builder.create<fir::UnboxCharOp>(location, refType,
+ lenType, base);
+ // len = unboxed.getResult(1);
+ mlir::Value zero = builder.createIntegerConstant(location, idxTy, 0);
+ mlir::Value one = builder.createIntegerConstant(location, idxTy, 1);
+ mlir::Value extent = unboxed.getResult(1);
+ mlir::Value stride = one;
+ mlir::Value ub =
+ builder.create<mlir::arith::SubIOp>(location, extent, one);
+ mlir::Type boundTy = builder.getType<mlir::omp::MapBoundsType>();
+ mlir::Value boundsOp = builder.create<mlir::omp::MapBoundsOp>(
+ location, boundTy, /*lower_bound=*/zero,
+ /*upper_bound=*/ub, /*extent=*/extent, /*stride=*/stride,
+ /*stride_in_bytes = */ true, /*start_idx=*/zero);
+ op.getBoundsMutable().append({boundsOp});
+ }
+ return mlir::WalkResult::advance();
+ });
+
+ // Next, walk `omp.map.info` ops to see if any record members should be
// implicitly mapped.
func->walk([&](mlir::omp::MapInfoOp op) {
mlir::Type underlyingType =
diff --git a/flang/test/Lower/OpenMP/map-character.f90 b/flang/test/Lower/OpenMP/map-character.f90
new file mode 100644
index 0000000000000..2ed2397713b5d
--- /dev/null
+++ b/flang/test/Lower/OpenMP/map-character.f90
@@ -0,0 +1,47 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+subroutine TestOfCharacter(a0, a1, l)
+ character(len=*), intent(in) :: a0
+ character(len=*), intent(inout):: a1
+ integer, intent(in) :: l
+
+ !$omp target map(to:a0) map(from: a1)
+ a1 = a0
+ !$omp end target
+end subroutine TestOfCharacter
+
+
+!CHECK: %[[A1_BOXCHAR_ALLOCA:.*]] = fir.alloca !fir.boxchar<1>
+!CHECK: %[[A0_BOXCHAR_ALLOCA:.*]] = fir.alloca !fir.boxchar<1>
+!CHECK: %[[UNBOXED_ARG0:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+!CHECK: %[[A0_DECL:.*]]:2 = hlfir.declare %[[UNBOXED_ARG0]]#0 typeparams %[[UNBOXED_ARG0]]#1 dummy_scope {{.*}} -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+!CHECK: fir.store %[[A0_DECL]]#0 to %[[A0_BOXCHAR_ALLOCA]] : !fir.ref<!fir.boxchar<1>>
+!CHECK: %[[UNBOXED_ARG1:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+!CHECK: %[[A1_DECL:.*]]:2 = hlfir.declare %[[UNBOXED_ARG1]]#0 typeparams %[[UNBOXED_ARG1]]#1 dummy_scope {{.*}} -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+!CHECK: fir.store %[[A1_DECL]]#0 to %[[A1_BOXCHAR_ALLOCA]] : !fir.ref<!fir.boxchar<1>>
+!CHECK: %[[UNBOXED_A0_DECL:.*]]:2 = fir.unboxchar %[[A0_DECL]]#0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+!CHECK: %[[A0_LB:.*]] = arith.constant 0 : index
+!CHECK: %[[A0_STRIDE:.*]] = arith.constant 1 : index
+!CHECK: %[[A0_UB:.*]] = arith.subi %[[UNBOXED_A0_DECL]]#1, %[[A0_STRIDE]] : index
+!CHECK: %[[A0_BOUNDS:.*]] = omp.map.bounds lower_bound(%[[A0_LB]] : index) upper_bound(%[[A0_UB]] : index) extent(%[[UNBOXED_A0_DECL]]#1 : index)
+!CHECK-SAME: stride(%[[A0_STRIDE]] : index) start_idx(%[[A0_LB]] : index) {stride_in_bytes = true}
+!CHECK: %[[A0_MAP:.*]] = omp.map.info var_ptr(%[[A0_DECL]]#1 : !fir.ref<!fir.char<1,?>>, !fir.char<1,?>) map_clauses(to) capture(ByRef) bounds(%[[A0_BOUNDS]]) -> !fir.ref<!fir.char<1,?>> {name = "a0"}
+!CHECK: %[[UNBOXED_A1_DECL:.*]]:2 = fir.unboxchar %[[A1_DECL]]#0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+!CHECK: %[[A1_LB:.*]] = arith.constant 0 : index
+!CHECK: %[[A1_STRIDE:.*]] = arith.constant 1 : index
+!CHECK: %[[A1_UB:.*]] = arith.subi %[[UNBOXED_A1_DECL]]#1, %[[A1_STRIDE]] : index
+!CHECK: %[[A1_BOUNDS:.*]] = omp.map.bounds lower_bound(%[[A1_LB]] : index) upper_bound(%[[A1_UB]] : index) extent(%[[UNBOXED_A1_DECL]]#1 : index)
+!CHECK-SAME: stride(%[[A1_STRIDE]] : index) start_idx(%[[A1_LB]] : index) {stride_in_bytes = true}
+!CHECK: %[[A1_MAP:.*]] = omp.map.info var_ptr(%[[A1_DECL]]#1 : !fir.ref<!fir.char<1,?>>, !fir.char<1,?>) map_clauses(from) capture(ByRef) bounds(%[[A1_BOUNDS]]) -> !fir.ref<!fir.char<1,?>> {name = "a1"}
+
+!CHECK: %[[A0_BOXCHAR_MAP:.*]] = omp.map.info var_ptr(%[[A0_BOXCHAR_ALLOCA]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>) map_clauses(implicit, to) capture(ByRef) -> !fir.ref<!fir.boxchar<1>> {name = ""}
+!CHECK: %[[A1_BOXCHAR_MAP:.*]] = omp.map.info var_ptr(%[[A1_BOXCHAR_ALLOCA]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>) map_clauses(implicit, to) capture(ByRef) -> !fir.ref<!fir.boxchar<1>> {name = ""}
+
+!CHECK: omp.target map_entries(%[[A0_MAP]] -> %[[TGT_A0:.*]], %[[A1_MAP]] -> %[[TGT_A1:.*]], %[[A0_BOXCHAR_MAP]] -> %[[TGT_A0_BOXCHAR:.*]], %[[A1_BOXCHAR_MAP]] -> %[[TGT_A1_BOXCHAR:.*]] : !fir.ref<!fir.char<1,?>>, !fir.ref<!fir.char<1,?>>, !fir.ref<!fir.boxchar<1>>, !fir.ref<!fir.boxchar<1>>) {
+!CHECK: %[[TGT_A1_BC_LD:.*]] = fir.load %[[TGT_A1_BOXCHAR]] : !fir.ref<!fir.boxchar<1>>
+!CHECK: %[[TGT_A0_BC_LD:.*]] = fir.load %[[TGT_A0_BOXCHAR]] : !fir.ref<!fir.boxchar<1>>
+!CHECK: %[[UNBOXED_TGT_A0:.*]]:2 = fir.unboxchar %[[TGT_A0_BC_LD]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+!CHECK: %[[TGT_A0_DECL:.*]]:2 = hlfir.declare %[[TGT_A0]] typeparams %[[UNBOXED_TGT_A0]]#1 {{.*}} -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+!CHECK: %[[UNBOXED_TGT_A1:.*]]:2 = fir.unboxchar %[[TGT_A1_BC_LD]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+!CHECK: %[[TGT_A1_DECL:.*]]:2 = hlfir.declare %[[TGT_A1]] typeparams %[[UNBOXED_TGT_A1]]#1 {{.*}} -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+
|
flang/lib/Lower/OpenMP/OpenMP.cpp
Outdated
// (!fir.ref<!fir.char<1,?>>, index) | ||
|
||
// Mapping this to the target will create a use of %arg0 on the | ||
// target. Since omp.target is IsolatedFromAbove, %arg0 will have to |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I do not understand why using %3#2
in the target code results in mapping of %arg0
- can you please clarify? It should be just a use of a scalar index
value, that can be made firstprivate
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@vzakhari - %arg0
itself isn't a scalar (It is fir.boxchar
) From my understanding, it'll then have to be mapped so that it can be "unboxed" (fir.unbox %arg0
).
The problem really is that v.len
should have been expressed in terms of %4:2
. (Please correct me if I am wrong about my understanding of any of these points)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I see it now that cloneBound
clones the defining operation given one of its results. It was also assuming that the cloned result is #0
of the operation, which is wrong, and you fixed it for UnboxCharOp
.
This looks broken to me. I think what we really want to pass to the OpenMP region is the scalar bound value, and this can be done without cloning the whole defining operation.
The bounds should be scalars, as I understand, so it should be unnecessary to clone the char box to get the bound value.
Consider how this case works:
subroutine TestOfCharacter(a0, a1, l)
integer, intent(in) :: l
real, intent(in) :: a0(l:)
real, intent(inout):: a1(l:)
!$omp target map(to:a0) map(from: a1)
a1 = a0
!$omp end target
end subroutine TestOfCharacter
The lower bounds are stored into temporaries for passing them via implicit maps to the target region:
%0 = fir.alloca i32
%1 = fir.alloca i32
%2 = fir.dummy_scope : !fir.dscope
%3:2 = hlfir.declare %arg2 dummy_scope %2 {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QFtestofcharacterEl"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
%4 = fir.load %3#0 : !fir.ref<i32>
fir.store %4 to %0 : !fir.ref<i32>
...
%9 = fir.load %3#0 : !fir.ref<i32>
fir.store %9 to %1 : !fir.ref<i32>
...
%24 = omp.map.info var_ptr(%1 : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = ""}
%25 = omp.map.info var_ptr(%0 : !fir.ref<i32>, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = ""}
omp.target map_entries(%18 -> %arg3, %23 -> %arg4, %24 -> %arg5, %25 -> %arg6 : !fir.ref<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>, !fir.ref<i32>, !fir.ref<i32>) {
%26 = fir.load %arg6 : !fir.ref<i32>
%27 = fir.load %arg5 : !fir.ref<i32>
I do not know exactly where this happens, but I think the character length should be handled the same way.
Do you know where this array lower bounds handling happens?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since the lower bound l
is needed in the target OpenMP region in the hlfir.declare
ops for a0
and a1
, it is implicitly captured. It is explicitly mapped though because all values used inside the omp.target
region need to be mapped to incoming block arguments because omp.target
is IsolatedFromAbove
.
The problem in the case that this PR is solving is that this process of creating a temporary for an implicitly captured value is contingent upon such a value having a definingOp
that is non-null. In the case of block arguments, the definingOp is null.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't the length of the character (i.e. the index
value itself, not its defining op) also needed in the hlfir.declare
of the character var? Or is it different because in the array lower bound there is !fir.ref<i32>
and in the length case it is i32
?
The problem in the case that this PR is solving is that this process of creating a temporary for an implicitly captured value is contingent upon such a value having a definingOp that is non-null. In the case of block arguments, the definingOp is null.
Can you please point me to where this is done?
Again, my general concern here is that we should probably just pass the character length value as a literal firstprivate, instead of cloning the defining fir.unboxchar
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://github.com/llvm/llvm-project/blob/main/flang/lib/Lower/OpenMP/OpenMP.cpp#L1371 this is the segment I believe, we map or clone across dependencies on the bounds
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you! So the temporarization of the direct uses is run after bindEntryBlockArgs
, and I assume it will kick in if we just generate a direct use of the index
Value of the character length. Won't it happen automatically, if you just avoid the cloning, i.e., in the original code, replace this:
converter.bindSymbol(
sym, fir::CharBoxValue(arg, cloneBound(v.getLen())));
with this:
converter.bindSymbol(
sym, fir::CharBoxValue(arg, v.getLen()));
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, this is an interesting suggestion. Let me think about this (and try it) before getting back to you.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@vzakhari - For the reduced testcase that I have added to this PR, not cloning the length works. I'll test this a little more rigorously, while also trying to unearth the motivation for the presence of the clone in the first place (it predates my PR of course)
…ck that will advance the walk in case the MapInfoOp already has bounds
…utside omp target This patch handles block arguments that are live-in into the target region of an omp.target op. This is done by simply reusing the mapping mechanism in place for values that are not block arguments. Further, in MapInfoFinalizationPass, this patch adds bounds to maps that map `!fir.ref<!fir.char<k, ?>>` types. Also, we don't clone bounds when binding entry block arguments any more.
aceffbc
to
a482741
Compare
@vzakhari - I have changed my approach and updated this PR. I have tested this locally to the extent that I could and things work fine. Could you please review this PR? @agozillon @TIFitis @raghavendhra @ergawy @skatrak - Could you please review this PR? TIA |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Pranav, I gave this a somewhat superficial look and seems reasonable to me. Please wait for @vzakhari's approval before merging, although it's probably best if someone more knowledgeable about OpenMP map handling gave it a look as well.
// with dynamic length. If so, check if they need bounds added. | ||
func->walk([&](mlir::omp::MapInfoOp op) { | ||
if (!op.getBounds().empty()) | ||
return mlir::WalkResult::advance(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Here you can use the void
callback specialization of Operation::walk()
, since we don't need to interrupt the walk or skip nested operations.
return mlir::WalkResult::advance(); | |
return; |
return builder.create<mlir::omp::MapBoundsOp>( | ||
loc, boundTy, /*lower_bound=*/zero, | ||
/*upper_bound=*/ub, /*extent=*/extent, /*stride=*/stride, | ||
/*stride_in_bytes = */ true, /*start_idx=*/zero); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/*stride_in_bytes = */ true, /*start_idx=*/zero); | |
/*stride_in_bytes=*/ true, /*start_idx=*/zero); |
|
||
!CHECK: %[[A0_BOXCHAR_ALLOCA:.*]] = fir.alloca !fir.boxchar<1> | ||
!CHECK: %[[A1_BOXCHAR_ALLOCA:.*]] = fir.alloca !fir.boxchar<1> | ||
!CHECK: %[[UNBOXED_ARG0:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: I'd suggest not hard coding function arguments like %arg0
, but rather capturing it from a check line (e.g. CHECK: func.func @...(%[[ARG:.*]]...
) just in case this breaks in the future.
auto bindSingleMapLike = [&converter, | ||
&firOpBuilder](const semantics::Symbol &sym, | ||
const mlir::BlockArgument &arg) { | ||
// Clones the `bounds` placing them inside the entry block and returns | ||
// them. | ||
auto cloneBound = [&](mlir::Value bound) { | ||
if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { | ||
mlir::Operation *clonedOp = firOpBuilder.clone(*bound.getDefiningOp()); | ||
return clonedOp->getResult(0); | ||
} | ||
TODO(converter.getCurrentLocation(), | ||
"target map-like clause operand unsupported bound type"); | ||
}; | ||
|
||
auto cloneBounds = [cloneBound](llvm::ArrayRef<mlir::Value> bounds) { | ||
llvm::SmallVector<mlir::Value> clonedBounds; | ||
llvm::transform(bounds, std::back_inserter(clonedBounds), | ||
[&](mlir::Value bound) { return cloneBound(bound); }); | ||
return clonedBounds; | ||
}; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I remember it being essential to clone the bounds rather than using them directly inside the region. But, maybe things have changed which allows this now.
Can you please check if the following snippet works fine with this change?
subroutine omp_target_implicit(n)
integer :: n
integer :: a(n, 1024)
!$omp target
a(2,3) = 10
!$omp end target
end subroutine omp_target_implicit
Also, it would be great if @jeanPerier could +1 this change :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks @TIFitis for taking a look. The above snippet compiled fine with this PR.
fir.boxchar<k>
and use it to set the bounds omp.map.info
ops.omp.target
Op
This is a fix for #134912, which is a problem with mapping `fir.boxchar<k>` type values to the target, i.e. an `omp.target` op.
There really are two problems. Fixing the first exposed the second. The first problem is that OpenMP lowering of maps in `omp.target` in Flang cannot handle the mapping of a value that doesn't have a defining operation. In other words, a value that is a block argument. This is a corner case and it happens when a value is used inside the region of an `omp.target` and that value is a block argument in the region enclosing the `omp.target` op. This patch fixes this by trying its best to not use a block argument inside the `omp.target` if it is possible.
The second problem this fixes is that it adds bounds to `omp.map.info` ops that map `fir.char<k, ?>` types by extracting the length from the corresponding `fir.boxchar`