-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[CoroEarly] Hide promise alloca for later passes #139243
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-coroutines @llvm/pr-subscribers-llvm-transforms Author: Weibo He (NewSigma) ChangesCurrently coroutine promises are modeled as allocas. This is problematic because other middle-end passes will assume promise dead after coroutine suspend, leading to misoptimizations. I propose the front ends remain free to emit and use allocas to model coro promise. At CoroEarly, we will replace all uses of promise alloca with Fix #105595 Full diff: https://github.com/llvm/llvm-project/pull/139243.diff 6 Files Affected:
diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst
index 60e32dc467d27..157b20b7d8d73 100644
--- a/llvm/docs/Coroutines.rst
+++ b/llvm/docs/Coroutines.rst
@@ -2121,10 +2121,11 @@ Coroutine Transformation Passes
===============================
CoroEarly
---------
-The pass CoroEarly lowers coroutine intrinsics that hide the details of the
-structure of the coroutine frame, but, otherwise not needed to be preserved to
-help later coroutine passes. This pass lowers `coro.frame`_, `coro.done`_,
-and `coro.promise`_ intrinsics.
+The CoroEarly pass ensures later middle end passes correctly interpret coroutine
+semantics and lowers coroutine intrinsics that not needed to be preserved to
+help later coroutine passes. This pass lowers `coro.promise`_ that outside the
+coroutine body, `coro.frame`_ and `coro.done`_ intrinsics. It replace uses of
+promise alloca with `coro.promise`_ intrinsic.
.. _CoroSplit:
diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroShape.h b/llvm/include/llvm/Transforms/Coroutines/CoroShape.h
index ea93ced1ce29e..aee918946c70f 100644
--- a/llvm/include/llvm/Transforms/Coroutines/CoroShape.h
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroShape.h
@@ -79,7 +79,8 @@ struct Shape {
// Scan the function and collect the above intrinsics for later processing
void analyze(Function &F, SmallVectorImpl<CoroFrameInst *> &CoroFrames,
- SmallVectorImpl<CoroSaveInst *> &UnusedCoroSaves);
+ SmallVectorImpl<CoroSaveInst *> &UnusedCoroSaves,
+ SmallVectorImpl<CoroPromiseInst *> &CoroPromises);
// If for some reason, we were not able to find coro.begin, bailout.
void invalidateCoroutine(Function &F,
SmallVectorImpl<CoroFrameInst *> &CoroFrames);
@@ -87,7 +88,8 @@ struct Shape {
void initABI();
// Remove orphaned and unnecessary intrinsics
void cleanCoroutine(SmallVectorImpl<CoroFrameInst *> &CoroFrames,
- SmallVectorImpl<CoroSaveInst *> &UnusedCoroSaves);
+ SmallVectorImpl<CoroSaveInst *> &UnusedCoroSaves,
+ SmallVectorImpl<CoroPromiseInst *> &CoroPromises);
// Field indexes for special fields in the switch lowering.
struct SwitchFieldIndex {
@@ -265,13 +267,14 @@ struct Shape {
explicit Shape(Function &F) {
SmallVector<CoroFrameInst *, 8> CoroFrames;
SmallVector<CoroSaveInst *, 2> UnusedCoroSaves;
+ SmallVector<CoroPromiseInst *, 2> CoroPromises;
- analyze(F, CoroFrames, UnusedCoroSaves);
+ analyze(F, CoroFrames, UnusedCoroSaves, CoroPromises);
if (!CoroBegin) {
invalidateCoroutine(F, CoroFrames);
return;
}
- cleanCoroutine(CoroFrames, UnusedCoroSaves);
+ cleanCoroutine(CoroFrames, UnusedCoroSaves, CoroPromises);
}
};
diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index 5375448d2d2e2..ff1e1c8cc56cf 100644
--- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -9,6 +9,7 @@
#include "llvm/Transforms/Coroutines/CoroEarly.h"
#include "CoroInternal.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -165,6 +166,7 @@ static void setCannotDuplicate(CoroIdInst *CoroId) {
void Lowerer::lowerEarlyIntrinsics(Function &F) {
CoroIdInst *CoroId = nullptr;
+ CoroBeginInst *CoroBegin = nullptr;
SmallVector<CoroFreeInst *, 4> CoroFrees;
bool HasCoroSuspend = false;
for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
@@ -175,6 +177,23 @@ void Lowerer::lowerEarlyIntrinsics(Function &F) {
switch (CB->getIntrinsicID()) {
default:
continue;
+ case Intrinsic::coro_begin:
+ case Intrinsic::coro_begin_custom_abi: {
+ auto CBI = cast<CoroBeginInst>(&I);
+
+ // Ignore coro id's that aren't pre-split.
+ auto Id = dyn_cast<CoroIdInst>(CBI->getId());
+ if (Id && !Id->getInfo().isPreSplit())
+ break;
+
+ if (CoroBegin)
+ report_fatal_error(
+ "coroutine should have exactly one defining @llvm.coro.begin");
+ CBI->addRetAttr(Attribute::NonNull);
+ CBI->addRetAttr(Attribute::NoAlias);
+ CoroBegin = CBI;
+ break;
+ }
case Intrinsic::coro_free:
CoroFrees.push_back(cast<CoroFreeInst>(&I));
break;
@@ -218,22 +237,44 @@ void Lowerer::lowerEarlyIntrinsics(Function &F) {
case Intrinsic::coro_destroy:
lowerResumeOrDestroy(*CB, CoroSubFnInst::DestroyIndex);
break;
- case Intrinsic::coro_promise:
- lowerCoroPromise(cast<CoroPromiseInst>(&I));
+ case Intrinsic::coro_promise: {
+ bool OutsideCoro = CoroBegin == nullptr;
+ if (OutsideCoro)
+ lowerCoroPromise(cast<CoroPromiseInst>(&I));
break;
+ }
case Intrinsic::coro_done:
lowerCoroDone(cast<IntrinsicInst>(&I));
break;
}
}
- // Make sure that all CoroFree reference the coro.id intrinsic.
- // Token type is not exposed through coroutine C/C++ builtins to plain C, so
- // we allow specifying none and fixing it up here.
- if (CoroId)
+ if (CoroId) {
+ // Make sure that all CoroFree reference the coro.id intrinsic.
+ // Token type is not exposed through coroutine C/C++ builtins to plain C, so
+ // we allow specifying none and fixing it up here.
for (CoroFreeInst *CF : CoroFrees)
CF->setArgOperand(0, CoroId);
+ if (auto *PA = CoroId->getPromise()) {
+ assert(CoroBegin && "Use Switch-Resumed ABI but missing coro.begin");
+
+ Builder.SetInsertPoint(*CoroBegin->getInsertionPointAfterDef());
+
+ auto *Alignment = Builder.getInt32(PA->getAlign().value());
+ auto *FromPromise = Builder.getInt1(false);
+ SmallVector<Value *, 3> Arg{CoroBegin, Alignment, FromPromise};
+ auto *PI =
+ Builder.CreateIntrinsic(Builder.getPtrTy(), Intrinsic::coro_promise,
+ Arg, {}, "promise.addr");
+ PA->replaceUsesWithIf(PI, [CoroId](Use &U) {
+ bool IsBitcast = U == U.getUser()->stripPointerCasts();
+ bool IsCoroId = U.getUser() == CoroId;
+ return !IsBitcast && !IsCoroId;
+ });
+ }
+ }
+
// Coroutine suspention could potentially lead to any argument modified
// outside of the function, hence arguments should not have noalias
// attributes.
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 7b59c39283ded..325ad36bc666a 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -192,7 +192,8 @@ static CoroSaveInst *createCoroSave(CoroBeginInst *CoroBegin,
// Collect "interesting" coroutine intrinsics.
void coro::Shape::analyze(Function &F,
SmallVectorImpl<CoroFrameInst *> &CoroFrames,
- SmallVectorImpl<CoroSaveInst *> &UnusedCoroSaves) {
+ SmallVectorImpl<CoroSaveInst *> &UnusedCoroSaves,
+ SmallVectorImpl<CoroPromiseInst *> &CoroPromises) {
clear();
bool HasFinalSuspend = false;
@@ -286,6 +287,9 @@ void coro::Shape::analyze(Function &F,
}
}
break;
+ case Intrinsic::coro_promise:
+ CoroPromises.push_back(cast<CoroPromiseInst>(II));
+ break;
}
}
}
@@ -477,7 +481,8 @@ void coro::AnyRetconABI::init() {
void coro::Shape::cleanCoroutine(
SmallVectorImpl<CoroFrameInst *> &CoroFrames,
- SmallVectorImpl<CoroSaveInst *> &UnusedCoroSaves) {
+ SmallVectorImpl<CoroSaveInst *> &UnusedCoroSaves,
+ SmallVectorImpl<CoroPromiseInst *> &CoroPromises) {
// The coro.frame intrinsic is always lowered to the result of coro.begin.
for (CoroFrameInst *CF : CoroFrames) {
CF->replaceAllUsesWith(CoroBegin);
@@ -489,6 +494,11 @@ void coro::Shape::cleanCoroutine(
for (CoroSaveInst *CoroSave : UnusedCoroSaves)
CoroSave->eraseFromParent();
UnusedCoroSaves.clear();
+
+ for (auto *PI : CoroPromises) {
+ PI->replaceAllUsesWith(getPromiseAlloca());
+ PI->eraseFromParent();
+ }
}
static void propagateCallAttrsFromCallee(CallInst *Call, Function *Callee) {
diff --git a/llvm/test/Instrumentation/AddressSanitizer/skip-coro.ll b/llvm/test/Instrumentation/AddressSanitizer/skip-coro.ll
index 65b27ee2241dd..ed3af686d5257 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/skip-coro.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/skip-coro.ll
@@ -15,8 +15,9 @@ define ptr @foo() #0 {
entry:
%__promise = alloca %struct.Promise, align 8
%0 = call token @llvm.coro.id(i32 16, ptr nonnull %__promise, ptr null, ptr null)
- %1 = call ptr @llvm.coro.noop()
- ret ptr %1
+ %1 = call ptr @llvm.coro.begin(token %0, ptr null)
+ %2 = call ptr @llvm.coro.noop()
+ ret ptr %2
}
declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr)
diff --git a/llvm/test/Transforms/Coroutines/gh105595.ll b/llvm/test/Transforms/Coroutines/gh105595.ll
new file mode 100644
index 0000000000000..cc23419bf7ff4
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/gh105595.ll
@@ -0,0 +1,32 @@
+; Test that store-load operation that crosses suspension point will not be eliminated by DSE
+; Coro result conversion function that attempts to modify promise shall produce this pattern
+; RUN: opt < %s -passes='coro-early,dse' -S | FileCheck %s
+
+define void @fn() presplitcoroutine {
+ %__promise = alloca ptr, align 8
+ %id = call token @llvm.coro.id(i32 16, ptr %__promise, ptr @fn, ptr null)
+ %hdl = call ptr @llvm.coro.begin(token %id, ptr null)
+; CHECK: %promise.addr = call ptr @llvm.coro.promise(ptr %hdl, i32 8, i1 false)
+ %save = call token @llvm.coro.save(ptr null)
+ %sp = call i8 @llvm.coro.suspend(token %save, i1 false)
+ %flag = icmp ule i8 %sp, 1
+ br i1 %flag, label %resume, label %suspend
+
+resume:
+; CHECK: call void @use_value(ptr %promise.addr)
+ call void @use_value(ptr %__promise)
+ br label %suspend
+
+suspend:
+ call i1 @llvm.coro.end(ptr null, i1 false, token none)
+; load value when resume
+; CHECK: %null = load ptr, ptr %promise.addr, align 8
+ %null = load ptr, ptr %__promise, align 8
+ call void @use_value(ptr %null)
+; store value when suspend
+; CHECK: store ptr null, ptr %promise.addr, align 8
+ store ptr null, ptr %__promise, align 8
+ ret void
+}
+
+declare void @use_value(ptr)
|
Could you please have a look? @ChuanqiXu9 @nikic |
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The high level idea looks good. But why do we hack in CoroEarly pass? Can't we generate yet another intrinsic in the frontend? And also, why promise is different from other allocas in the purpose (avoid mis-optimization)?
The intrinsic cannot be We can certainly generate another intrinsic in the frontend. However, there have been too many coroutine-related intrinsics. I would personally prefer to avoid introducing new intrinsic unless necessary.
Only the promise alloca is visible to the caller, whereas other allocas remain internal. This visibility difference accounts for the observed mis-optimization effect. |
I think it is better to consider if it should be instead of the numbers.
|
Well, I don’t see a strong reason not to use |
It is a thought, not a requirement. If your experiment with |
Now that the patch correctly compiles the three reproducers mentioned in the issue, I think it’s not a bad idea to mark it as ready for review. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure. Now how does CoroFrame pass generate the coroutine frame and generate the promise field correctly?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, not an approve
We lower coro.promise before |
My question is, preivously, there were many logics about PromiseAlloca in CoroFrame. Then now, why the code can work properly without touching these logic? And if it does work intentionally, we should cleanup these logics. |
Apologies for the incorrect function name earlier and I corrected it. To clarify, we lower coro.promise before the CoroFrame pass. These logics are essential for frame generation. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks.
Could you please help merge the pr? Thank you. |
Please update mail info. We don't accept github no user reply mail. |
Done |
Currently coroutine promises are modeled as allocas. This is problematic because other middle-end passes will assume promise dead after coroutine suspend, leading to misoptimizations.
I propose the front ends remain free to emit and use allocas to model coro promise. At CoroEarly, we will replace all uses of promise alloca with
coro.promise
. Non coroutine passes should only access promise throughcoro.promise
. Then at CoroSplit, we will lowercoro.promise
back to promise alloca again. So that it will be correctly collected into coro frame. Note that we do not have to bother maintainers of other middle-end passes.Fix #105595