-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[clang][coroutines] Run-time crash with optimization when using coroutine with co_await
#105595
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Comments
@llvm/issue-subscribers-coroutines Author: Douglas (dgg5503)
## Summary
Starting with Clang 17 at commit 54225c4, attempting to execute the provided reduced programs compiled with `clang` using `-O2` results in a crash at runtime.
Specifically, the crash occurs in the function `void bx::await_suspend` when the result of `q`, a null pointer, is dereferenced to call `x()`. I have confirmed that both reproducers run without triggering sanitizers in my environment (address, memory, undefined, etc.).
Reproducers
#include <coroutine>
#include <memory>
struct br;
// Holds a coroutine handle (`bv`) together with an integer flag (`bt`).
struct bs {
// Defined out of line.
bs(std::coroutine_handle<br>);
// Returns the current value of the flag.
int x() { return bt; }
// Resets the flag to zero.
void bu() { bt = 0; }
// Resumes the coroutine referred to by `bv`.
void t() { bv.resume(); }
std::coroutine_handle<br> bv;
int bt;
};
// Task type returned by the coroutines in this reproducer. Its promise type
// is `br`, and it owns a std::shared_ptr<bs> created from the coroutine handle.
struct bw {
using promise_type = br;
// Creates the `bs` object from handle `h` via make_shared.
bw(std::coroutine_handle<promise_type> h) : v(make_shared<bs>(h)) {}
// Forwards to bs::t() on the held object.
void t() { v->t(); }
// Returns a copy of the held shared pointer.
std::shared_ptr<bs> r() { return v; }
std::shared_ptr<bs> v;
};
// Awaiter that wraps a `bw` task (`by`).
struct bx {
// Always returns 0, i.e. never reports the result as ready.
int await_ready() { return 0; }
// Produces no value on resumption.
void await_resume() {}
// Defined out of line.
void await_suspend(std::coroutine_handle<br>);
bw by;
};
// Promise type for `bw` coroutines. Besides the usual promise hooks it stores
// a raw `bs *` (`v`) that is set through u() and read through r().
struct br {
// Both suspend points use std::suspend_always.
auto initial_suspend() { return std::suspend_always(); }
auto final_suspend() noexcept { return std::suspend_always(); }
// Returns a coroutine handle (deduced return type), not a `bw`.
auto get_return_object() {
return std::coroutine_handle<br>::from_promise(*this);
}
void unhandled_exception() {}
// Wraps an awaited `bw` in a `bx` awaiter.
auto await_transform(bw h) { return bx(h); }
void return_void() {}
// Stores the `bs` pointer in the promise.
void u(bs *h) { v = h; }
// Returns the stored `bs` pointer.
bs *r() { return v; }
bs *v;
};
// Reads the `bs` pointer stored in the promise reached through `h`; if that
// object's flag is non-zero, clears the flag on the `bs` owned by `by`.
// Finally resumes the coroutine of `by`.
void bx::await_suspend(std::coroutine_handle<br> h) {
// `auto` (not `auto &`) makes `bi` a copy of the promise object.
auto bi = h.promise();
// The `bs *` held in the promise's `v` member.
auto q = bi.r();
auto s = by.r();
// The issue reports the -O2 crash here: `q` is null when dereferenced.
if (q->x())
s->bu();
by.t();
}
// Zeroes the flag, stores the handle, and registers this object in the
// handle's promise through br::u().
bs::bs(std::coroutine_handle<br> h) {
bt = 0;
bv = h;
auto &bz = h.promise();
// After this call the promise's `v` member points at this `bs`.
bz.u(this);
}
// Coroutine that logs to stderr and then finishes with co_return.
bw ca() {
fprintf(stderr, "ca (co_return) called\n");
co_return;
}
// Coroutine that logs to stderr and then awaits the task returned by ca().
// The awaited `bw` goes through br::await_transform, which wraps it in `bx`.
bw cb() {
fprintf(stderr, "cb (co_await) called\n");
co_await ca();
}
// Namespace-scope task initialized from a call to cb().
bw cc = cb();
// Copies the global task and resumes its coroutine once through bw::t().
int main() {
bw cd(cc);
cd.t();
} Here is the same reproducer reduced including headers from glibc 2.35 / glibcxx 3.4.30 namespace std {
// Reduced helper: the nested type `c` holds an array of `a` ints.
template <int a> struct b {
struct c {
int d[a];
};
};
// Reduced stand-ins for the coroutine support types used by the reproducer.
inline namespace {
// Traits type that simply derives from the coroutine's return type `e`.
template <typename e> struct coroutine_traits : e {};
template <typename = void> struct coroutine_handle;
// The void specialization is reduced to an empty struct.
template <> struct coroutine_handle<> {};
// Typed handle wrapping the raw pointer `k`.
template <typename f> struct coroutine_handle {
// Builds a handle from a promise object; `k` comes from
// __builtin_coro_promise(&h, 0, 1).
// NOTE(review): the last builtin argument presumably selects the
// promise-to-frame direction here and frame-to-promise in m() — confirm
// against the Clang builtin documentation.
static coroutine_handle g(f &h) {
coroutine_handle i;
i.k = __builtin_coro_promise(&h, 0, 1);
return i;
}
// Builds a handle directly from a raw pointer.
static coroutine_handle from_address(void *h) {
coroutine_handle i;
i.k = h;
return i;
}
coroutine_handle<> j;
// Converts to the void handle by returning the member `j`.
operator coroutine_handle<>() { return j; }
// Resumes the coroutine at `k`.
void l() { __builtin_coro_resume(k); }
// Returns the promise as `f &`, obtained from __builtin_coro_promise(k, 0, 0).
f &m() {
void *aa = __builtin_coro_promise(k, 0, 0);
return *static_cast<f *>(aa);
}
void *k;
};
// Awaiter whose await_ready() always returns 0 and whose other hooks are
// empty; used by the promise type for its initial and final suspend points.
struct ab {
int await_ready() noexcept { return 0; }
void await_suspend(coroutine_handle<>) noexcept {}
void await_resume() noexcept {}
};
} // namespace
// Rebind helper: for a template instance w<ac, ad...> and a type y, the
// member alias `c` names w<y>. Only the partial specialization is defined.
template <typename, typename> struct n;
template <template <typename...> class w, typename y, typename ac,
typename... ad>
struct n<w<ac, ad...>, y> {
using c = w<y>;
};
} // namespace std
// Declaration of the placement form of operator new (no definition here).
void *operator new(unsigned long, void *);
// Constructs an `ac` in the storage at `h` from the arguments `o...` using
// placement new.
template <typename ac, typename... ae> void af(ac *h, ae... o) {
new (h) ac(o...);
}
// Reduced allocator: ah() obtains sizeof(ac) bytes from operator new and
// returns them as `ac *`. The int argument is ignored.
template <typename ac> struct ag {
ac *ah(int) { return static_cast<ac *>(operator new(sizeof(ac))); }
};
namespace std {
// aj<ai, y> is the rebind result n<ai, y>::c.
template <typename ai, typename y> using aj = n<ai, y>::c;
// Allocator-traits stand-in; only the specialization for ag<ac> is defined.
template <typename> struct p;
template <typename ac> struct p<ag<ac>> {
using ak = ag<ac>;
using al = ac;
using am = ac *;
using an = int;
// Allocates through the allocator's ah().
static am ah(ak h, an) { return h.ah(0); }
// Constructs an object at `o` from `ap...` through af(); the allocator
// argument is unused.
template <typename y, typename... ae> static void ao(ak, y o, ae... ap) {
af(o, ap...);
}
};
} // namespace std
namespace std {
// Holds a pointer (`ar`) of the allocator's pointer type; the allocator
// passed to the constructor is not stored.
template <typename ai> struct aq {
using am = p<ai>::am;
using al = p<ai>::al;
aq(ai, am o) : ar(o) {}
// Returns the held pointer.
al *as() { return ar; }
am ar;
};
// Allocates through p<ai>::ah and wraps the resulting pointer in an aq<ai>.
template <typename ai> aq<ai> at(ai h) { return {h, p<ai>::ah(h, 0)}; }
} // namespace std
// In-place storage for an `ac`: `av` is a std::b<sizeof(ac)>::c, and ar()
// returns its address reinterpreted as `ac *`.
template <typename ac> struct au {
std::b<sizeof(ac)>::c av;
ac *ar() {
void *j = &av;
return static_cast<ac *>(j);
}
};
// Reduced shared-pointer machinery: bq<T>(args...) yields a bo<T> whose
// pointer refers to a T constructed inside a separately allocated ba block.
namespace std {
template <typename> struct aw;
// Empty wrapper; its constructor takes an `ac` and discards it.
template <typename ac> struct ax {
ax(ac) {}
};
// Tag wrapper carrying an allocator value in `az`.
template <typename ai> struct ay {
ai az;
};
// Block that stores an `ac` in place (inside `bd.av`).
template <typename ac, typename ai> struct ba {
struct bb : ax<ai> {
au<ac> av;
};
// The allocator type `ai` rebound to this block type.
using bc = aj<ai, ba>;
// Constructs the stored `ac` from `o...` through p<ai>::ao.
template <typename... ae> ba(ai h, ae... o) : bd(h) {
p<ai>::ao(h, ar(), o...);
}
// Address of the in-place storage as `ac *`.
ac *ar() { return bd.av.ar(); }
bb bd;
};
// Its constructor allocates a ba<ac, ai>, placement-constructs it, and writes
// the address of the stored object into the reference parameter `h`.
struct be {
template <typename ac, typename ai, typename... ae>
be(ac *&h, ay<ai> o, ae... ap) {
typedef ba<ac, ai> bf;
typename bf::bc bg;
auto bh = at(bg);
bf *bj = bh.as();
auto bk = new (bj) bf(o.az, ap...);
h = bk->ar();
}
};
// Base providing operator->; it downcasts `this` to aw<ac> and returns as().
template <typename ac> struct z {
using bl = ac;
bl *operator->() {
bl *bm = static_cast<aw<ac> *>(this)->as();
return bm;
}
};
// Pointer holder. `ar` is passed by reference to the `bn` initializer, and
// be's constructor assigns it.
template <typename ac> struct aw : z<ac> {
using bl = ac;
bl *as() { return ar; }
template <typename ai, typename... ae> aw(ai h, ae... o) : bn(ar, h, o...) {}
bl *ar;
be bn;
};
// Thin derived type that forwards its constructor arguments to aw<ac>.
template <typename ac> struct bo : aw<ac> {
template <typename ai, typename... ae> bo(ai h, ae... o) : aw<ac>(h, o...) {}
};
// Builds a bo<ac> from an allocator `h` (wrapped in ay) and arguments `o...`.
template <typename ac, typename ai, typename... ae> bo<ac> bp(ai h, ae... o) {
return bo<ac>(ay<ai>{h}, o...);
}
// Builds a bo<ac> using a default-constructed ag<int> as the allocator.
template <typename ac, typename... ae> bo<ac> bq(ae... h) {
return bp<ac>(ag<int>(), h...);
}
} // namespace std
struct br;
// Pairs a coroutine handle (`bv`) with an integer flag (`bt`).
struct bs {
// Defined out of line.
bs(std::coroutine_handle<br>);
// Reads the flag.
int x() { return bt; }
// Clears the flag.
void bu() { bt = 0; }
// Resumes the coroutine behind `bv` through the handle's l().
void t() { bv.l(); }
std::coroutine_handle<br> bv;
int bt;
};
// Task type returned by the coroutines; its promise type is `br` and it holds
// a std::bo<bs> built by bq<bs> from the coroutine handle.
struct bw {
using promise_type = br;
bw(std::coroutine_handle<promise_type> h) : v(bq<bs>(h)) {}
// Forwards to bs::t() on the held object.
void t() { v->t(); }
// Returns a copy of the held std::bo<bs>.
std::bo<bs> r() { return v; }
std::bo<bs> v;
};
// Awaiter holding the awaited `bw` task in `by`.
struct bx {
// Always returns 0.
int await_ready() { return 0; }
void await_resume() {}
// Defined out of line.
void await_suspend(std::coroutine_handle<br>);
bw by;
};
// Promise type for `bw` coroutines; also carries a raw `bs *` (`v`) written
// by u() and read by r().
struct br {
// Both suspend points return the std::ab awaiter.
auto initial_suspend() { return std::ab(); }
auto final_suspend() noexcept { return std::ab(); }
// Returns a coroutine handle (deduced return type) built from this promise.
auto get_return_object() { return std::coroutine_handle<br>::g(*this); }
void unhandled_exception() {}
// Wraps an awaited `bw` in a `bx` awaiter.
auto await_transform(bw h) { return bx(h); }
void return_void() {}
void u(bs *h) { v = h; }
bs *r() { return v; }
bs *v;
};
// When reduced including headers, the following is required under
// clang -O2 @ 54225c457a336b1609c6d064b2b606a9238a28b9, otherwise this
// entire function is optimized out which masks the problem since the crash
// occurs at `q->x()` (q is nullptr).
//
// The function reads the `bs *` stored in the promise reached through `h`;
// if that object's flag is non-zero it clears the flag on the `bs` held by
// `by`, and then it resumes the coroutine of `by`.
#pragma clang optimize off
void bx::await_suspend(std::coroutine_handle<br> h) {
// `auto` (not `auto &`) makes `bi` a copy of the promise object.
auto bi = h.m();
auto q = bi.r();
auto s = by.r();
if (q->x())
s->bu();
by.t();
}
#pragma clang optimize on
// Clears the flag, keeps the handle, and stores `this` in the handle's
// promise through br::u().
bs::bs(std::coroutine_handle<br> h) {
bt = 0;
bv = h;
auto &bz = h.m();
bz.u(this);
}
// Coroutine whose body is a single co_return.
bw ca() { co_return; }
// Coroutine that awaits the task returned by ca().
bw cb() { co_await ca(); }
// Namespace-scope task initialized from a call to cb().
bw cc = cb();
// Copies the global task and resumes its coroutine once through bw::t().
int main() {
bw cd(cc);
cd.t();
} Reproduction StepsLatest Clang -- Crashes$ clang++ --version
clang version 20.0.0git (https://github.com/llvm/llvm-project.git 381a803da253b75c8b7b10bb732e9e90925185e8)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: <redacted>
$ clang++ -O0 -std=c++20 reduced-Version-A.cpp -o reduced-Version-A.out
$ ./reduced-Version-A.out
cb (co_await) called
ca (co_return) called
$ clang++ -O2 -std=c++20 reduced-Version-A.cpp -o reduced-Version-A.out
$ ./reduced-Version-A.out
cb (co_await) called
Segmentation fault (core dumped)
$ clang++ -O0 -std=c++20 reduced-Version-B.cpp -o reduced-Version-B.out
$ ./reduced-Version-B.out
$ clang++ -O2 -std=c++20 reduced-Version-B.cpp -o reduced-Version-B.out
$ ./reduced-Version-B.out
Segmentation fault (core dumped)
Clang At Bisected Commit -- Crashes
$ clang++ --version
clang version 17.0.0 (https://github.com/llvm/llvm-project.git 54225c457a336b1609c6d064b2b606a9238a28b9)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: <redacted>
$ clang++ -O0 -std=c++20 reduced-Version-A.cpp -o reduced-Version-A.out
$ ./reduced-Version-A.out
cb (co_await) called
ca (co_return) called
$ clang++ -O2 -std=c++20 reduced-Version-A.cpp -o reduced-Version-A.out
$ ./reduced-Version-A.out
cb (co_await) called
Segmentation fault (core dumped)
$ clang++ -O0 -std=c++20 reduced-Version-B.cpp -o reduced-Version-B.out
$ ./reduced-Version-B.out
$ clang++ -O2 -std=c++20 reduced-Version-B.cpp -o reduced-Version-B.out
$ ./reduced-Version-B.out
Segmentation fault (core dumped)
Clang Before Bisected Commit -- Does not crash
$ clang++ --version
clang version 17.0.0 (https://github.com/llvm/llvm-project.git 32be3405f57f1e4d0ec0da943434113450583e89)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: <redacted>
$ clang++ -O0 -std=c++20 reduced-Version-A.cpp -o reduced-Version-A.out
$ ./reduced-Version-A.out
cb (co_await) called
ca (co_return) called
$ clang++ -O2 -std=c++20 reduced-Version-A.cpp -o reduced-Version-A.out
$ ./reduced-Version-A.out
cb (co_await) called
ca (co_return) called
$ clang++ -O0 -std=c++20 reduced-Version-B.cpp -o reduced-Version-B.out
$ ./reduced-Version-B.out
$ clang++ -O2 -std=c++20 reduced-Version-B.cpp -o reduced-Version-B.out
$ ./reduced-Version-B.out |
Fix #56532. Effectively, this reverts behavior introduced in https://reviews.llvm.org/D117087, which did two things: 1. Changed delayed to early conversion of the return object. 2. Introduced RVO possibilities because of the early conversion. This patch fixes (1) and removes (2). I already worked on a follow-up for (2) in a separate patch. I believe it's important to split these two because if the RVO causes any problems we can explore reverting (2) while maintaining (1). Notes on some testcase changes: - `pr59221.cpp` changed to `-O1` so we can check that the front-end honors the value checked for. Sounds like `-O3` without RVO is more likely to work with LLVM optimizations... - Comment out deleted members in `coroutine-no-move-ctor.cpp` since behavior now requires copies again. Differential Revision: https://reviews.llvm.org/D145639
Hi, just a small update on this issue. I noticed that if I replace [inline code lost in extraction] Inspection of an AST dump shows that when providing the return type explicitly, the implicit [inline code lost in extraction] Is it possible some aspect of the AST is misconfigured when using 'auto', as I do in my reproducers, that's leading to a runtime crash when optimizations are enabled? Or is this simply a misunderstanding of the coroutine specification on my part (i.e. explicit return type for [inline code lost in extraction] |
cc @bricknerb @usx95 this seems like a coro frontend issue that we might wish to investigate |
@dgg5503, can you perhaps clarify why you think the logic in the code is valid and what you think is wrong with the way it runs? |
I've tried to debug this for a while and simplified the logic to https://godbolt.org/z/9jq75vq6c. Some more context that might be relevant: #56532. |
So I was playing around a little bit, just out of curiosity. https://godbolt.org/z/9jYdTzsY4 demonstrates another reproducer; one important highlight is that, despite setting two variables in the same function, only the non-atomic read fails. So I guess we're missing some write/read dependencies and some backend pass is just eliminating this dead write. Unfortunately I don't know much about the backend to be more useful :( |
Thanks, I think I thought this was a frontend issue because it seemed to depend on [inline code lost in extraction] You can clearly see in the godbolt example that DSE deletes the non-atomic store before returning from the pre-split coroutine representation. I think the following IR snippets explain the issue: [IR snippets lost in extraction]
It looks like C++ coroutines model the promise type as an alloca (local variable) called [inline code lost in extraction] The correct fix is... not to model things that outlive the function as allocas. cc @zmodem @ChuanqiXu9 @alanzhao1 @alinas |
…/llvm-project#105595. Add clang to prepare.py. Default to clang on windows, fix missing debug info in clang release build
Summary
Starting with Clang 17 at commit 54225c4, attempting to execute the provided reduced programs compiled with `clang` using `-O2` results in a crash at runtime.
Specifically, the crash occurs in the function `void bx::await_suspend` when the result of `q`, a null pointer, is dereferenced to call `x()`.
I have confirmed that both reproducers run without triggering sanitizers in my environment (address, memory, undefined, etc.).
Reproducers
reduced-Version-A.cpp -- https://godbolt.org/z/bxEf8Tfc6
Here is the same reproducer reduced including headers from glibc 2.35 / glibcxx 3.4.30
reduced-Version-B.cpp -- https://godbolt.org/z/fsaKof3EE
Reproduction Steps
Latest Clang -- Crashes
Clang At Bisected Commit -- Crashes
Clang Before Bisected Commit -- Does not crash
The text was updated successfully, but these errors were encountered: