From 9b4f6f9e675b9266508f84bbab6d2130abcf9df0 Mon Sep 17 00:00:00 2001
From: Guozhi Wei
Date: Fri, 9 May 2025 18:00:52 +0000
Subject: [PATCH 1/2] [CodeGenPrepare] Make sure the instruction retrieved from
 SunkAddrs is before MemoryInst

Function optimizeBlock may optimize a block multiple times. In the first
iteration of the loop, MemoryInst1 may generate a sunk instruction and store
it into SunkAddrs. In a later iteration of the loop, MemoryInst2 may use the
same address and reuse the sunk instruction stored in SunkAddrs, but
MemoryInst2 may come before MemoryInst1 and therefore before the corresponding
sunk instruction. To avoid a use-before-def error, we need to move the sunk
instruction before MemoryInst2.

This fixes issue 138208.
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp               |  3 ++
 .../CodeGenPrepare/X86/sink-addr-reuse.ll         | 44 +++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index f9dcb472ed1d2..9d491120dcb39 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5913,6 +5913,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   if (SunkAddr) {
     LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
                       << " for " << *MemoryInst << "\n");
+    Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr);
+    if (AddrInst && MemoryInst->comesBefore(AddrInst))
+      AddrInst->moveBefore(MemoryInst->getIterator());
     if (SunkAddr->getType() != Addr->getType()) {
       if (SunkAddr->getType()->getPointerAddressSpace() !=
               Addr->getType()->getPointerAddressSpace() &&
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
new file mode 100644
index 0000000000000..019f311406550
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+declare void @g(ptr)
+
+; %load and %load5 use the same address, %load5 is optimized first, %load is
+; optimized later and reuses the same address computation instruction. We must
+; make sure not to generate a use-before-def error.
+
+define void @f(ptr %arg) {
+; CHECK-LABEL: define void @f(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
+; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]])
+; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
+; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
+; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56
+; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8
+; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
+; CHECK-NEXT: ret void
+;
+bb:
+  %getelementptr = getelementptr i8, ptr %arg, i64 -64
+  %getelementptr1 = getelementptr i8, ptr %arg, i64 -56
+  call void @g(ptr %getelementptr)
+  br label %bb3
+
+bb3:
+  %load = load ptr, ptr %getelementptr, align 8
+  %load4 = load i32, ptr %getelementptr1, align 8
+  %load5 = load ptr, ptr %getelementptr, align 8
+  %add = add i32 1, 0
+  %icmp = icmp eq i32 %add, 0
+  br i1 %icmp, label %bb7, label %bb7
+
+bb7:
+  ret void
+}

From a7508909c07e047932f98e3cc3cc22f1cc144772 Mon Sep 17 00:00:00 2001
From: Guozhi Wei
Date: Wed, 14 May 2025 00:39:04 +0000
Subject: [PATCH 2/2] Find an appropriate insert position for a sunk address
 instruction instead of just before MemoryInst.

---
 llvm/lib/CodeGen/CodeGenPrepare.cpp | 44 +++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 9d491120dcb39..f135c0a291f0c 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5771,6 +5771,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
   return false;
 }
 
+// Find an insert position of Addr for MemoryInst. We can't guarantee that
+// MemoryInst is the first instruction that will use Addr, so we need to find
+// the first user of Addr in the current BB.
+static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
+                                          Value *SunkAddr) {
+  if (Addr->hasOneUse())
+    return MemoryInst->getIterator();
+
+  // We already have a SunkAddr in the current BB, but we may need to insert a
+  // cast instruction after it.
+  if (SunkAddr) {
+    if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
+      return std::next(AddrInst->getIterator());
+  }
+
+  // Find the first user of Addr in the current BB.
+  Instruction *Earliest = MemoryInst;
+  for (User *U : Addr->users()) {
+    Instruction *UserInst = dyn_cast<Instruction>(U);
+    if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
+      if (isa<IntrinsicInst>(UserInst) || UserInst->isDebugOrPseudoInst())
+        continue;
+      if (UserInst->comesBefore(Earliest))
+        Earliest = UserInst;
+    }
+  }
+  return Earliest->getIterator();
+}
+
 /// Sink addressing mode computation immediate before MemoryInst if doing so
 /// can be done without increasing register pressure. The need for the
 /// register pressure constraint means this can end up being an all or nothing
@@ -5895,11 +5924,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
     return Modified;
   }
 
-  // Insert this computation right after this user. Since our caller is
-  // scanning from the top of the BB to the bottom, reuse of the expr are
-  // guaranteed to happen later.
-  IRBuilder<> Builder(MemoryInst);
-
   // Now that we determined the addressing expression we want to use and know
   // that we have to sink it into this block. Check to see if we have already
   // done this for some other load/store instr in this block. If so, reuse
@@ -5910,12 +5934,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
   Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
+
+  // The current BB may be optimized multiple times, so we can't guarantee
+  // that the reuse of Addr happens later; call findInsertPos to find an
+  // appropriate insert position.
+  IRBuilder<> Builder(MemoryInst->getParent(),
+                      findInsertPos(Addr, MemoryInst, SunkAddr));
+
   if (SunkAddr) {
     LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
                       << " for " << *MemoryInst << "\n");
-    Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr);
-    if (AddrInst && MemoryInst->comesBefore(AddrInst))
-      AddrInst->moveBefore(MemoryInst->getIterator());
     if (SunkAddr->getType() != Addr->getType()) {
       if (SunkAddr->getType()->getPointerAddressSpace() !=
               Addr->getType()->getPointerAddressSpace() &&