[GlobalOpt] Add range metadata to loads from constant global variables #127695
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: None (Ralender)

Changes

This change fixes #125003. I put the process of extracting range metadata from globals in GlobalOpt because it is thematically linked and GlobalOpt only runs twice in the standard O3 pipeline.

Full diff: https://github.com/llvm/llvm-project/pull/127695.diff

2 Files Affected:
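For context, here is a minimal source-level sketch of the kind of code this helps (a hypothetical example, not taken from the PR): a load from a constant lookup table indexed by a runtime value. Once the load carries `!range` metadata, later passes can fold away comparisons against values the table cannot contain.

```cpp
// Hypothetical example: kTable is a constant global, so any in-bounds load
// from it yields one of {0, 1, 2, 10}. With !range metadata attached to the
// load, the optimizer can prove the comparison below is always false.
static const int kTable[8] = {0, 1, 2, 0, 0, 1, 10, 2};

bool neverTrue(unsigned i) {
  int v = kTable[i & 7]; // the load would get !range [0, 11)
  return v > 100;        // foldable to false given that range
}
```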
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 9586fc97a39f7..7744dde2965e6 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
@@ -2498,6 +2499,102 @@ OptimizeGlobalAliases(Module &M,
return Changed;
}
+static bool AddRangeMetadata(Module &M) {
+  const DataLayout &DL = M.getDataLayout();
+  bool Changed = false;
+
+  for (GlobalValue &Global : M.global_values()) {
+
+    auto *GV = dyn_cast<GlobalVariable>(&Global);
+    if (!GV || !GV->hasDefinitiveInitializer())
+      continue;
+
+    // Use an immediately-invoked lambda so that a return can skip to the
+    // next GlobalVariable.
+    [&] {
+      uint64_t GlobalByteSize = DL.getTypeAllocSize(GV->getValueType());
+      unsigned BW = DL.getIndexTypeSizeInBits(GV->getType());
+
+      SmallVector<LoadInst *> ArrayLikeLoads;
+      Type *ElemTy = nullptr;
+
+      for (User *U : GV->users()) {
+        if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+          Type *GEPElemTy = GEP->getResultElementType();
+          if (!GEP->isInBounds() || !GEPElemTy->isIntegerTy())
+            continue;
+
+          // The restriction that all accesses use the same type could be
+          // lifted.
+          if (!ElemTy)
+            ElemTy = GEPElemTy;
+          else if (ElemTy != GEPElemTy)
+            return;
+
+          SmallMapVector<Value *, APInt, 4> Index;
+          APInt CstOffset(BW, 0);
+          GEP->collectOffset(DL, BW, Index, CstOffset);
+
+          // This check is needed for correctness of the code below. But we
+          // could instead only traverse the range starting at the constant
+          // offset.
+          if (!CstOffset.isAligned(DL.getPrefTypeAlign(GEPElemTy)))
+            return;
+
+          // The restriction that this is a 1D array could be lifted.
+          if (Index.size() != 1 ||
+              Index.front().second != DL.getTypeAllocSize(GEPElemTy))
+            return;
+
+          for (User *U : GEP->users()) {
+            if (auto *LI = dyn_cast<LoadInst>(U)) {
+              // The restriction that all accesses use the same type could be
+              // lifted.
+              if (LI->getType() == GEPElemTy)
+                ArrayLikeLoads.push_back(LI);
+              else
+                return;
+            }
+          }
+        }
+      }
+
+      if (ArrayLikeLoads.empty())
+        return;
+
+      APInt Idx = APInt::getZero(64);
+      APInt Min = APInt::getSignedMaxValue(
+          ArrayLikeLoads[0]->getType()->getIntegerBitWidth());
+      APInt Max = APInt::getSignedMinValue(
+          ArrayLikeLoads[0]->getType()->getIntegerBitWidth());
+
+      uint64_t ElemSize = DL.getTypeStoreSize(ArrayLikeLoads[0]->getType());
+      uint64_t NumElem =
+          GlobalByteSize / DL.getTypeStoreSize(ArrayLikeLoads[0]->getType());
+      for (uint64_t i = 0; i < NumElem; i++) {
+        Constant *Cst = ConstantFoldLoadFromConstPtr(
+            GV, ArrayLikeLoads[0]->getType(), Idx, DL);
+
+        if (!Cst)
+          return;
+
+        Idx += ElemSize;
+
+        // MD_range data is expected in signed order, so we use smin and smax
+        // here.
+        Min = APIntOps::smin(Min, Cst->getUniqueInteger());
+        Max = APIntOps::smax(Max, Cst->getUniqueInteger());
+      }
+
+      llvm::MDBuilder MDHelper(M.getContext());
+      // The range is allowed to wrap.
+      llvm::MDNode *RNode = MDHelper.createRange(Min, Max + 1);
+      for (LoadInst *LI : ArrayLikeLoads)
+        LI->setMetadata(LLVMContext::MD_range, RNode);
+      Changed = true;
+    }();
+  }
+  return Changed;
+}
+
static Function *
FindAtExitLibFunc(Module &M,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2887,6 +2984,10 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL,
    Changed |= LocalChange;
  }
+  // Add range metadata to loads from constant global variables based on the
+  // values that could be loaded from the variable.
+  Changed |= AddRangeMetadata(M);
+
  // TODO: Move all global ctors functions to the end of the module for code
  // layout.
diff --git a/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll b/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll
new file mode 100644
index 0000000000000..230e9f12726be
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll
@@ -0,0 +1,129 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p globalopt -S %s | FileCheck %s
+
+@gvar0 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 -5, i64 1, i64 10, [253 x i64] zeroinitializer }> }, align 8
+@gvar1 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 0, i64 1, i64 5, [253 x i64] zeroinitializer }> }, align 8
+@gvar2 = global { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 0, i64 1, i64 2, [253 x i64] zeroinitializer }> }, align 8
+@gvar3 = constant [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 8388608], align 16
+@gvar4 = constant [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 8388608], align 16
+@gvar5 = constant [2 x [6 x i8]] [[6 x i8] c"\01a_\02-0", [6 x i8] c" \0E\FF\07\08\09"], align 1
+
+define i64 @test_basic0(i64 %3) {
+; CHECK-LABEL: define i64 @test_basic0(
+; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0:![0-9]+]]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %ptr = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 %3
+ %5 = load i64, ptr %ptr, align 8
+ ret i64 %5
+}
+
+define i64 @test_basic1(i64 %3) {
+; CHECK-LABEL: define i64 @test_basic1(
+; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0]]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %ptr = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 %3
+ %5 = load i64, ptr %ptr, align 8
+ ret i64 %5
+}
+
+define i32 @test_different_type(i64 %3) {
+; CHECK-LABEL: define i32 @test_different_type(
+; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8, !range [[RNG1:![0-9]+]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %ptr = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 %3
+ %5 = load i32, ptr %ptr, align 8
+ ret i32 %5
+}
+
+define i32 @test_non_constant(i64 %3) {
+; CHECK-LABEL: define i32 @test_non_constant(
+; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [512 x i32], ptr @gvar2, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %ptr = getelementptr inbounds [512 x i32], ptr @gvar2, i64 0, i64 %3
+ %5 = load i32, ptr %ptr, align 8
+ ret i32 %5
+}
+
+define i64 @test_other(i8 %first_idx) {
+; CHECK-LABEL: define i64 @test_other(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar3, i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8, !range [[RNG2:![0-9]+]]
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %idxprom = zext i8 %first_idx to i64
+ %arrayidx = getelementptr inbounds i64, ptr @gvar3, i64 %idxprom
+ %0 = load i64, ptr %arrayidx, align 8
+ ret i64 %0
+}
+
+; This could be supported but is rare and more complex, so for now we don't process it.
+define i64 @test_multiple_types0(i8 %first_idx) {
+; CHECK-LABEL: define i64 @test_multiple_types0(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar4, i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %idxprom = zext i8 %first_idx to i64
+ %arrayidx = getelementptr inbounds i64, ptr @gvar4, i64 %idxprom
+ %0 = load i64, ptr %arrayidx, align 8
+ ret i64 %0
+}
+
+define i32 @test_multiple_types1(i8 %first_idx) {
+; CHECK-LABEL: define i32 @test_multiple_types1(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr @gvar4, i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %idxprom = zext i8 %first_idx to i64
+ %arrayidx = getelementptr inbounds i32, ptr @gvar4, i64 %idxprom
+ %0 = load i32, ptr %arrayidx, align 8
+ ret i32 %0
+}
+
+; This could also be supported, but for now it is not.
+define dso_local noundef signext i8 @multi_dimensional(i8 noundef zeroext %0, i8 noundef zeroext %1) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local noundef signext i8 @multi_dimensional(
+; CHECK-SAME: i8 noundef zeroext [[TMP0:%.*]], i8 noundef zeroext [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 [[TMP3]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: ret i8 [[TMP6]]
+;
+ %3 = zext i8 %0 to i64
+ %4 = zext i8 %1 to i64
+ %5 = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 %3, i64 %4
+ %6 = load i8, ptr %5, align 1
+ ret i8 %6
+}
+
+;.
+; CHECK: [[RNG0]] = !{i64 -5, i64 11}
+; CHECK: [[RNG1]] = !{i32 0, i32 6}
+; CHECK: [[RNG2]] = !{i64 2, i64 36028801313924476}
+;.
Looks like a reasonable idea.
See also the fold at llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp, lines 829 to 916 in db5bc8e.
The only thing I don't understand is why it does this: llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp, lines 844 to 846 in db5bc8e.
Force-pushed from c2123a2 to ad4e92e.
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed from ad4e92e to 6ffd9ae.
I added similar logic. After having written the code, I think the process of finding closed forms of the stride and offset with alignment may be too complicated and error-prone, but it should be able to handle a wide variety of GEPs.
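To make the stride/offset idea concrete, a rough sketch under stated assumptions (the helper and names below are illustrative, not from the patch): an inbounds GEP whose offset decomposes as `CstOffset + Idx * Stride` can only address byte offsets congruent to `CstOffset` modulo `Stride`, so only those elements need to be constant-folded when computing the range.

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative helper, not from the patch: enumerate the byte offsets within
// a global of GlobalSize bytes that a GEP of the form CstOffset + Idx * Stride
// (Idx unknown, access inbounds) can address. Only these offsets would need to
// be constant-folded to compute the min/max of the loaded values.
std::vector<uint64_t> reachableOffsets(uint64_t GlobalSize, uint64_t CstOffset,
                                       uint64_t Stride, uint64_t LoadSize) {
  assert(Stride != 0 && "a zero stride would mean a single fixed offset");
  std::vector<uint64_t> Offsets;
  // Start at the smallest offset congruent to CstOffset mod Stride and step
  // by the stride until a load would run past the end of the global.
  for (uint64_t Off = CstOffset % Stride; Off + LoadSize <= GlobalSize;
       Off += Stride)
    Offsets.push_back(Off);
  return Offsets;
}
```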
ping |
This change fixes #125003.
I put the process of extracting range metadata from globals in GlobalOpt because it is thematically linked and GlobalOpt only runs twice in the standard O3 pipeline.
Also, the logic only acts on linear 1D arrays where all accesses to a given global use the same type, but these restrictions could be lifted if there is interest.