diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 9586fc97a39f7..d7f5fe5434785 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -45,6 +45,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
@@ -2498,6 +2499,233 @@ OptimizeGlobalAliases(Module &M,
   return Changed;
 }
 
+struct AccessPattern {
+  Type *Ty;
+
+  APInt Stride;
+  APInt Offset;
+};
+
+template <> struct DenseMapInfo<AccessPattern> {
+  static inline AccessPattern getEmptyKey() {
+    return {(Type *)1, APInt(), APInt()};
+  }
+  static inline AccessPattern getTombstoneKey() {
+    return {(Type *)2, APInt(), APInt()};
+  }
+  static unsigned getHashValue(const AccessPattern &AP) {
+    return hash_combine(AP.Ty, AP.Stride, AP.Offset);
+  }
+  static bool isEqual(const AccessPattern &LHS, const AccessPattern &RHS) {
+    return LHS.Ty == RHS.Ty && LHS.Stride == RHS.Stride &&
+           LHS.Offset == RHS.Offset;
+  }
+};
+
+// Return (gcd, x, y) such that a*x + b*y = gcd.
+std::tuple<APInt, APInt, APInt> ExtendedSignedGCD(APInt a, APInt b) {
+  unsigned BW = a.getBitWidth();
+  APInt x = APInt(BW, 1);
+  APInt y = APInt(BW, 0);
+  APInt x1 = APInt(BW, 0);
+  APInt y1 = APInt(BW, 1);
+
+  while (b != 0) {
+    APInt q = APInt(BW, 0);
+    APInt r = APInt(BW, 0);
+    APInt::sdivrem(a, b, q, r);
+    a = std::move(b);
+    b = std::move(r);
+
+    std::swap(x, x1);
+    std::swap(y, y1);
+    x1 -= q * x;
+    y1 -= q * y;
+  }
+  return {a, x, y};
+}
+
+// Build, if possible, a new pair of Stride and Offset whose offsets are all
+// part of the original sequence and are also aligned.
+std::optional<std::pair<APInt, APInt>>
+AlignStrideAndOffset(const APInt &Stride, const APInt &Offset,
+                     const APInt &Align) {
+  // Offset * Align is added only to make sure the value given to urem is
+  // positive or zero.
+  APInt Missing = ((Offset * Align) - Offset).urem(Align);
+
+  // Fast path for the common case.
+  if (Missing == 0)
+    return {
+        {(Stride * Align).udiv(APIntOps::GreatestCommonDivisor(Stride, Align)),
+         Offset}};
+
+  auto [GCD, X, Y] = ExtendedSignedGCD(Stride, Align);
+  assert(APIntOps::GreatestCommonDivisor(Stride, Align) == GCD);
+  assert((X * Stride + Y * Align) == GCD);
+
+  if (Missing.urem(GCD) != 0) {
+    // The new Stride and Offset cannot be created because there are no
+    // elements in the original sequence that would be properly aligned.
+    return std::nullopt;
+  }
+
+  APInt StrideAlign = Stride * Align;
+  // X could be negative, so we need to use sdiv.
+  // StrideAlign * Align is added only to make sure the value given to urem
+  // is positive or zero.
+  APInt NumStride =
+      (((Missing * X).sdiv(GCD)) + (StrideAlign * Align)).urem(Align);
+
+  APInt NewOffset = Offset + (NumStride * Stride);
+  APInt NewStride = StrideAlign.udiv(GCD);
+  return {{std::move(NewStride), std::move(NewOffset)}};
+}
+
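+// A worked example of AlignStrideAndOffset (illustrative): with Stride = 6,
+// Offset = 2 and Align = 4, the reachable offsets are 2, 8, 14, 20, ...
+// ExtendedSignedGCD(6, 4) returns (2, 1, -1) since 1*6 + (-1)*4 == 2.
+// Missing = (-2 mod 4) = 2 is divisible by GCD = 2, NumStride comes out as 1,
+// so NewOffset = 2 + 1*6 = 8 and NewStride = 6*4/2 = 12, which is exactly the
+// aligned subsequence 8, 20, 32, ... With Offset = 1 instead, Missing = 3 is
+// not divisible by GCD = 2: no reachable offset is ever 4-byte aligned, and
+// std::nullopt is returned.
+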
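+// Attach !range metadata to integer loads from constant globals that are
+// addressed with a variable (strided) offset. For example, given a
+// hypothetical constant global
+//   @g = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
+// a load such as
+//   %p = getelementptr inbounds [4 x i32], ptr @g, i64 0, i64 %i
+//   %v = load i32, ptr %p, align 4
+// can only produce one of the stored values, so it is annotated with
+// !range !{i32 1, i32 5}.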
+static bool addRangeMetadata(Module &M) {
+  const DataLayout &DL = M.getDataLayout();
+  bool Changed = false;
+
+  for (GlobalValue &Global : M.global_values()) {
+
+    auto *GV = dyn_cast<GlobalVariable>(&Global);
+    if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+      continue;
+
+    // Use a lambda so that we can go to the next GlobalVariable with a
+    // return.
+    [&] {
+      unsigned IndexBW = DL.getIndexTypeSizeInBits(GV->getType());
+
+      struct PointerInfo {
+        Value *Ptr;
+
+        // Zero denotes not set.
+        APInt Stride;
+        APInt Offset;
+      };
+
+      // GEPs only take one pointer operand, the one we come from, so we
+      // don't need to do any uniquing during the DFS.
+      SmallVector<PointerInfo> Stack;
+
+      // All loads of the global that this code can analyze, grouped by
+      // access pattern. Loads with the same access pattern can access the
+      // same offsets in the global, so they can be treated the same.
+      SmallDenseMap<AccessPattern, SmallVector<LoadInst *>> LoadsByAccess;
+
+      Stack.push_back({GV, APInt(IndexBW, 0), APInt(IndexBW, 0)});
+
+      while (!Stack.empty()) {
+        PointerInfo Curr = Stack.pop_back_val();
+
+        if (!isa<GlobalVariable>(Curr.Ptr)) {
+          if (auto *LI = dyn_cast<LoadInst>(Curr.Ptr)) {
+
+            if (!LI->getType()->isIntegerTy())
+              continue;
+
+            if (LI->hasMetadata(LLVMContext::MD_range))
+              continue;
+
+            // This is an access at a fixed offset; we expect that case to be
+            // handled elsewhere, so we skip it.
+            if (Curr.Stride == 0)
+              continue;
+
+            // This case is very rare and weird, but what it means is that we
+            // don't know at runtime which offsets into the global are safe
+            // to access with this load. We could run the following code
+            // while ignoring the alignment constraint from the load, but the
+            // case is rare and weird, so we give up.
+            if (LI->getAlign() > GV->getAlign().valueOrOne())
+              continue;
+
+            auto NewStrideAndOffset =
+                AlignStrideAndOffset(Curr.Stride, Curr.Offset,
+                                     APInt(IndexBW, LI->getAlign().value()));
+
+            if (!NewStrideAndOffset) {
+              // This load cannot access an offset with the correct
+              // alignment.
+              LI->replaceAllUsesWith(PoisonValue::get(LI->getType()));
+              continue;
+            }
+
+            AccessPattern AP{LI->getType(), NewStrideAndOffset->first,
+                             NewStrideAndOffset->second};
+            assert(AP.Stride != 0);
+            LoadsByAccess[AP].push_back(LI);
+            continue;
+          }
+          auto *GEP = dyn_cast<GEPOperator>(Curr.Ptr);
+          if (!GEP)
+            continue;
+
+          SmallMapVector<Value *, APInt, 4> VarOffsets;
+          // Give up on the whole global if one of its GEPs cannot be
+          // analyzed.
+          if (!GEP->collectOffset(DL, IndexBW, VarOffsets, Curr.Offset))
+            break;
+
+          for (auto [V, Scale] : VarOffsets) {
+
+            // Commented out because I don't understand why we would need
+            // this, but it was part of getStrideAndModOffsetOfGEP:
+            // // Only keep a power of two factor for non-inbounds
+            // if (!GEP->isInBounds())
+            //   Scale = APInt::getOneBitSet(Scale.getBitWidth(),
+            //                               Scale.countr_zero());
+
+            if (Curr.Stride == 0)
+              Curr.Stride = Scale;
+            else
+              Curr.Stride = APIntOps::GreatestCommonDivisor(Curr.Stride, Scale);
+          }
+        }
+
+        for (User *U : Curr.Ptr->users()) {
+          if (isa<GEPOperator, LoadInst>(U)) {
+            Curr.Ptr = U;
+            Stack.push_back(Curr);
+          }
+        }
+      }
+
+      for (auto [AP, Loads] : LoadsByAccess) {
+        {
+          APInt SMin = APInt::getSignedMaxValue(AP.Ty->getIntegerBitWidth());
+          APInt SMax = APInt::getSignedMinValue(AP.Ty->getIntegerBitWidth());
+
+          APInt LastValidOffset =
+              APInt(IndexBW, DL.getTypeAllocSize(GV->getValueType()) -
+                                 DL.getTypeStoreSize(AP.Ty));
+          for (APInt Offset = AP.Offset; Offset.ule(LastValidOffset);
+               Offset += AP.Stride) {
+            assert(Offset.isAligned(Loads[0]->getAlign()));
+            Constant *Cst = ConstantFoldLoadFromConstPtr(GV, AP.Ty, Offset, DL);
+
+            if (!Cst)
+              // Lambda captures of a structured binding are only available
+              // starting in C++20, so we skip to the next element with a
+              // goto.
+              goto NextGroup;
+
+            // MD_range is order-agnostic.
+            SMin = APIntOps::smin(SMin, Cst->getUniqueInteger());
+            SMax = APIntOps::smax(SMax, Cst->getUniqueInteger());
+          }
+
+          MDBuilder MDHelper(M.getContext());
+
+          // The range is allowed to wrap.
+          MDNode *RNode = MDHelper.createRange(SMin, SMax + 1);
+          for (LoadInst *LI : Loads)
+            LI->setMetadata(LLVMContext::MD_range, RNode);
+          Changed = true;
+        }
+      NextGroup:
+        (void)0; // A label must be followed by a statement.
+      }
+    }();
+  }
+  return Changed;
+}
+
 static Function *
 FindAtExitLibFunc(Module &M,
                   function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2887,6 +3115,10 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL,
     Changed |= LocalChange;
   }
 
+  // Add range metadata to loads from constant global variables based on the
+  // values that could be loaded from the variable.
+  Changed |= addRangeMetadata(M);
+
   // TODO: Move all global ctors functions to the end of the module for code
   // layout.
 
diff --git a/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll b/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll
new file mode 100644
index 0000000000000..b936e22a83edc
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll
@@ -0,0 +1,434 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p globalopt -S %s | FileCheck %s
+
+@gvar0 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 -5, i64 1, i64 10, [253 x i64] zeroinitializer }> }, align 8
+@gvar1 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 0, i64 1, i64 5, [253 x i64] zeroinitializer }> }, align 8
+@gvar2 = global [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 1], align 16
+@gvar3 = constant [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 8388608], align 16
+@gvar5 = constant [2 x [6 x i8]] [[6 x i8] c"\01a_\02-0", [6 x i8] c" \0E\FF\07\08\09"], align 1
+
+%struct.A = type { i32, i8, ptr, i16, i8 }
+@gvar6 = constant [2 x [2 x %struct.A]] [[2 x %struct.A] [%struct.A { i32 8, i8 97, ptr null, i16 9, i8 12 }, %struct.A { i32 -1, i8 107, ptr null, i16 7, i8 0 }], [2 x %struct.A] [%struct.A { i32 16, i8 46, ptr null, i16 59, i8 95 }, %struct.A { i32 0, i8 0, ptr null, i16 49, i8 100 }]], align 16
+%struct.B = type <{ i32, i8, ptr, i16, i8 }>
+@gvar7 = constant [2 x [2 x %struct.B]] [[2 x %struct.B] [%struct.B <{ i32 8, i8 97, ptr null, i16 9, i8 12 }>, %struct.B <{ i32 -1, i8 107, ptr null, i16 7, i8 0 }>], [2 x %struct.B] [%struct.B <{ i32 16, i8 46, ptr null, i16 59, i8 95 }>, %struct.B <{ i32 0, i8 0, ptr null, i16 49, i8 100 }>]], align 32
+%struct.C = type { i32, i32, i32 }
+@gvar8 = constant [34 x %struct.C] [%struct.C { i32 0, i32 1, i32 2 }, %struct.C { i32 3, i32 4, i32 5 }, %struct.C { i32 6, i32 7, i32 8 }, %struct.C { i32 9, i32 10, i32 11 }, %struct.C { i32 12, i32 13, i32 14 }, %struct.C { i32 15, i32 16, i32 17 }, %struct.C { i32 18, i32 19, i32 20 }, %struct.C { i32 21, i32 22, i32 23 }, %struct.C { i32 24, i32 25, i32 26 }, %struct.C { i32 27, i32 28, i32 29 }, %struct.C { i32 30, i32 31, i32 32 }, %struct.C { i32 33, i32 34, i32 35 }, %struct.C { i32 36, i32 37, i32 38 }, %struct.C { i32 39, i32 40, i32 41 }, %struct.C { i32 42, i32 43, i32 44 }, %struct.C { i32 45, i32 46, i32 47 }, %struct.C { i32 48, i32 49, i32 50 }, %struct.C { i32 51, i32 52, i32 53 }, %struct.C { i32 54, i32 55, i32 56 }, %struct.C { i32 57, i32 58, i32 59 }, %struct.C { i32 60, i32 61, i32 62 }, %struct.C { i32 63, i32 64, i32 65 }, %struct.C { i32 66, i32 67, i32 68 }, %struct.C { i32 69, i32 70, i32 71 }, %struct.C { i32 72, i32 73, i32 74 }, %struct.C { i32 75, i32 76, i32 77 }, %struct.C { i32 78, i32 79, i32 80 }, %struct.C { i32 81, i32 82, i32 83 }, %struct.C { i32 84, i32 85, i32 86 }, %struct.C { i32 87, i32 88, i32 89 }, %struct.C { i32 90, i32 91, i32 92 }, %struct.C { i32 93, i32 94, i32 95 }, %struct.C { i32 96, i32 97, i32 98 }, %struct.C { i32 99, i32 100, i32 101 }], align 256
+@gvar9 = constant [6 x [18 x i8]] [[18 x i8] c"\00\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10\11", [18 x i8] c"\12\13\14\15\16\17\18\19\1A\1B\1C\1D\1E\1F !\22#", [18 x i8] c"$%&'()*+,-./012345", [18 x i8] c"6789:;<=>?@ABCDEFG", [18 x i8] c"HIJKLMNOPQRSTUVWXY", [18 x i8] c"Z[\\]^_`abcdefghijk"], align 16
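+
+; Sanity note (illustrative): @gvar0 only contains the i64 values -5, 1, 10
+; and zeros, so any 8-byte-aligned i64 load from it must fall in the
+; half-open range [-5, 11); that is the !range checked as RNG0 below.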
[18 x i8] c"6789:;<=>?@ABCDEFG", [18 x i8] c"HIJKLMNOPQRSTUVWXY", [18 x i8] c"Z[\\]^_`abcdefghijk"], align 16 + +define i64 @test_basic0(i64 %3) { +; CHECK-LABEL: define i64 @test_basic0( +; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: ret i64 [[TMP2]] +; + %ptr = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 %3 + %5 = load i64, ptr %ptr, align 8 + ret i64 %5 +} + +define i64 @test_basic1(i64 %3) { +; CHECK-LABEL: define i64 @test_basic1( +; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0]] +; CHECK-NEXT: ret i64 [[TMP2]] +; + %ptr = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 %3 + %5 = load i64, ptr %ptr, align 8 + ret i64 %5 +} + +define i32 @test_different_type(i64 %3) { +; CHECK-LABEL: define i32 @test_different_type( +; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8, !range [[RNG1:![0-9]+]] +; CHECK-NEXT: ret i32 [[TMP2]] +; + %ptr = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 %3 + %5 = load i32, ptr %ptr, align 8 + ret i32 %5 +} + +define i32 @test_non_constant(i64 %3) { +; CHECK-LABEL: define i32 @test_non_constant( +; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [8 x i32], ptr @gvar2, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %ptr = getelementptr inbounds [8 x i32], ptr @gvar2, i64 0, i64 %3 + %5 = load i32, ptr %ptr, align 8 + ret i32 %5 +} + +define i64 @test_other(i8 %first_idx) { +; CHECK-LABEL: define i64 @test_other( +; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar3, i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8, !range [[RNG2:![0-9]+]] +; CHECK-NEXT: ret i64 [[TMP0]] +; +entry: + %idxprom = zext i8 %first_idx to i64 + %arrayidx = getelementptr inbounds i64, ptr @gvar3, i64 %idxprom + %0 = load i64, ptr %arrayidx, align 8 + ret i64 %0 +} + +; This could be supported but is rare and more complex for for now we dont process it. 
+
+; This could be supported but is rare and more complex, so for now we don't
+; process it.
+define i64 @test_multiple_types0(i8 %first_idx) {
+; CHECK-LABEL: define i64 @test_multiple_types0(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar3, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8, !range [[RNG2]]
+; CHECK-NEXT:    ret i64 [[TMP0]]
+;
+entry:
+  %idxprom = zext i8 %first_idx to i64
+  %arrayidx = getelementptr inbounds i64, ptr @gvar3, i64 %idxprom
+  %0 = load i64, ptr %arrayidx, align 8
+  ret i64 %0
+}
+
+define i32 @test_multiple_types1(i8 %first_idx) {
+; CHECK-LABEL: define i32 @test_multiple_types1(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr @gvar3, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 8, !range [[RNG3:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %idxprom = zext i8 %first_idx to i64
+  %arrayidx = getelementptr inbounds i32, ptr @gvar3, i64 %idxprom
+  %0 = load i32, ptr %arrayidx, align 8
+  ret i32 %0
+}
+
+define i32 @test_overaligned_load(i8 %first_idx) {
+; CHECK-LABEL: define i32 @test_overaligned_load(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr @gvar3, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 32
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %idxprom = zext i8 %first_idx to i64
+  %arrayidx = getelementptr inbounds i32, ptr @gvar3, i64 %idxprom
+  %0 = load i32, ptr %arrayidx, align 32
+  ret i32 %0
+}
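+
+; In test_overaligned_load above, the load alignment (32) exceeds the
+; alignment of @gvar3 (16), so the pass conservatively bails out and no
+; !range is attached; see the corresponding check in addRangeMetadata.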
+
+; This could also be supported, but for now it is not.
+define dso_local signext i8 @multi_dimentional0(i8 zeroext %0, i8 zeroext %1) local_unnamed_addr {
+; CHECK-LABEL: define dso_local signext i8 @multi_dimentional0(
+; CHECK-SAME: i8 zeroext [[TMP0:%.*]], i8 zeroext [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 [[TMP3]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1, !range [[RNG4:![0-9]+]]
+; CHECK-NEXT:    ret i8 [[TMP6]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = zext i8 %1 to i64
+  %5 = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 %3, i64 %4
+  %6 = load i8, ptr %5, align 1
+  ret i8 %6
+}
+
+define i64 @test_complex0(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_complex0(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 [[TMP3]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8, !range [[RNG5:![0-9]+]]
+; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
+; CHECK-NEXT:    ret i64 [[TMP7]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = zext i8 %1 to i64
+  %5 = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 %3, i64 %4
+  %6 = load i32, ptr %5, align 8
+  %7 = sext i32 %6 to i64
+  ret i64 %7
+}
+
+define i64 @test_multi_gep(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_multi_gep(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 [[TMP3]]
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x %struct.A], ptr [[GEP0]], i64 0, i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[GEP1]], align 16, !range [[RNG6:![0-9]+]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x %struct.A], ptr [[GEP0]], i64 0, i64 [[TMP4]]
+; CHECK-NEXT:    [[B6:%.*]] = load i32, ptr [[GEP2]], align 4, !range [[RNG5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT:    ret i64 [[TMP6]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = zext i8 %1 to i64
+  %gep0 = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 %3
+  %gep1 = getelementptr inbounds [2 x %struct.A], ptr %gep0, i64 0, i64 %4
+  %5 = load i32, ptr %gep1, align 16
+  %gep2 = getelementptr inbounds [2 x %struct.A], ptr %gep0, i64 0, i64 %4
+  %b6 = load i32, ptr %gep2, align 4
+  %6 = sext i32 %5 to i64
+  ret i64 %6
+}
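+
+; Note on test_multi_gep above: both loads read the same address, but the
+; align-16 load can only land on every other %struct.A element (the 24-byte
+; stride is realigned to 48), so it only sees the first-field values 8 and 16
+; and gets !range [8, 17), while the align-4 load sees all of 8, -1, 16, 0
+; and gets !range [-1, 17).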
+
+define i64 @test_complex1(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_complex1(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 [[TMP3]], i64 [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8, !range [[RNG7:![0-9]+]]
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i16 [[TMP6]] to i64
+; CHECK-NEXT:    ret i64 [[TMP7]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = zext i8 %1 to i64
+  %5 = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 %3, i64 %4, i32 3
+  %6 = load i16, ptr %5, align 8
+  %7 = zext i16 %6 to i64
+  ret i64 %7
+}
+
+define i64 @test_packed_struct0(i8 zeroext %0, i8 zeroext %1) {
+; CHECK-LABEL: define i64 @test_packed_struct0(
+; CHECK-SAME: i8 zeroext [[TMP0:%.*]], i8 zeroext [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 [[TMP3]], i64 [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 1, !range [[RNG7]]
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i16 [[TMP6]] to i64
+; CHECK-NEXT:    ret i64 [[TMP7]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = zext i8 %1 to i64
+  %5 = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 %3, i64 %4, i32 3
+  %6 = load i16, ptr %5, align 1
+  %7 = zext i16 %6 to i64
+  ret i64 %7
+}
+
+define i64 @test_packed_struct_aligned(i8 zeroext %0, i8 zeroext %1) {
+; CHECK-LABEL: define i64 @test_packed_struct_aligned(
+; CHECK-SAME: i8 zeroext [[TMP0:%.*]], i8 zeroext [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 [[TMP3]], i64 [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i16 poison to i64
+; CHECK-NEXT:    ret i64 [[TMP7]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = zext i8 %1 to i64
+  %5 = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 %3, i64 %4, i32 3
+  %6 = load i16, ptr %5, align 8
+  %7 = zext i16 %6 to i64
+  ret i64 %7
+}
+
+define i64 @test_packed_struct_aligned2(i8 zeroext %0, i8 zeroext %1) {
+; CHECK-LABEL: define i64 @test_packed_struct_aligned2(
+; CHECK-SAME: i8 zeroext [[TMP0:%.*]], i8 zeroext [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 [[TMP3]], i64 [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 16
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i16 poison to i64
+; CHECK-NEXT:    ret i64 [[TMP7]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = zext i8 %1 to i64
+  %5 = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 %3, i64 %4, i32 3
+  %6 = load i16, ptr %5, align 16
+  %7 = zext i16 %6 to i64
+  ret i64 %7
+}
+
+define i64 @test_alignment_stride0(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_alignment_stride0(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr @gvar8, i64 32
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [100 x %struct.C], ptr [[GEP0]], i64 0, i64 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !range [[RNG8:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT:    ret i64 [[TMP6]]
+;
+  %3 = zext i8 %0 to i64
+  %gep0 = getelementptr inbounds i32, ptr @gvar8, i64 32
+  %4 = getelementptr inbounds [100 x %struct.C], ptr %gep0, i64 0, i64 %3, i32 0
+  %5 = load i32, ptr %4, align 4
+  %6 = sext i32 %5 to i64
+  ret i64 %6
+}
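+
+; The test_alignment_stride tests (above and below) all load the i32 first
+; field of %struct.C elements starting 128 bytes into @gvar8, i.e. the values
+; 32, 35, 38, ... 101. Raising the load alignment from 4 to 8, 16 and 32
+; realigns the 12-byte stride to 24, 48 and 96 bytes, so fewer elements stay
+; reachable and the ranges shrink: [32, 102), [32, 99), [32, 93), [32, 81).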
+
+define i64 @test_alignment_stride1(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_alignment_stride1(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr @gvar8, i64 32
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [100 x %struct.C], ptr [[GEP0]], i64 0, i64 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8, !range [[RNG9:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT:    ret i64 [[TMP6]]
+;
+  %3 = zext i8 %0 to i64
+  %gep0 = getelementptr inbounds i32, ptr @gvar8, i64 32
+  %4 = getelementptr inbounds [100 x %struct.C], ptr %gep0, i64 0, i64 %3, i32 0
+  %5 = load i32, ptr %4, align 8
+  %6 = sext i32 %5 to i64
+  ret i64 %6
+}
+
+define i64 @test_alignment_stride2(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_alignment_stride2(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr @gvar8, i64 32
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [100 x %struct.C], ptr [[GEP0]], i64 0, i64 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 16, !range [[RNG10:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT:    ret i64 [[TMP6]]
+;
+  %3 = zext i8 %0 to i64
+  %gep0 = getelementptr inbounds i32, ptr @gvar8, i64 32
+  %4 = getelementptr inbounds [100 x %struct.C], ptr %gep0, i64 0, i64 %3, i32 0
+  %5 = load i32, ptr %4, align 16
+  %6 = sext i32 %5 to i64
+  ret i64 %6
+}
+
+define i64 @test_alignment_stride3(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_alignment_stride3(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr @gvar8, i64 32
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [100 x %struct.C], ptr [[GEP0]], i64 0, i64 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 32, !range [[RNG11:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT:    ret i64 [[TMP6]]
+;
+  %3 = zext i8 %0 to i64
+  %gep0 = getelementptr inbounds i32, ptr @gvar8, i64 32
+  %4 = getelementptr inbounds [100 x %struct.C], ptr %gep0, i64 0, i64 %3, i32 0
+  %5 = load i32, ptr %4, align 32
+  %6 = sext i32 %5 to i64
+  ret i64 %6
+}
+
+define i64 @test_strides(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [6 x [18 x i8]], ptr @gvar9, i64 0, i64 [[TMP2]], i64 14
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8, !range [[RNG12:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sext i8 [[TMP4]] to i64
+; CHECK-NEXT:    ret i64 [[TMP5]]
+;
+  %2 = zext i8 %0 to i64
+  %3 = getelementptr inbounds [6 x [18 x i8]], ptr @gvar9, i64 0, i64 %2, i64 14
+  %4 = load i8, ptr %3, align 8
+  %5 = sext i8 %4 to i64
+  ret i64 %5
+}
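+
+; test_strides above loads from @gvar9 (whose bytes are 0, 1, ..., 107) at
+; offsets 18*k + 14 with an 8-byte-aligned load; realignment turns this into
+; offsets 32 + 72*k, so only bytes 32 and 104 are reachable and the range is
+; [32, 105). In test_strides_poison below, no offset 18*k + 7 is ever 8-byte
+; aligned, so the load's result is replaced with poison.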
+
+define i64 @test_strides_poison(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides_poison(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [6 x [18 x i8]], ptr @gvar9, i64 0, i64 [[TMP2]], i64 7
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = sext i8 poison to i64
+; CHECK-NEXT:    ret i64 [[TMP5]]
+;
+  %2 = zext i8 %0 to i64
+  %3 = getelementptr inbounds [6 x [18 x i8]], ptr @gvar9, i64 0, i64 %2, i64 7
+  %4 = load i8, ptr %3, align 8
+  %5 = sext i8 %4 to i64
+  ret i64 %5
+}
+
+define i64 @test_strides2(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides2(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [5 x [19 x i8]], ptr @gvar9, i64 0, i64 [[TMP2]], i64 8
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8, !range [[RNG13:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sext i8 [[TMP4]] to i64
+; CHECK-NEXT:    ret i64 [[TMP5]]
+;
+  %2 = zext i8 %0 to i64
+  %3 = getelementptr inbounds [5 x [19 x i8]], ptr @gvar9, i64 0, i64 %2, i64 8
+  %4 = load i8, ptr %3, align 8
+  %5 = sext i8 %4 to i64
+  ret i64 %5
+}
+
+define i64 @test_strides3(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides3(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [12 x [9 x i8]], ptr @gvar9, i64 0, i64 2, i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4, !range [[RNG14:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sext i8 [[TMP4]] to i64
+; CHECK-NEXT:    ret i64 [[TMP5]]
+;
+  %2 = zext i8 %0 to i64
+  %3 = getelementptr inbounds [12 x [9 x i8]], ptr @gvar9, i64 0, i64 2, i64 %2
+  %4 = load i8, ptr %3, align 4
+  %5 = sext i8 %4 to i64
+  ret i64 %5
+}
+
+define i64 @test_strides4(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides4(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [4 x [27 x i8]], ptr @gvar9, i64 0, i64 [[TMP2]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 2, !range [[RNG15:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sext i16 [[TMP4]] to i64
+; CHECK-NEXT:    ret i64 [[TMP5]]
+;
+  %2 = zext i8 %0 to i64
+  %3 = getelementptr inbounds [4 x [27 x i8]], ptr @gvar9, i64 0, i64 %2, i64 2
+  %4 = load i16, ptr %3, align 2
+  %5 = sext i16 %4 to i64
+  ret i64 %5
+}
+
+;.
+; CHECK: [[RNG0]] = !{i64 -5, i64 11}
+; CHECK: [[RNG1]] = !{i32 0, i32 6}
+; CHECK: [[RNG2]] = !{i64 2, i64 36028801313924476}
+; CHECK: [[RNG3]] = !{i32 -6789, i32 3}
+; CHECK: [[RNG4]] = !{i8 -1, i8 98}
+; CHECK: [[RNG5]] = !{i32 -1, i32 17}
+; CHECK: [[RNG6]] = !{i32 8, i32 17}
+; CHECK: [[RNG7]] = !{i16 7, i16 60}
+; CHECK: [[RNG8]] = !{i32 32, i32 102}
+; CHECK: [[RNG9]] = !{i32 32, i32 99}
+; CHECK: [[RNG10]] = !{i32 32, i32 93}
+; CHECK: [[RNG11]] = !{i32 32, i32 81}
+; CHECK: [[RNG12]] = !{i8 32, i8 105}
+; CHECK: [[RNG13]] = !{i8 8, i8 9}
+; CHECK: [[RNG14]] = !{i8 20, i8 105}
+; CHECK: [[RNG15]] = !{i16 770, i16 14649}
+;.