[DA] do not handle array accesses of different offsets #123436
Conversation
@llvm/pr-subscribers-testing-tools @llvm/pr-subscribers-llvm-analysis

Author: Sebastian Pop (sebpop)

Changes: The patch adds a testcase that is not supposed to be disambiguated by the DA.

Patch is 330.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123436.diff

37 Files Affected:
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index a4a98ea0bae1463..3f771a787897e2a 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -216,7 +216,8 @@ void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
PreservedAnalyses
DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
- OS << "'Dependence Analysis' for function '" << F.getName() << "':\n";
+ OS << "Printing analysis 'Dependence Analysis' for function '" << F.getName()
+ << "':\n";
dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F),
FAM.getResult<ScalarEvolutionAnalysis>(F),
NormalizeResults);
@@ -3601,14 +3602,10 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
return std::make_unique<Dependence>(Src, Dst);
}
- assert(isLoadOrStore(Src) && "instruction is not load or store");
- assert(isLoadOrStore(Dst) && "instruction is not load or store");
- Value *SrcPtr = getLoadStorePointerOperand(Src);
- Value *DstPtr = getLoadStorePointerOperand(Dst);
+ const MemoryLocation &DstLoc = MemoryLocation::get(Dst);
+ const MemoryLocation &SrcLoc = MemoryLocation::get(Src);
- switch (underlyingObjectsAlias(AA, F->getDataLayout(),
- MemoryLocation::get(Dst),
- MemoryLocation::get(Src))) {
+ switch (underlyingObjectsAlias(AA, F->getDataLayout(), DstLoc, SrcLoc)) {
case AliasResult::MayAlias:
case AliasResult::PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
@@ -3622,21 +3619,22 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
break; // The underlying objects alias; test accesses for dependence.
}
- // establish loop nesting levels
- establishNestingLevels(Src, Dst);
- LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
- LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
-
- FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
- ++TotalArrayPairs;
+ if (DstLoc.Size != SrcLoc.Size) {
+ // The dependence test gets confused if the size of the memory accesses
+ // differ.
+ LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n");
+ return std::make_unique<Dependence>(Src, Dst);
+ }
- unsigned Pairs = 1;
- SmallVector<Subscript, 2> Pair(Pairs);
+ Value *SrcPtr = getLoadStorePointerOperand(Src);
+ Value *DstPtr = getLoadStorePointerOperand(Dst);
const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
const SCEV *DstSCEV = SE->getSCEV(DstPtr);
LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
- if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) {
+ const SCEV *SrcBase = SE->getPointerBase(SrcSCEV);
+ const SCEV *DstBase = SE->getPointerBase(DstSCEV);
+ if (SrcBase != DstBase) {
// If two pointers have different bases, trying to analyze indexes won't
// work; we can't compare them to each other. This can happen, for example,
// if one is produced by an LCSSA PHI node.
@@ -3646,6 +3644,68 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
return std::make_unique<Dependence>(Src, Dst);
}
+
+ std::function<bool(ScalarEvolution *, const SCEV *, uint64_t)>
+ checkOffsetsAndStrides =
+ [&](ScalarEvolution *SE, const SCEV *V, uint64_t Size) -> bool {
+ if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
+ if (!checkOffsetsAndStrides(SE, AddRec->getStart(), Size))
+ return false;
+ return checkOffsetsAndStrides(SE, AddRec->getStepRecurrence(*SE), Size);
+ }
+ if (auto *Cst = dyn_cast<SCEVConstant>(V)) {
+ APInt C = Cst->getAPInt();
+ return C.srem(Size) == 0;
+ }
+ if (auto *Add = dyn_cast<SCEVAddExpr>(V)) {
+ // FIXME: DA cannot reason about expressions containing SSA names.
+ return true;
+ // If the expression contains variable names, i.e., "n + 1", then we
+ // cannot reason about the coefficients of V being multiples of Size.
+ if (SCEVExprContains(V, [](const SCEV *S) { return isa<SCEVUnknown>(S); }))
+ return false;
+ for (const SCEV *AddOp : Add->operands()) {
+ if (!checkOffsetsAndStrides(SE, AddOp, Size))
+ return false;
+ }
+ return true;
+ }
+ if (auto *Mul = dyn_cast<SCEVMulExpr>(V)) {
+ // FIXME: DA cannot reason about expressions containing SSA names.
+ return true;
+ // If the expression contains variable names, i.e., "n * 3", then we
+ // cannot reason about the coefficients of V being multiples of Size.
+ if (SCEVExprContains(V, [](const SCEV *S) { return isa<SCEVUnknown>(S); }))
+ return false;
+ for (const SCEV *MulOp : Mul->operands()) {
+ if (!checkOffsetsAndStrides(SE, MulOp, Size))
+ return false;
+ }
+ return true;
+ }
+ // SCEV node not handled yet.
+ return false;
+ };
+
+ // Check that memory access offsets are multiples of element sizes.
+ const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase);
+ const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);
+ if (!checkOffsetsAndStrides(SE, SrcEv, SrcLoc.Size.toRaw()) ||
+ !checkOffsetsAndStrides(SE, DstEv, DstLoc.Size.toRaw())) {
+ LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n");
+ return std::make_unique<Dependence>(Src, Dst);
+ }
+
+ // establish loop nesting levels
+ establishNestingLevels(Src, Dst);
+ LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
+ LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
+
+ FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
+ ++TotalArrayPairs;
+
+ unsigned Pairs = 1;
+ SmallVector<Subscript, 2> Pair(Pairs);
Pair[0].Src = SrcSCEV;
Pair[0].Dst = DstSCEV;
diff --git a/llvm/test/Analysis/DependenceAnalysis/AA.ll b/llvm/test/Analysis/DependenceAnalysis/AA.ll
index a478143720d04f4..173744a07ef9688 100644
--- a/llvm/test/Analysis/DependenceAnalysis/AA.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/AA.ll
@@ -1,99 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -disable-output "-passes=print<da>" \
; RUN: "-aa-pipeline=basic-aa,tbaa" 2>&1 | FileCheck %s
-; CHECK-LABEL: 'Dependence Analysis' for function 'test_no_noalias'
-; CHECK: da analyze - none!
-; CHECK: da analyze - confused!
-; CHECK: da analyze - none!
define void @test_no_noalias(ptr %A, ptr %B) {
+; CHECK-LABEL: 'test_no_noalias'
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4 --> Dst: store i32 1, ptr %A, align 4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4 --> Dst: store i32 2, ptr %B, align 4
+; CHECK-NEXT: da analyze - confused!
+; CHECK-NEXT: Src: store i32 2, ptr %B, align 4 --> Dst: store i32 2, ptr %B, align 4
+; CHECK-NEXT: da analyze - none!
+;
store i32 1, ptr %A
store i32 2, ptr %B
ret void
}
-; CHECK-LABEL: test_one_noalias
-; CHECK: da analyze - none!
-; CHECK: da analyze - none!
-; CHECK: da analyze - none!
define void @test_one_noalias(ptr noalias %A, ptr %B) {
+; CHECK-LABEL: 'test_one_noalias'
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4 --> Dst: store i32 1, ptr %A, align 4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4 --> Dst: store i32 2, ptr %B, align 4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i32 2, ptr %B, align 4 --> Dst: store i32 2, ptr %B, align 4
+; CHECK-NEXT: da analyze - none!
+;
store i32 1, ptr %A
store i32 2, ptr %B
ret void
}
-; CHECK-LABEL: test_two_noalias
-; CHECK: da analyze - none!
-; CHECK: da analyze - none!
-; CHECK: da analyze - none!
define void @test_two_noalias(ptr noalias %A, ptr noalias %B) {
+; CHECK-LABEL: 'test_two_noalias'
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4 --> Dst: store i32 1, ptr %A, align 4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4 --> Dst: store i32 2, ptr %B, align 4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i32 2, ptr %B, align 4 --> Dst: store i32 2, ptr %B, align 4
+; CHECK-NEXT: da analyze - none!
+;
store i32 1, ptr %A
store i32 2, ptr %B
ret void
}
-; CHECK-LABEL: test_global_alias
-; CHECK: da analyze - none!
-; CHECK: da analyze - confused!
-; CHECK: da analyze - none!
@g = global i32 5
define void @test_global_alias(ptr %A) {
+; CHECK-LABEL: 'test_global_alias'
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4 --> Dst: store i32 1, ptr %A, align 4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4 --> Dst: store i32 2, ptr @g, align 4
+; CHECK-NEXT: da analyze - confused!
+; CHECK-NEXT: Src: store i32 2, ptr @g, align 4 --> Dst: store i32 2, ptr @g, align 4
+; CHECK-NEXT: da analyze - none!
+;
store i32 1, ptr %A
store i32 2, ptr @g
ret void
}
-; CHECK-LABEL: test_global_noalias
-; CHECK: da analyze - none!
-; CHECK: da analyze - none!
-; CHECK: da analyze - none!
define void @test_global_noalias(ptr noalias %A) {
+; CHECK-LABEL: 'test_global_noalias'
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4 --> Dst: store i32 1, ptr %A, align 4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4 --> Dst: store i32 2, ptr @g, align 4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i32 2, ptr @g, align 4 --> Dst: store i32 2, ptr @g, align 4
+; CHECK-NEXT: da analyze - none!
+;
store i32 1, ptr %A
store i32 2, ptr @g
ret void
}
-; CHECK-LABEL: test_global_size
-; CHECK: da analyze - none!
-; CHECK: da analyze - confused!
-; CHECK: da analyze - none!
-; CHECK: da analyze - none!
-; CHECK: da analyze - confused!
-; CHECK: da analyze - none!
@a = global i16 5, align 2
@b = global ptr @a, align 4
define void @test_global_size() {
+; CHECK-LABEL: 'test_global_size'
+; CHECK-NEXT: Src: %l0 = load ptr, ptr @b, align 4 --> Dst: %l0 = load ptr, ptr @b, align 4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: %l0 = load ptr, ptr @b, align 4 --> Dst: %l1 = load i16, ptr %l0, align 2
+; CHECK-NEXT: da analyze - confused!
+; CHECK-NEXT: Src: %l0 = load ptr, ptr @b, align 4 --> Dst: store i16 1, ptr @a, align 2
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: %l1 = load i16, ptr %l0, align 2 --> Dst: %l1 = load i16, ptr %l0, align 2
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: %l1 = load i16, ptr %l0, align 2 --> Dst: store i16 1, ptr @a, align 2
+; CHECK-NEXT: da analyze - confused!
+; CHECK-NEXT: Src: store i16 1, ptr @a, align 2 --> Dst: store i16 1, ptr @a, align 2
+; CHECK-NEXT: da analyze - none!
+;
%l0 = load ptr, ptr @b, align 4
%l1 = load i16, ptr %l0, align 2
store i16 1, ptr @a, align 2
ret void
}
-; CHECK-LABEL: test_tbaa_same
-; CHECK: da analyze - none!
-; CHECK: da analyze - confused!
-; CHECK: da analyze - none!
define void @test_tbaa_same(ptr %A, ptr %B) {
+; CHECK-LABEL: 'test_tbaa_same'
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4, !tbaa !0 --> Dst: store i32 1, ptr %A, align 4, !tbaa !0
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4, !tbaa !0 --> Dst: store i32 2, ptr %B, align 4, !tbaa !0
+; CHECK-NEXT: da analyze - confused!
+; CHECK-NEXT: Src: store i32 2, ptr %B, align 4, !tbaa !0 --> Dst: store i32 2, ptr %B, align 4, !tbaa !0
+; CHECK-NEXT: da analyze - none!
+;
store i32 1, ptr %A, !tbaa !5
store i32 2, ptr %B, !tbaa !5
ret void
}
-; CHECK-LABEL: test_tbaa_diff
-; CHECK: da analyze - none!
-; CHECK: da analyze - none!
-; CHECK: da analyze - none!
define void @test_tbaa_diff(ptr %A, ptr %B) {
+; CHECK-LABEL: 'test_tbaa_diff'
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4, !tbaa !0 --> Dst: store i32 1, ptr %A, align 4, !tbaa !0
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i32 1, ptr %A, align 4, !tbaa !0 --> Dst: store i16 2, ptr %B, align 2, !tbaa !4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i16 2, ptr %B, align 2, !tbaa !4 --> Dst: store i16 2, ptr %B, align 2, !tbaa !4
+; CHECK-NEXT: da analyze - none!
+;
store i32 1, ptr %A, !tbaa !5
store i16 2, ptr %B, !tbaa !9
ret void
}
-; CHECK-LABEL: tbaa_loop
-; CHECK: da analyze - input
-; CHECK: da analyze - none
-; CHECK: da analyze - output
define void @tbaa_loop(i32 %I, i32 %J, ptr nocapture %A, ptr nocapture readonly %B) {
+; CHECK-LABEL: 'tbaa_loop'
+; CHECK-NEXT: Src: %0 = load i16, ptr %arrayidx.us, align 4, !tbaa !0 --> Dst: %0 = load i16, ptr %arrayidx.us, align 4, !tbaa !0
+; CHECK-NEXT: da analyze - input [* *]!
+; CHECK-NEXT: Src: %0 = load i16, ptr %arrayidx.us, align 4, !tbaa !0 --> Dst: store i32 %add.us.lcssa, ptr %arrayidx6.us, align 4, !tbaa !4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i32 %add.us.lcssa, ptr %arrayidx6.us, align 4, !tbaa !4 --> Dst: store i32 %add.us.lcssa, ptr %arrayidx6.us, align 4, !tbaa !4
+; CHECK-NEXT: da analyze - output [*]!
+;
entry:
%cmp = icmp ne i32 %J, 0
%cmp122 = icmp ne i32 %I, 0
diff --git a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
index efc86d39b28ee1a..6768e9067dca386 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -disable-output -da-delinearize=false "-passes=print<da>" \
; RUN: -aa-pipeline=basic-aa 2>&1 | FileCheck %s
; RUN: opt < %s -disable-output -da-delinearize=false -passes='print<da><normalized-results>' \
@@ -15,31 +16,50 @@ target triple = "x86_64-apple-macosx10.6.0"
;; *B++ = A[10*i + j - 1];
define void @banerjee0(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp {
+; CHECK-LABEL: 'banerjee0'
+; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8
+; CHECK-NEXT: da analyze - flow [<= <>]!
+; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
+; CHECK-NEXT: da analyze - confused!
+; CHECK-NEXT: Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
+; CHECK-NEXT: da analyze - confused!
+; CHECK-NEXT: Src: store i64 %0, ptr %B.addr.11, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
+; CHECK-NEXT: da analyze - none!
+;
+; NORMALIZE-LABEL: 'banerjee0'
+; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8
+; NORMALIZE-NEXT: da analyze - none!
+; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8
+; NORMALIZE-NEXT: da analyze - flow [<= <>]!
+; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
+; NORMALIZE-NEXT: da analyze - confused!
+; NORMALIZE-NEXT: Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8
+; NORMALIZE-NEXT: da analyze - none!
+; NORMALIZE-NEXT: Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
+; NORMALIZE-NEXT: da analyze - confused!
+; NORMALIZE-NEXT: Src: store i64 %0, ptr %B.addr.11, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
+; NORMALIZE-NEXT: da analyze - none!
+;
+; DELIN-LABEL: 'banerjee0'
+; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8
+; DELIN-NEXT: da analyze - none!
+; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8
+; DELIN-NEXT: da analyze - flow [<= <>]!
+; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
+; DELIN-NEXT: da analyze - confused!
+; DELIN-NEXT: Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8
+; DELIN-NEXT: da analyze - none!
+; DELIN-NEXT: Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
+; DELIN-NEXT: da analyze - confused!
+; DELIN-NEXT: Src: store i64 %0, ptr %B.addr.11, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
+; DELIN-NEXT: da analyze - none!
+;
entry:
br label %for.cond1.preheader
-; CHECK: 'Dependence Analysis' for function 'banerjee0':
-; CHECK: da analyze - none!
-; CHECK: da analyze - flow [<= <>]!
-; CHECK: da analyze - confused!
-; CHECK: da analyze - none!
-; CHECK: da analyze - confused!
-; CHECK: da analyze - none!
-
-; NORMALIZE: 'Dependence Analysis' for function 'banerjee0':
-; NORMALIZE: da analyze - none!
-; NORMALIZE: da analyze - flow [<= <>]!
-; NORMALIZE: da analyze - confused!
-; NORMALIZE: da analyze - none!
-; NORMALIZE: da analyze - confused!
-; NORMALIZE: da analyze - none!
-
-; DELIN: 'Dependence Analysis' for function 'banerjee0':
-; DELIN: da analyze - none!
-; DELIN: da analyze - flow [<= <>]!
-; DELIN: da analyze - confused!
-; DELIN: da analyze - none!
-; DELIN: da analyze - confused!
-; DELIN: da analyze - none!
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi ptr [ %B, %entry ], [ %scevgep, %for.inc7 ]
@@ -81,34 +101,52 @@ for.end9: ; preds = %for.inc7
;; *B++ = A[10*i + j - 1];
define void @banerjee1(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp {
+; CHECK-LABEL: 'banerjee1'
+; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8
+; CHECK-NEXT: da analyze - output [* *]!
+; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %2 = load i64, ptr %arrayidx6, align 8
+; CHECK-NEXT: da analyze - flow [* <>]!
+; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8
+; CHECK-NEXT: da analyze - confused!
+; CHECK-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: %2 = load i64, ptr %arrayidx6, align 8
+; CHECK-NEXT: da analyze - input [* *]!
+; CHECK-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8
+; CHECK-NEXT: da analyze - confused!
+; CHECK-NEXT: Src: store i64 %2, ptr %B.addr.12, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8
+; CHECK-NEXT: da analyze - output [* *]!
+;
+; NORMALIZE-LABEL: 'banerjee1'
+; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8
+; NORMALIZE-NEXT: da analyze - output [* *]!
+; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %2 = load i64, ptr %arrayidx6, align 8
+; NORMALIZE-NEXT: da analyze - flow [* <>]!
+; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8
+; NORMALIZE-NEXT: da analyze - confused!
+; NORMALIZE-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: %2 = load i64, ptr %arrayidx6, align 8
+; NORMALIZE-NEXT: da analyze - input [* *]!
+; NORMALIZE-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8
+; NORMALIZE-NEXT: da analyze - confused!
+; NORMALIZE-NEXT: Src: store i64 %2, ptr %B.addr.12, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8
+; NORMALIZE-NEXT: da analyze - output [* *]!
+;
+; DELIN-LABEL: 'banerjee1'
+; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8
+; DELIN-NEXT: da analyze - output [* *]!
+; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %2 = load i64, ptr %arrayidx6, align 8
+; DELIN-NEXT: da analyze - flow [* <>]!
+; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, a...
[truncated]
✅ With the latest revision this PR passed the undef deprecator.
✅ With the latest revision this PR passed the C/C++ code formatter.
This is a work-in-progress patch to enable loop-interchange by default and is a continuation of the RFC: https://discourse.llvm.org/t/enabling-loop-interchange/82589

Basically, we promised to fix any compile-time and correctness issues in the different components involved here (loop-interchange and dependence analysis) before discussing enabling interchange by default. We think we are close to completing this; I would like to explain where we are and check if there are any thoughts or concerns. A quick overview of the correctness and compile-time improvements that we have made:

Correctness:
- [LoopInterchange] Remove 'S' Scalar Dependencies (llvm#119345)
- [LoopInterchange] Fix overflow in cost calculation (llvm#111807)
- [LoopInterchange] Handle LE and GE correctly (PR llvm#124901) @kasuga-fj
- [DA] disambiguate evolution of base addresses (llvm#116628)

Compile-times:
- [LoopInterchange] Constrain number of load/stores in a loop (llvm#118973)
- [LoopInterchange] Bail out early if minimum loop nest is not met (llvm#115128)
- [LoopInterchange] Hoist isComputableLoopNest() in the control flow (llvm#124247)

And in terms of remaining work, we think we are very close to fixing these dependence analysis issues:
- [DA] do not handle array accesses of different offsets (llvm#123436)
- [DA] Dependence analysis does not handle array accesses of different sizes (llvm#116630)
- [DA] use NSW arithmetic (llvm#116632)

The compile-time increase, with a geomean increase of 0.19%, looks good (after committing llvm#124247), I think:

stage1-O3:
- kimwitu++ +0.10%
- sqlite3 +0.14%
- consumer-typeset +0.07%
- Bullet +0.06%
- tramp3d-v4 +0.21%
- mafft +0.39%
- ClamAV +0.06%
- lencod +0.61%
- SPASS +0.17%
- 7zip +0.08%
- geomean +0.19%

See also: http://llvm-compile-time-tracker.com/compare.php?from=19a7fe03b4f58c4f73ea91d5e63bc4c6e61f987b&to=b24f1367d68ee675ea93ecda4939208c6b68ae4b&stat=instructions%3Au

We might want to look into lencod to see if we can improve more, but not sure it is strictly necessary.
Force-pushed from 724bddc to c4e67a9.
      return true;
    if (!Param) {
      Param = V;
      return true;
I don't understand: if there is only a single SCEVUnknown involved, why is it automatically a multiple of EltSize?
%ptr = getelementptr i8, ptr %base, i32 %offset
store i64 42, ptr %ptr
So the array is elements of 8 bytes, but the gep is incrementing by single bytes. If the SCEVUnknown %offset is not a multiple of 8, the check should fail?
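For illustration, here is a rough C++ rendering of the IR above (hypothetical code, not from the patch):

```cpp
#include <cstring>

// An 8-byte store at a byte offset that need not be a multiple of 8.
// If offset == 4, the store straddles two 8-byte elements of the array.
void store_at_byte_offset(char *base, int offset) {
  long long v = 42;
  std::memcpy(base + offset, &v, sizeof v); // like: store i64 42, ptr %ptr
}
```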
If a parameter (an SSA name invariant in the loop) %n is used as an offset, then we assume that %n is a multiple of the array element size. If another parameter %m is also used as an offset into the same array, then the test fails because we do not know the relation between %n and %m.
We could collect all those parametric expressions and implement a runtime check (%n % 8 == 0) && (%m % 8 == 0) with assumptions, like we do in Polly, although the dependence analysis would only provide the set of constraints as a context, and it should be up to the loop transforms to version the code according to the validity constraints.
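To make the versioning idea concrete, here is a minimal sketch (function name, types, and loop body are hypothetical, not part of this patch): the transform would guard the dependence-based version with the collected assumptions and keep the original loop as a fallback.

```cpp
#include <cstddef>
#include <cstdint>

// Versioned loop: the guard encodes DA's runtime assumptions, i.e. that
// the byte offsets n and m are multiples of the 8-byte element size.
void copy_elements(int64_t *base, size_t n, size_t m, size_t len) {
  int64_t *dst = (int64_t *)((char *)base + n);
  int64_t *src = (int64_t *)((char *)base + m);
  if (n % sizeof(int64_t) == 0 && m % sizeof(int64_t) == 0) {
    // Assumptions hold: the dependence info is valid, so a transformed
    // (e.g. interchanged) loop could be placed here.
    for (size_t i = 0; i < len; ++i)
      dst[i] = src[i];
  } else {
    // Assumptions failed at runtime: run the original, untransformed loop.
    for (size_t i = 0; i < len; ++i)
      dst[i] = src[i];
  }
}
```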
If a parameter (ssa name invariant in loop) name %n is used as offset, then we assume that %n is a multiple of the array element size.

There isn't even a check that %n is loop-invariant. That assumption obviously does not necessarily hold true.
I think what you mean is that DA is assuming that %n is part of all index expressions and therefore has the same offset everywhere. If some index expressions use %m instead, they may have different relative offsets.
I do not think this is sound. Some index expressions may also use %n + %n, %n + 1, or no %n at all, in which case they may have different relative offsets already. Alternatively, I may not have understood the logic here.
If %n (or any other subexpression) is invariant to all loops and occurs in every index, I think it could be considered part of the base pointer. ScalarEvolution::getPointerBase atm looks into all SCEVAddExpr disregarding whether operands are loop-invariant. Maybe that's a viable approach.
DA is assuming that %n is part of all index expressions and therefore has the same offset everywhere. If some index expressions use %m instead, they may have different relative offsets.

You are right. If both %n and %m are used to index elements of the same array, the difference %n - %m is unknown at compilation time, and so the current dependence analysis result would be incorrect.

Some index expressions may also use %n + %n, %n + 1, or no %n at all in which case they may have different relative offsets already.

Correct.
This patch tries to prove at compile time that if an array offset uses %n, then all other array offsets use %n plus a multiple of the array element size.
For example, if eltSize = 8 and (%n mod 8) = x, then ((%n + 1) mod 8) = x + 1.
Even if we do not know the value of x at compilation time, we know that two accesses at %n and %n + 1 are not separated by a multiple of 8, so the dependence test cannot produce a valid answer for them.

If %n (or any other subexpression) is invariant to all loops and occurs in every index, I think it could be considered part of the base pointer.

Correct.
DA is assuming that %n is part of all index expressions and therefore has the same offset everywhere. If some index expressions use %m instead, they may have different relative offsets.

You are right. If both %n and %m are used to index elements in the same array, the difference %n - %m is unknown at compilation time, and so the current dependence analysis result would be incorrect.

I am catching up on this, trying to let this sink in, but had this perhaps silly question in the meantime: if there are still correctness issues, can we not just return Confused for now when we detect that we have different SCEV expressions with different offsets? Or would this make DA so conservative that it will be useless?
Out of 43 tests in test/Analysis/DependenceAnalysis, half fail if I disable DA whenever the array access uses a parameter (base + parameter).
Some index expressions may also use %n + %n, %n + 1, or no %n at all in which case they may have different relative offsets already.

Correct. This patch tries to prove at compile time that if an array offset is using %n, then all other array offsets will use %n in multiples of the array element size.

But it does not check for the same offsets; it checks that at most one SCEVUnknown is referenced. For instance:

*(int*)((char*)BasePtr)
*(int*)((char*)BasePtr + n)

The first one should pass trivially by https://github.com/sebpop/llvm-project/blob/c7b5c98204ab580443f45d148994bc93827b5258/llvm/lib/Analysis/ScalarEvolution.cpp#L10992
The second should pass by https://github.com/sebpop/llvm-project/blob/c7b5c98204ab580443f45d148994bc93827b5258/llvm/lib/Analysis/ScalarEvolution.cpp#L11013 because a Param has not been set yet. However, there is nothing that ensures "n is a multiple of sizeof(int)", hence a partial/unaligned access relative to the first.

If %n (or any other subexpression) is invariant to all loops and occurs in every index, I think it could be considered part of the base pointer.

But currently it is not?!? The base pointer is determined by ScalarEvolution::getPointerBase, which does not consider/know what is considered to be a parameter.
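To see the hazard concretely, here is a small self-contained program (hypothetical, not from the patch): with n = 2, the 4-byte access at (char*)BasePtr + n covers bytes [2, 6) while the access at BasePtr covers bytes [0, 4) — a partial alias that subscript-based dependence testing cannot represent.

```cpp
#include <cstdio>
#include <cstring>

int main() {
  alignas(4) char buf[8] = {};
  int a = 0x11223344;
  std::memcpy(buf, &a, sizeof a);     // access at BasePtr: bytes [0, 4)
  int b;
  std::memcpy(&b, buf + 2, sizeof b); // access at BasePtr + 2: bytes [2, 6)
  // The two accesses partially overlap even though both are int-sized.
  std::printf("0x%08x\n", b);
}
```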
I fixed these issues in 60ba4e6 by recording runtime assumptions for all expressions containing parameters. These runtime assumptions can be emitted as runtime checks ("if" conditions) that guard the code generated with the dependence info.
I'm not familiar with DependenceAnalysis, but I've read the code and will comment on it. From what I've understood, I think what @Meinersbur said is correct.
This is just my guess, but does this patch try to guarantee correctness based on the fact that SrcPtr and DstPtr are congruent modulo EltSize if all index expressions are multiples of the same value (%n)? I haven't thought about this enough to be certain that this approach is correct, but even if it is, I don't think it is properly implemented.
I have one more question: is it insufficient to check that the align value is equal to EltSize for both Src and Dst?
…sizes

This fixes bug llvm#16183, where the elements of the same array are accessed as i32 and i64. This is not handled by the array dependence analysis.
Check that the offsets from the memory base address are multiples of the array element size for a pair of memory accesses in a dependence test. The patch adds a testcase that cannot be disambiguated by the DA.
Co-authored-by: Michael Kruse <[email protected]>
Co-authored-by: Michael Kruse <[email protected]>
Co-authored-by: Michael Kruse <[email protected]>
Co-authored-by: Michael Kruse <[email protected]>
I added an RFC for the DependenceAnalysis interface change:
I'll check the discourse later.
SCEVUnionPredicate Assumptions = DI.getRuntimeAssumptions();
// Early fail when the dependence analysis has runtime assumptions.
// FIXME: this could be handled by versioning the loop.
if (!Assumptions.isAlwaysTrue())
  return PreservedAnalyses::all();
Is this necessary? DependenceInfo doesn't seem to compute anything until depends is called.
Just above this is a call to CacheCost::getCacheCost that queries all dependences in the loop nest to compute cache reuse patterns.
It's true, I missed it.
Hmm, so does this mean that the user of CacheCostAnalysis must check whether DA assumes some conditions? I think it's inconvenient; I'd like an analysis pass to do as much as it can without any additional assumptions by default.
I think it's inconvenient, I'd like an analysis pass to do as much as it can without any additional assumptions by default.

Can you consider this? For example, I think there is a way to provide some API to control the default policy that DependenceInfo follows to determine if it is allowed to append runtime checks.
I don't fully understand this component yet, but not sure if "the offset is
Under the assumption
Do not check for symbolic names; just handle all possible SCEV expressions with compile-time tests, and record runtime predicates when the compile-time test fails.
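As a sketch of that strategy, here is a toy version of the recursive compile-time test over a simplified expression tree (illustrative only; the real code walks llvm::SCEV nodes and, per this PR, records a predicate when the proof fails):

```cpp
#include <cstdint>
#include <vector>

// Toy expression tree standing in for SCEV (hypothetical).
struct Expr {
  enum Kind { Constant, Add, Mul, AddRec, Unknown } K;
  int64_t Value = 0;              // used when K == Constant
  std::vector<const Expr *> Ops;  // operands ({Start, Step} for AddRec)
};

// Return true if E is provably a multiple of Size at compile time.
static bool isKnownMultipleOf(const Expr *E, uint64_t Size) {
  switch (E->K) {
  case Expr::Constant:
    return E->Value % (int64_t)Size == 0;
  case Expr::Add:    // a sum is a multiple if every addend is
  case Expr::AddRec: // {Start,+,Step}: both start and stride must be
    for (const Expr *Op : E->Ops)
      if (!isKnownMultipleOf(Op, Size))
        return false;
    return true;
  case Expr::Mul:    // a product is a multiple if one factor is
    for (const Expr *Op : E->Ops)
      if (isKnownMultipleOf(Op, Size))
        return true;
    return false;
  case Expr::Unknown: // symbolic, e.g. %n: not provable at compile time,
    return false;     // so a runtime predicate would be recorded instead
  }
  return false;
}
```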
I added this as a testcase in 480ceff.
DA sees the following access functions:
The compile-time check for [...]
I may have confused you, but I don't think this example directly causes any problems. What I'm concerned about is a case where a target loop has such a code fragment in its body. Looking at the testcases of Delinearization, the subscript for the second store can become [...]. If so, what happens if Delinearization succeeded only for the second store? I don't have any cases to check for such a situation, but I guess the subscripts would become like the following.
In this case the analysis still works fine, because we don't know the relation between [...]
If you have a testcase that shows the issue, I will add it to the tests.
If you have a testcase that shows the issue, I will add it to the tests.
OK, I will think about it some more.
if (Assumptions.empty()) {
  Assumptions.append(Assume.begin(), Assume.end());
Perhaps this branch is unnecessary.
if (Assumptions.empty()) {
  Assumptions.append(Assume.begin(), Assume.end());
} else {
  // Add non-redundant assumptions.
(This is just a comment, not a request to fix anything.)
I realized that Assume might contain redundant predicates, e.g.,

EltSize = 4
SrcSCEV = 2 * %n
DstSCEV = %n

First we will call isKnownMultipleOf(SrcEV, EltSize, Assume) and it will add a predicate for (2 * n) % 4 == 0. Next isKnownMultipleOf(DstEV, EltSize, Assume) will be called and it will add n % 4 == 0, which is stricter than the previous one.
In principle, if SCEVPredicate::implies is complete, the current algorithm allows adding a predicate to Assume that is stricter than one previously added, but if I understand correctly
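A toy model of the non-redundant insertion being discussed might look like this (hypothetical; the real code compares llvm::SCEVPredicate objects via implies()). Here a predicate "V % K == 0" implies "V % J == 0" whenever J divides K:

```cpp
#include <cstdint>
#include <vector>

struct DivisibilityPred {
  uint64_t K; // represents the predicate "V % K == 0"
  bool implies(const DivisibilityPred &O) const { return K % O.K == 0; }
};

static void addNonRedundant(std::vector<DivisibilityPred> &Assume,
                            DivisibilityPred New) {
  // Skip the new predicate if an existing one already implies it.
  for (const DivisibilityPred &P : Assume)
    if (P.implies(New))
      return;
  // Drop existing predicates subsumed by the stricter new one.
  std::erase_if(Assume,
                [&](const DivisibilityPred &P) { return New.implies(P); });
  Assume.push_back(New);
}
```

With EltSize = 4, SrcSCEV = 2 * %n and DstSCEV = %n correspond to predicates on %n with K = 2 and K = 4 respectively; adding K = 4 after K = 2 would replace the weaker predicate rather than keep both.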
(NOTE: I think you should avoid force-pushing unless rebasing is necessary. Please read the LLVM GitHub User Guide for more details.)
I was concerned about a case like the following.
I tried this case; the delinearization itself succeeded, and the subscripts for both stores became as follows:
But it was rejected by the delinearization check in DA, and the result of the dependence analysis was correct. (llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp, lines 3452 to 3469 in 3dbba8e)
A slight modification of the above code, as shown below, bypasses this check and returns incorrect results. I'm not sure if this IR is legal, but if it is, I think we need to make some kind of fix for the delinearization. So, my question is: is the below LLVM IR (using different
Output:
This patch has grown into doing at least 3 things, I think:
I guess it would be easier to untangle this and create separate (stacked) patches for this, so that it's easier to review and maintain (in case we would need to partially revert/recommit some of this later). E.g., it looks like the assumptions addition can be tested in isolation as I see support for it being added to dump functions, i.e. we don't need the offset fix for that. |
I agree with this. I think this PR started out trying to handle different stride/load/store sizes as much as possible without runtime assumptions, but the approach and implementation have changed, and it now contains several features that should be split into several patches. IMHO, I prefer the previous approach, i.e., rejecting cases where we need one or more runtime checks. The current implementation looks better than the previous one, so as the first step, it seems reasonable to me to give up cases where we need one or more runtime assumptions (i.e., [...]). Note that, if you want
I think this is an elegant way of moving this forward. The assumptions are off by default, so they won't disturb other users. Other folks have looked at other parts of this patch; I am happy with the assumptions, so this LGTM. But please leave enough time for others to comment, at least a couple of days, before merging this.
@@ -187,7 +187,7 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
   if (DstI->mayReadOrWriteMemory()) {
     OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";
     OS << " da analyze - ";
-    if (auto D = DA->depends(&*SrcI, &*DstI)) {
+    if (auto D = DA->depends(&*SrcI, &*DstI, true)) {
Suggested change:
-    if (auto D = DA->depends(&*SrcI, &*DstI, true)) {
+    if (auto D = DA->depends(&*SrcI, &*DstI, /*UnderRuntimeAssumptions=*/true)) {
assert(isLoadOrStore(Dst) && "instruction is not load or store");
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
const MemoryLocation &DstLoc = MemoryLocation::get(Dst);
Is it worth keeping the asserts?
}

uint64_t EltSize = SrcLoc.Size.toRaw();
assert(EltSize == DstLoc.Size.toRaw() && "Array element size differ");
If I am not mistaken, we check the inequality of these sizes at line 3656, so they are guaranteed to be equal here and we can get rid of the assert. Or can they change?
The patch adds a testcase that is not supposed to be disambiguated by the DA.
Most of the code of the filter is disabled under FIXME comments.
There is not much DA can do to prove properties on symbolic expressions.
Polly collects all assumptions under which data dependences and code-gen are valid.
DA needs a similar mechanism.
Without collecting assumptions, half the testsuite fails.
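Based on the interface as quoted in this thread, a client might consume the new API roughly as follows (a sketch; the names are taken from the snippets above and may not match the final patch):

```cpp
#include "llvm/Analysis/DependenceAnalysis.h"
using namespace llvm;

// Query a dependence while allowing DA to record runtime assumptions,
// then report whether the answer is valid without loop versioning.
static bool resultValidWithoutChecks(DependenceInfo &DI, Instruction *Src,
                                     Instruction *Dst) {
  std::unique_ptr<Dependence> D =
      DI.depends(Src, Dst, /*UnderRuntimeAssumptions=*/true);
  (void)D; // D describes the dependence, if any, under the assumptions.
  SCEVUnionPredicate Assumptions = DI.getRuntimeAssumptions();
  // The answer (dependence or independence) is unconditionally valid
  // only when no runtime predicate was collected; otherwise a transform
  // must version the loop on the predicate before relying on it.
  return Assumptions.isAlwaysTrue();
}
```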