Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 25f405e

Browse files
davemgreen, c-rhodes
authored and committed
[CGP][AArch64] Do not sink instructions that might read/write memory. (#176182)
The test case's call instruction was being sunk past the point where the memory it accessed was valid. Add a check that CGP does not try to sink instructions that might be invalid to move. Fixes #176095. (Cherry picked from commit a4975a8.)
1 parent 915ef14 commit 25f405e

2 files changed

Lines changed: 93 additions & 7 deletions

File tree

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7943,7 +7943,7 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
79437943

79447944
for (Use *U : reverse(OpsToSink)) {
79457945
auto *UI = cast<Instruction>(U->get());
7946-
if (isa<PHINode>(UI))
7946+
if (isa<PHINode>(UI) || UI->mayHaveSideEffects() || UI->mayReadFromMemory())
79477947
continue;
79487948
if (UI->getParent() == TargetBB) {
79497949
if (InstOrdering[UI] < InstOrdering[InsertPoint])

llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll

Lines changed: 92 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,12 @@ entry:
142142

143143
if.then:
144144
%s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
145-
%vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
145+
%vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2)
146146
ret <8 x i16> %vmull0
147147

148148
if.else:
149149
%s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
150-
%vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
150+
%vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4)
151151
ret <8 x i16> %vmull1
152152
}
153153

@@ -174,12 +174,12 @@ entry:
174174

175175
if.then:
176176
%s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
177-
%vmull0 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
177+
%vmull0 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %s1, <8 x i8> %s2)
178178
ret <8 x i16> %vmull0
179179

180180
if.else:
181181
%s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
182-
%vmull1 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
182+
%vmull1 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %s3, <8 x i8> %s4)
183183
ret <8 x i16> %vmull1
184184
}
185185

@@ -294,12 +294,12 @@ entry:
294294

295295
if.then:
296296
%s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
297-
%vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
297+
%vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2)
298298
ret <8 x i16> %vmull0
299299

300300
if.else:
301301
%s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 10, i32 12, i32 13, i32 14, i32 15>
302-
%vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
302+
%vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4)
303303
ret <8 x i16> %vmull1
304304
}
305305

@@ -1003,3 +1003,89 @@ entry:
10031003
%2 = sub <vscale x 8 x i16> %0, %1
10041004
ret <vscale x 8 x i16> %2
10051005
}
1006+
1007+
declare range(i64 0, 65536) i64 @backsmith_pure_3(ptr dead_on_return readonly captures(none) %0, <8 x i8> %BS_ARG_1, i32 %BS_ARG_2)
1008+
define i32 @dont_sink_calls(ptr %func_1_a) {
1009+
; CHECK-LABEL: @dont_sink_calls(
1010+
; CHECK-NEXT: entry:
1011+
; CHECK-NEXT: [[BYVAL_TEMP:%.*]] = alloca <16 x i16>, align 16
1012+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BYVAL_TEMP]])
1013+
; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr [[BYVAL_TEMP]], align 16
1014+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @backsmith_pure_3(ptr dead_on_return nonnull [[BYVAL_TEMP]], <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 10, i8 0, i8 0>, i32 0)
1015+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[BYVAL_TEMP]])
1016+
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[FUNC_1_A:%.*]], align 8
1017+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[TMP0]], 0
1018+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[CLEANUP:%.*]]
1019+
; CHECK: if.end:
1020+
; CHECK-NEXT: [[VQADDQ_V_I:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> <i8 3, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> zeroinitializer)
1021+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[VQADDQ_V_I]], <16 x i8> poison, <16 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1022+
; CHECK-NEXT: [[VECINIT21:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i64>
1023+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[TMP2]], i64 0
1024+
; CHECK-NEXT: [[VECINIT38:%.*]] = shufflevector <16 x i64> [[TMP3]], <16 x i64> poison, <16 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1025+
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw <16 x i64> [[VECINIT38]], [[VECINIT21]]
1026+
; CHECK-NEXT: store <16 x i64> [[MUL]], ptr [[FUNC_1_A]], align 128
1027+
; CHECK-NEXT: br label [[CLEANUP]]
1028+
; CHECK: cleanup:
1029+
; CHECK-NEXT: ret i32 0
1030+
;
1031+
entry:
1032+
%byval-temp = alloca <16 x i16>, align 16
1033+
call void @llvm.lifetime.start.p0(ptr nonnull %byval-temp)
1034+
store <16 x i16> zeroinitializer, ptr %byval-temp, align 16
1035+
%call4 = call i64 @backsmith_pure_3(ptr dead_on_return nonnull %byval-temp, <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 10, i8 0, i8 0>, i32 0)
1036+
call void @llvm.lifetime.end.p0(ptr nonnull %byval-temp)
1037+
%0 = load i64, ptr %func_1_a, align 8
1038+
%tobool.not = icmp eq i64 %0, 0
1039+
br i1 %tobool.not, label %if.end, label %cleanup
1040+
1041+
if.end: ; preds = %entry
1042+
%vqaddq_v.i = tail call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> <i8 3, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> zeroinitializer)
1043+
%1 = shufflevector <16 x i8> %vqaddq_v.i, <16 x i8> poison, <16 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1044+
%vecinit21 = zext <16 x i8> %1 to <16 x i64>
1045+
%2 = insertelement <16 x i64> poison, i64 %call4, i64 0
1046+
%vecinit38 = shufflevector <16 x i64> %2, <16 x i64> poison, <16 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1047+
%mul = mul nuw nsw <16 x i64> %vecinit38, %vecinit21
1048+
store <16 x i64> %mul, ptr %func_1_a
1049+
br label %cleanup
1050+
1051+
cleanup: ; preds = %entry, %if.end
1052+
ret i32 0
1053+
}
1054+
1055+
define i32 @dont_sink_loads(i1 %c, ptr %p1, ptr %p2) {
1056+
; CHECK-LABEL: @dont_sink_loads(
1057+
; CHECK-NEXT: entry:
1058+
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[P1:%.*]], align 8, !range [[RNG0:![0-9]+]], !noundef [[META1:![0-9]+]]
1059+
; CHECK-NEXT: store i64 0, ptr [[P2:%.*]], align 8
1060+
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_END:%.*]], label [[CLEANUP:%.*]]
1061+
; CHECK: if.end:
1062+
; CHECK-NEXT: [[VQADDQ_V_I:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> <i8 3, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> zeroinitializer)
1063+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[VQADDQ_V_I]], <16 x i8> poison, <16 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1064+
; CHECK-NEXT: [[VECINIT21:%.*]] = zext <16 x i8> [[TMP0]] to <16 x i64>
1065+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0
1066+
; CHECK-NEXT: [[VECINIT38:%.*]] = shufflevector <16 x i64> [[TMP2]], <16 x i64> poison, <16 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1067+
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw <16 x i64> [[VECINIT38]], [[VECINIT21]]
1068+
; CHECK-NEXT: store <16 x i64> [[MUL]], ptr [[P1]], align 128
1069+
; CHECK-NEXT: br label [[CLEANUP]]
1070+
; CHECK: cleanup:
1071+
; CHECK-NEXT: ret i32 0
1072+
;
1073+
entry:
1074+
%call4 = load i64, ptr %p1, !range !0, !noundef !{}
1075+
store i64 0, ptr %p2
1076+
br i1 %c, label %if.end, label %cleanup
1077+
1078+
if.end: ; preds = %entry
1079+
%vqaddq_v.i = tail call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> <i8 3, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> zeroinitializer)
1080+
%1 = shufflevector <16 x i8> %vqaddq_v.i, <16 x i8> poison, <16 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1081+
%vecinit21 = zext <16 x i8> %1 to <16 x i64>
1082+
%2 = insertelement <16 x i64> poison, i64 %call4, i64 0
1083+
%vecinit38 = shufflevector <16 x i64> %2, <16 x i64> poison, <16 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1084+
%mul = mul nuw nsw <16 x i64> %vecinit38, %vecinit21
1085+
store <16 x i64> %mul, ptr %p1
1086+
br label %cleanup
1087+
1088+
cleanup: ; preds = %entry, %if.end
1089+
ret i32 0
1090+
}
1091+
!0 = !{i64 0, i64 128}

0 commit comments

Comments
 (0)