@@ -142,12 +142,12 @@ entry:
142142
143143if.then:
144144 %s2 = shufflevector <16 x i8 > %b , <16 x i8 > undef , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
145- %vmull0 = tail call <8 x i16 > @llvm.aarch64.neon.umull.v8i16 (<8 x i8 > %s1 , <8 x i8 > %s2 ) # 3
145+ %vmull0 = tail call <8 x i16 > @llvm.aarch64.neon.umull.v8i16 (<8 x i8 > %s1 , <8 x i8 > %s2 )
146146 ret <8 x i16 > %vmull0
147147
148148if.else:
149149 %s4 = shufflevector <16 x i8 > %b , <16 x i8 > undef , <8 x i32 > <i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
150- %vmull1 = tail call <8 x i16 > @llvm.aarch64.neon.umull.v8i16 (<8 x i8 > %s3 , <8 x i8 > %s4 ) # 3
150+ %vmull1 = tail call <8 x i16 > @llvm.aarch64.neon.umull.v8i16 (<8 x i8 > %s3 , <8 x i8 > %s4 )
151151 ret <8 x i16 > %vmull1
152152}
153153
@@ -174,12 +174,12 @@ entry:
174174
175175if.then:
176176 %s2 = shufflevector <16 x i8 > %b , <16 x i8 > undef , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
177- %vmull0 = tail call <8 x i16 > @llvm.aarch64.neon.smull.v8i16 (<8 x i8 > %s1 , <8 x i8 > %s2 ) # 3
177+ %vmull0 = tail call <8 x i16 > @llvm.aarch64.neon.smull.v8i16 (<8 x i8 > %s1 , <8 x i8 > %s2 )
178178 ret <8 x i16 > %vmull0
179179
180180if.else:
181181 %s4 = shufflevector <16 x i8 > %b , <16 x i8 > undef , <8 x i32 > <i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
182- %vmull1 = tail call <8 x i16 > @llvm.aarch64.neon.smull.v8i16 (<8 x i8 > %s3 , <8 x i8 > %s4 ) # 3
182+ %vmull1 = tail call <8 x i16 > @llvm.aarch64.neon.smull.v8i16 (<8 x i8 > %s3 , <8 x i8 > %s4 )
183183 ret <8 x i16 > %vmull1
184184}
185185
@@ -294,12 +294,12 @@ entry:
294294
295295if.then:
296296 %s2 = shufflevector <16 x i8 > %b , <16 x i8 > undef , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
297- %vmull0 = tail call <8 x i16 > @llvm.aarch64.neon.umull.v8i16 (<8 x i8 > %s1 , <8 x i8 > %s2 ) # 3
297+ %vmull0 = tail call <8 x i16 > @llvm.aarch64.neon.umull.v8i16 (<8 x i8 > %s1 , <8 x i8 > %s2 )
298298 ret <8 x i16 > %vmull0
299299
300300if.else:
301301 %s4 = shufflevector <16 x i8 > %b , <16 x i8 > undef , <8 x i32 > <i32 8 , i32 9 , i32 10 , i32 10 , i32 12 , i32 13 , i32 14 , i32 15 >
302- %vmull1 = tail call <8 x i16 > @llvm.aarch64.neon.umull.v8i16 (<8 x i8 > %s3 , <8 x i8 > %s4 ) # 3
302+ %vmull1 = tail call <8 x i16 > @llvm.aarch64.neon.umull.v8i16 (<8 x i8 > %s3 , <8 x i8 > %s4 )
303303 ret <8 x i16 > %vmull1
304304}
305305
@@ -1003,3 +1003,89 @@ entry:
10031003 %2 = sub <vscale x 8 x i16 > %0 , %1
10041004 ret <vscale x 8 x i16 > %2
10051005}
1006+
1007+ declare range(i64 0 , 65536 ) i64 @backsmith_pure_3 (ptr dead_on_return readonly captures(none) %0 , <8 x i8 > %BS_ARG_1 , i32 %BS_ARG_2 ) ; external callee used by @dont_sink_calls; its declared return range is [0, 65536)
1008+ define i32 @dont_sink_calls (ptr %func_1_a ) { ; the call result is computed in entry but consumed only in if.end; the expected output below still has the call in entry
1009+ ; CHECK-LABEL: @dont_sink_calls(
1010+ ; CHECK-NEXT: entry:
1011+ ; CHECK-NEXT: [[BYVAL_TEMP:%.*]] = alloca <16 x i16>, align 16
1012+ ; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BYVAL_TEMP]])
1013+ ; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr [[BYVAL_TEMP]], align 16
1014+ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @backsmith_pure_3(ptr dead_on_return nonnull [[BYVAL_TEMP]], <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 10, i8 0, i8 0>, i32 0)
1015+ ; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[BYVAL_TEMP]])
1016+ ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[FUNC_1_A:%.*]], align 8
1017+ ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[TMP0]], 0
1018+ ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[CLEANUP:%.*]]
1019+ ; CHECK: if.end:
1020+ ; CHECK-NEXT: [[VQADDQ_V_I:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> <i8 3, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> zeroinitializer)
1021+ ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[VQADDQ_V_I]], <16 x i8> poison, <16 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1022+ ; CHECK-NEXT: [[VECINIT21:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i64>
1023+ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[TMP2]], i64 0
1024+ ; CHECK-NEXT: [[VECINIT38:%.*]] = shufflevector <16 x i64> [[TMP3]], <16 x i64> poison, <16 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1025+ ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw <16 x i64> [[VECINIT38]], [[VECINIT21]]
1026+ ; CHECK-NEXT: store <16 x i64> [[MUL]], ptr [[FUNC_1_A]], align 128
1027+ ; CHECK-NEXT: br label [[CLEANUP]]
1028+ ; CHECK: cleanup:
1029+ ; CHECK-NEXT: ret i32 0
1030+ ;
1031+ entry:
1032+ %byval-temp = alloca <16 x i16 >, align 16
1033+ call void @llvm.lifetime.start.p0 (ptr nonnull %byval-temp )
1034+ store <16 x i16 > zeroinitializer , ptr %byval-temp , align 16
1035+ %call4 = call i64 @backsmith_pure_3 (ptr dead_on_return nonnull %byval-temp , <8 x i8 > <i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 10 , i8 0 , i8 0 >, i32 0 ) ; sits between lifetime.start and lifetime.end of %byval-temp, which it receives as its pointer argument
1036+ call void @llvm.lifetime.end.p0 (ptr nonnull %byval-temp )
1037+ %0 = load i64 , ptr %func_1_a , align 8
1038+ %tobool.not = icmp eq i64 %0 , 0
1039+ br i1 %tobool.not , label %if.end , label %cleanup ; if.end is entered only when the loaded i64 equals zero
1040+
1041+ if.end: ; preds = %entry
1042+ %vqaddq_v.i = tail call <16 x i8 > @llvm.aarch64.neon.uqadd.v16i8 (<16 x i8 > <i8 3 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 >, <16 x i8 > zeroinitializer )
1043+ %1 = shufflevector <16 x i8 > %vqaddq_v.i , <16 x i8 > poison, <16 x i32 > <i32 0 , i32 2 , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> ; result lanes 0 and 1 take source lanes 0 and 2; every other lane is poison
1044+ %vecinit21 = zext <16 x i8 > %1 to <16 x i64 >
1045+ %2 = insertelement <16 x i64 > poison, i64 %call4 , i64 0 ; the only use of %call4 in this function
1046+ %vecinit38 = shufflevector <16 x i64 > %2 , <16 x i64 > poison, <16 x i32 > <i32 0 , i32 0 , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> ; result lanes 0 and 1 both read lane 0 of %2; every other lane is poison
1047+ %mul = mul nuw nsw <16 x i64 > %vecinit38 , %vecinit21
1048+ store <16 x i64 > %mul , ptr %func_1_a
1049+ br label %cleanup
1050+
1051+ cleanup: ; preds = %entry, %if.end
1052+ ret i32 0
1053+ }
1054+
1055+ define i32 @dont_sink_loads (i1 %c , ptr %p1 , ptr %p2 ) { ; load-based variant: the value is loaded in entry, a store through %p2 follows, and the only use is in if.end; the expected output below still has the load in entry ahead of the store
1056+ ; CHECK-LABEL: @dont_sink_loads(
1057+ ; CHECK-NEXT: entry:
1058+ ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[P1:%.*]], align 8, !range [[RNG0:![0-9]+]], !noundef [[META1:![0-9]+]]
1059+ ; CHECK-NEXT: store i64 0, ptr [[P2:%.*]], align 8
1060+ ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_END:%.*]], label [[CLEANUP:%.*]]
1061+ ; CHECK: if.end:
1062+ ; CHECK-NEXT: [[VQADDQ_V_I:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> <i8 3, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> zeroinitializer)
1063+ ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[VQADDQ_V_I]], <16 x i8> poison, <16 x i32> <i32 0, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1064+ ; CHECK-NEXT: [[VECINIT21:%.*]] = zext <16 x i8> [[TMP0]] to <16 x i64>
1065+ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0
1066+ ; CHECK-NEXT: [[VECINIT38:%.*]] = shufflevector <16 x i64> [[TMP2]], <16 x i64> poison, <16 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1067+ ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw <16 x i64> [[VECINIT38]], [[VECINIT21]]
1068+ ; CHECK-NEXT: store <16 x i64> [[MUL]], ptr [[P1]], align 128
1069+ ; CHECK-NEXT: br label [[CLEANUP]]
1070+ ; CHECK: cleanup:
1071+ ; CHECK-NEXT: ret i32 0
1072+ ;
1073+ entry:
1074+ %call4 = load i64 , ptr %p1 , !range !0 , !noundef !{} ; !range !0 bounds the loaded value to [0, 128)
1075+ store i64 0 , ptr %p2 ; NOTE(review): %p1 and %p2 carry no noalias marking, so this store presumably may alias the load above - confirm that this is what pins the load in entry
1076+ br i1 %c , label %if.end , label %cleanup
1077+
1078+ if.end: ; preds = %entry
1079+ %vqaddq_v.i = tail call <16 x i8 > @llvm.aarch64.neon.uqadd.v16i8 (<16 x i8 > <i8 3 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 , i8 0 >, <16 x i8 > zeroinitializer )
1080+ %1 = shufflevector <16 x i8 > %vqaddq_v.i , <16 x i8 > poison, <16 x i32 > <i32 0 , i32 2 , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> ; result lanes 0 and 1 take source lanes 0 and 2; every other lane is poison
1081+ %vecinit21 = zext <16 x i8 > %1 to <16 x i64 >
1082+ %2 = insertelement <16 x i64 > poison, i64 %call4 , i64 0 ; the only use of the loaded value %call4 in this function
1083+ %vecinit38 = shufflevector <16 x i64 > %2 , <16 x i64 > poison, <16 x i32 > <i32 0 , i32 0 , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> ; result lanes 0 and 1 both read lane 0 of %2; every other lane is poison
1084+ %mul = mul nuw nsw <16 x i64 > %vecinit38 , %vecinit21
1085+ store <16 x i64 > %mul , ptr %p1 ; writes back through the same pointer the entry-block load read from
1086+ br label %cleanup
1087+
1088+ cleanup: ; preds = %entry, %if.end
1089+ ret i32 0
1090+ }
1091+ !0 = !{i64 0 , i64 128 } ; range metadata referenced by the load in @dont_sink_loads
0 commit comments