Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 6ce8424

Browse files
author
Alexey Bataev
committed
iiii
1 parent 1bc0f8e commit 6ce8424

36 files changed

Lines changed: 704 additions & 656 deletions

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 155 additions & 58 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/WebAssembly/slp-memory-interleave.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -mtriple=wasm32 -mattr=+simd128 -passes=slp-vectorizer %s | llc -mtriple=wasm32 -mattr=+simd128 -asm-verbose=false -disable-wasm-fallthrough-return-opt | FileCheck %s
1+
; RUN: opt -mtriple=wasm32 -mattr=+simd128 -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false %s | llc -mtriple=wasm32 -mattr=+simd128 -asm-verbose=false -disable-wasm-fallthrough-return-opt | FileCheck %s
22

33
%struct.TwoBytes = type { i8, i8 }
44
%struct.FourBytes = type { i8, i8, i8, i8 }

llvm/test/Transforms/PhaseOrdering/AArch64/reduce_submuladd.ll

Lines changed: 31 additions & 98 deletions
Original file line number | Diff line number | Diff line change
@@ -9,105 +9,38 @@ target triple = "aarch64"
99
define dso_local noundef nofpclass(nan inf) float @_Z4testPKfS0_ii(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 noundef %3) {
1010
; CHECK-LABEL: define dso_local noundef nofpclass(nan inf) float @_Z4testPKfS0_ii
1111
; CHECK-SAME: (ptr noundef readonly captures(none) [[TMP0:%.*]], ptr noundef readonly captures(none) [[TMP1:%.*]], i32 noundef [[TMP2:%.*]], i32 noundef [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
12-
; CHECK-NEXT: .preheader.i:
13-
; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
1412
; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP2]] to i64
15-
; CHECK-NEXT: [[TMP6:%.*]] = load <20 x float>, ptr [[TMP0]], align 4, !tbaa [[TBAA4:![0-9]+]]
16-
; CHECK-NEXT: [[TMP7:%.*]] = load <20 x float>, ptr [[TMP1]], align 4, !tbaa [[TBAA4]]
17-
; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <20 x float> [[TMP6]], [[TMP7]]
18-
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <20 x float> [[TMP8]], [[TMP8]]
19-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 80
20-
; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4, !tbaa [[TBAA4]]
21-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80
22-
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4, !tbaa [[TBAA4]]
23-
; CHECK-NEXT: [[TMP14:%.*]] = fsub fast float [[TMP11]], [[TMP13]]
24-
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast float [[TMP14]], [[TMP14]]
25-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP0]], i64 [[TMP5]]
26-
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP1]], i64 [[TMP4]]
27-
; CHECK-NEXT: [[OP_RDX:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP15]], <20 x float> [[TMP9]])
28-
; CHECK-NEXT: [[TMP18:%.*]] = load <20 x float>, ptr [[TMP16]], align 4, !tbaa [[TBAA4]]
29-
; CHECK-NEXT: [[TMP19:%.*]] = load <20 x float>, ptr [[TMP17]], align 4, !tbaa [[TBAA4]]
30-
; CHECK-NEXT: [[TMP20:%.*]] = fsub fast <20 x float> [[TMP18]], [[TMP19]]
31-
; CHECK-NEXT: [[TMP21:%.*]] = fmul fast <20 x float> [[TMP20]], [[TMP20]]
32-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP16]], i64 80
33-
; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4, !tbaa [[TBAA4]]
34-
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP17]], i64 80
35-
; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA4]]
36-
; CHECK-NEXT: [[TMP26:%.*]] = fsub fast float [[TMP23]], [[TMP25]]
37-
; CHECK-NEXT: [[TMP27:%.*]] = fmul fast float [[TMP26]], [[TMP26]]
38-
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP16]], i64 [[TMP5]]
39-
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP17]], i64 [[TMP4]]
40-
; CHECK-NEXT: [[OP_RDX_1:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP27]], <20 x float> [[TMP21]])
41-
; CHECK-NEXT: [[OP_RDX3_1:%.*]] = fadd fast float [[OP_RDX_1]], [[OP_RDX]]
42-
; CHECK-NEXT: [[TMP30:%.*]] = load <20 x float>, ptr [[TMP28]], align 4, !tbaa [[TBAA4]]
43-
; CHECK-NEXT: [[TMP31:%.*]] = load <20 x float>, ptr [[TMP29]], align 4, !tbaa [[TBAA4]]
44-
; CHECK-NEXT: [[TMP32:%.*]] = fsub fast <20 x float> [[TMP30]], [[TMP31]]
45-
; CHECK-NEXT: [[TMP33:%.*]] = fmul fast <20 x float> [[TMP32]], [[TMP32]]
46-
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP28]], i64 80
47-
; CHECK-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA4]]
48-
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP29]], i64 80
49-
; CHECK-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA4]]
50-
; CHECK-NEXT: [[TMP38:%.*]] = fsub fast float [[TMP35]], [[TMP37]]
51-
; CHECK-NEXT: [[TMP39:%.*]] = fmul fast float [[TMP38]], [[TMP38]]
52-
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP28]], i64 [[TMP5]]
53-
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP29]], i64 [[TMP4]]
54-
; CHECK-NEXT: [[OP_RDX_2:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP39]], <20 x float> [[TMP33]])
55-
; CHECK-NEXT: [[OP_RDX3_2:%.*]] = fadd fast float [[OP_RDX_2]], [[OP_RDX3_1]]
56-
; CHECK-NEXT: [[TMP42:%.*]] = load <20 x float>, ptr [[TMP40]], align 4, !tbaa [[TBAA4]]
57-
; CHECK-NEXT: [[TMP43:%.*]] = load <20 x float>, ptr [[TMP41]], align 4, !tbaa [[TBAA4]]
58-
; CHECK-NEXT: [[TMP44:%.*]] = fsub fast <20 x float> [[TMP42]], [[TMP43]]
59-
; CHECK-NEXT: [[TMP45:%.*]] = fmul fast <20 x float> [[TMP44]], [[TMP44]]
60-
; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP40]], i64 80
61-
; CHECK-NEXT: [[TMP47:%.*]] = load float, ptr [[TMP46]], align 4, !tbaa [[TBAA4]]
62-
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP41]], i64 80
63-
; CHECK-NEXT: [[TMP49:%.*]] = load float, ptr [[TMP48]], align 4, !tbaa [[TBAA4]]
64-
; CHECK-NEXT: [[TMP50:%.*]] = fsub fast float [[TMP47]], [[TMP49]]
65-
; CHECK-NEXT: [[TMP51:%.*]] = fmul fast float [[TMP50]], [[TMP50]]
66-
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP40]], i64 [[TMP5]]
67-
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP41]], i64 [[TMP4]]
68-
; CHECK-NEXT: [[OP_RDX_3:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP51]], <20 x float> [[TMP45]])
69-
; CHECK-NEXT: [[OP_RDX3_3:%.*]] = fadd fast float [[OP_RDX_3]], [[OP_RDX3_2]]
70-
; CHECK-NEXT: [[TMP54:%.*]] = load <20 x float>, ptr [[TMP52]], align 4, !tbaa [[TBAA4]]
71-
; CHECK-NEXT: [[TMP55:%.*]] = load <20 x float>, ptr [[TMP53]], align 4, !tbaa [[TBAA4]]
72-
; CHECK-NEXT: [[TMP56:%.*]] = fsub fast <20 x float> [[TMP54]], [[TMP55]]
73-
; CHECK-NEXT: [[TMP57:%.*]] = fmul fast <20 x float> [[TMP56]], [[TMP56]]
74-
; CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP52]], i64 80
75-
; CHECK-NEXT: [[TMP59:%.*]] = load float, ptr [[TMP58]], align 4, !tbaa [[TBAA4]]
76-
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP53]], i64 80
77-
; CHECK-NEXT: [[TMP61:%.*]] = load float, ptr [[TMP60]], align 4, !tbaa [[TBAA4]]
78-
; CHECK-NEXT: [[TMP62:%.*]] = fsub fast float [[TMP59]], [[TMP61]]
79-
; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP62]], [[TMP62]]
80-
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP52]], i64 [[TMP5]]
81-
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP53]], i64 [[TMP4]]
82-
; CHECK-NEXT: [[OP_RDX_4:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP63]], <20 x float> [[TMP57]])
83-
; CHECK-NEXT: [[OP_RDX3_4:%.*]] = fadd fast float [[OP_RDX_4]], [[OP_RDX3_3]]
84-
; CHECK-NEXT: [[TMP66:%.*]] = load <20 x float>, ptr [[TMP64]], align 4, !tbaa [[TBAA4]]
85-
; CHECK-NEXT: [[TMP67:%.*]] = load <20 x float>, ptr [[TMP65]], align 4, !tbaa [[TBAA4]]
86-
; CHECK-NEXT: [[TMP68:%.*]] = fsub fast <20 x float> [[TMP66]], [[TMP67]]
87-
; CHECK-NEXT: [[TMP69:%.*]] = fmul fast <20 x float> [[TMP68]], [[TMP68]]
88-
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP64]], i64 80
89-
; CHECK-NEXT: [[TMP71:%.*]] = load float, ptr [[TMP70]], align 4, !tbaa [[TBAA4]]
90-
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP65]], i64 80
91-
; CHECK-NEXT: [[TMP73:%.*]] = load float, ptr [[TMP72]], align 4, !tbaa [[TBAA4]]
92-
; CHECK-NEXT: [[TMP74:%.*]] = fsub fast float [[TMP71]], [[TMP73]]
93-
; CHECK-NEXT: [[TMP75:%.*]] = fmul fast float [[TMP74]], [[TMP74]]
94-
; CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP64]], i64 [[TMP5]]
95-
; CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP65]], i64 [[TMP4]]
96-
; CHECK-NEXT: [[OP_RDX_5:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP75]], <20 x float> [[TMP69]])
97-
; CHECK-NEXT: [[OP_RDX3_5:%.*]] = fadd fast float [[OP_RDX_5]], [[OP_RDX3_4]]
98-
; CHECK-NEXT: [[TMP78:%.*]] = load <20 x float>, ptr [[TMP76]], align 4, !tbaa [[TBAA4]]
99-
; CHECK-NEXT: [[TMP79:%.*]] = load <20 x float>, ptr [[TMP77]], align 4, !tbaa [[TBAA4]]
100-
; CHECK-NEXT: [[TMP80:%.*]] = fsub fast <20 x float> [[TMP78]], [[TMP79]]
101-
; CHECK-NEXT: [[TMP81:%.*]] = fmul fast <20 x float> [[TMP80]], [[TMP80]]
102-
; CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP76]], i64 80
103-
; CHECK-NEXT: [[TMP83:%.*]] = load float, ptr [[TMP82]], align 4, !tbaa [[TBAA4]]
104-
; CHECK-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP77]], i64 80
105-
; CHECK-NEXT: [[TMP85:%.*]] = load float, ptr [[TMP84]], align 4, !tbaa [[TBAA4]]
106-
; CHECK-NEXT: [[TMP86:%.*]] = fsub fast float [[TMP83]], [[TMP85]]
107-
; CHECK-NEXT: [[TMP87:%.*]] = fmul fast float [[TMP86]], [[TMP86]]
108-
; CHECK-NEXT: [[OP_RDX_6:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP87]], <20 x float> [[TMP81]])
109-
; CHECK-NEXT: [[OP_RDX3_6:%.*]] = fadd fast float [[OP_RDX_6]], [[OP_RDX3_5]]
110-
; CHECK-NEXT: ret float [[OP_RDX3_6]]
13+
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP3]] to i64
14+
; CHECK-NEXT: br label [[DOTPREHEADER_I:%.*]]
15+
; CHECK: .preheader.i:
16+
; CHECK-NEXT: [[DOT027_I:%.*]] = phi ptr [ [[TMP0]], [[TMP4:%.*]] ], [ [[TMP23:%.*]], [[DOTPREHEADER_I]] ]
17+
; CHECK-NEXT: [[DOT01926_I:%.*]] = phi i32 [ 0, [[TMP4]] ], [ [[TMP26:%.*]], [[DOTPREHEADER_I]] ]
18+
; CHECK-NEXT: [[DOT02025_I:%.*]] = phi float [ 0.000000e+00, [[TMP4]] ], [ [[TMP25:%.*]], [[DOTPREHEADER_I]] ]
19+
; CHECK-NEXT: [[DOT02124_I:%.*]] = phi ptr [ [[TMP1]], [[TMP4]] ], [ [[TMP24:%.*]], [[DOTPREHEADER_I]] ]
20+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[DOT027_I]], i64 80
21+
; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA4:![0-9]+]]
22+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[DOT02124_I]], i64 80
23+
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA4]]
24+
; CHECK-NEXT: [[TMP11:%.*]] = load <20 x float>, ptr [[DOT027_I]], align 4, !tbaa [[TBAA4]]
25+
; CHECK-NEXT: [[TMP12:%.*]] = load <20 x float>, ptr [[DOT02124_I]], align 4, !tbaa [[TBAA4]]
26+
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <22 x float> poison, float [[TMP8]], i64 20
27+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <22 x float> [[TMP13]], float [[DOT02025_I]], i64 21
28+
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <20 x float> [[TMP11]], <20 x float> poison, <22 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison>
29+
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <22 x float> [[TMP15]], <22 x float> [[TMP14]], <22 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 42, i32 43>
30+
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <22 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0.000000e+00>, float [[TMP10]], i64 20
31+
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <20 x float> [[TMP12]], <20 x float> poison, <22 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison>
32+
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <22 x float> [[TMP18]], <22 x float> [[TMP17]], <22 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 42, i32 43>
33+
; CHECK-NEXT: [[TMP20:%.*]] = fsub <22 x float> [[TMP16]], [[TMP19]]
34+
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <22 x float> [[TMP20]], float 1.000000e+00, i64 21
35+
; CHECK-NEXT: [[TMP22:%.*]] = fmul <22 x float> [[TMP20]], [[TMP21]]
36+
; CHECK-NEXT: [[TMP23]] = getelementptr inbounds [4 x i8], ptr [[DOT027_I]], i64 [[TMP5]]
37+
; CHECK-NEXT: [[TMP24]] = getelementptr inbounds [4 x i8], ptr [[DOT02124_I]], i64 [[TMP6]]
38+
; CHECK-NEXT: [[TMP25]] = tail call fast float @llvm.vector.reduce.fadd.v22f32(float 0.000000e+00, <22 x float> [[TMP22]])
39+
; CHECK-NEXT: [[TMP26]] = add nuw nsw i32 [[DOT01926_I]], 1
40+
; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[TMP26]], 7
41+
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label [[_ZL6REDUCEILI7EEFPKFS1_II_EXIT:%.*]], label [[DOTPREHEADER_I]], !llvm.loop [[LOOP8:![0-9]+]]
42+
; CHECK: _ZL6reduceILi7EEfPKfS1_ii.exit:
43+
; CHECK-NEXT: ret float [[TMP25]]
11144
;
11245
%5 = alloca ptr, align 8
11346
%6 = alloca ptr, align 8

llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,11 +24,8 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
2424

2525
define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
2626
; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32(
27-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
28-
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]]
29-
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
30-
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SHIFT1]]
31-
; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
27+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
28+
; CHECK-NEXT: [[X210:%.*]] = tail call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP1]])
3229
; CHECK-NEXT: ret i32 [[X210]]
3330
;
3431
%x0 = extractelement <4 x i32> %x, i32 0

llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -486,11 +486,8 @@ define float @reduce_fast_float_case1(ptr %a) {
486486
; CHECK-LABEL: define float @reduce_fast_float_case1(
487487
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
488488
; CHECK-NEXT: [[ENTRY:.*:]]
489-
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 4
490-
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16
491-
; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
492-
; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP0]])
493-
; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[TMP1]], [[LOAD4]]
489+
; CHECK-NEXT: [[TMP0:%.*]] = load <5 x float>, ptr [[A]], align 4
490+
; CHECK-NEXT: [[ADD4:%.*]] = call fast float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> [[TMP0]])
494491
; CHECK-NEXT: ret float [[ADD4]]
495492
;
496493
entry:

llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,8 +92,8 @@ define <vscale x 4 x i32> @build_vec_v4i32_reuse_0(<vscale x 2 x i32> %v0) {
9292
; CHECK-LABEL: @build_vec_v4i32_reuse_0(
9393
; CHECK-NEXT: [[V0_0:%.*]] = extractelement <vscale x 2 x i32> [[V0:%.*]], i32 0
9494
; CHECK-NEXT: [[V0_1:%.*]] = extractelement <vscale x 2 x i32> [[V0]], i32 1
95-
; CHECK-NEXT: [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V0_0]]
9695
; CHECK-NEXT: [[TMP1_0:%.*]] = sub i32 [[V0_0]], [[V0_1]]
96+
; CHECK-NEXT: [[TMP0_0:%.*]] = mul i32 [[V0_0]], 2
9797
; CHECK-NEXT: [[TMP2_0:%.*]] = add i32 [[TMP0_0]], [[TMP1_0]]
9898
; CHECK-NEXT: [[TMP3_0:%.*]] = insertelement <vscale x 4 x i32> undef, i32 [[TMP2_0]], i32 0
9999
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3_0]]

llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -11,29 +11,27 @@ define dso_local void @l(i1 %arg) local_unnamed_addr {
1111
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP9:%.*]], [[BB25:%.*]] ]
1212
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[BB3:%.*]], label [[BB11:%.*]]
1313
; CHECK: bb3:
14-
; CHECK-NEXT: [[I4:%.*]] = zext i1 undef to i32
1514
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], undef
1615
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i16> [[TMP1]], splat (i16 8)
16+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> poison, <3 x i32> <i32 0, i32 1, i32 poison>
17+
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <3 x i1> <i1 poison, i1 poison, i1 undef>, <3 x i1> [[TMP10]], <3 x i32> <i32 3, i32 4, i32 2>
1718
; CHECK-NEXT: br label [[BB25]]
1819
; CHECK: bb11:
19-
; CHECK-NEXT: [[I12:%.*]] = zext i1 undef to i32
2020
; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i16> [[TMP0]], undef
2121
; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i16> [[TMP3]] to <2 x i64>
2222
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> undef, [[TMP4]]
23-
; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i32>
23+
; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i8>
24+
; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i8> [[TMP8]] to <2 x i32>
2425
; CHECK-NEXT: [[TMP7:%.*]] = icmp ult <2 x i32> undef, [[TMP6]]
26+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i1> [[TMP7]], <2 x i1> poison, <3 x i32> <i32 0, i32 1, i32 poison>
27+
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <3 x i1> <i1 poison, i1 poison, i1 undef>, <3 x i1> [[TMP11]], <3 x i32> <i32 3, i32 4, i32 2>
2528
; CHECK-NEXT: br label [[BB25]]
2629
; CHECK: bb25:
27-
; CHECK-NEXT: [[I28:%.*]] = phi i32 [ [[I12]], [[BB11]] ], [ [[I4]], [[BB3]] ]
28-
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i1> [ [[TMP7]], [[BB11]] ], [ [[TMP2]], [[BB3]] ]
2930
; CHECK-NEXT: [[TMP9]] = phi <2 x i16> [ [[TMP3]], [[BB11]] ], [ [[TMP1]], [[BB3]] ]
30-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
31-
; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i32
32-
; CHECK-NEXT: [[I31:%.*]] = and i32 undef, [[TMP11]]
33-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
31+
; CHECK-NEXT: [[TMP14:%.*]] = phi <3 x i1> [ [[TMP16]], [[BB11]] ], [ [[TMP15]], [[BB3]] ]
32+
; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.and.v3i1(<3 x i1> [[TMP14]])
3433
; CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[TMP12]] to i32
35-
; CHECK-NEXT: [[I32:%.*]] = and i32 [[I31]], [[TMP13]]
36-
; CHECK-NEXT: [[I33:%.*]] = and i32 [[I32]], [[I28]]
34+
; CHECK-NEXT: [[I33:%.*]] = and i32 [[TMP13]], undef
3735
; CHECK-NEXT: br i1 [[ARG]], label [[BB34:%.*]], label [[BB1]]
3836
; CHECK: bb34:
3937
; CHECK-NEXT: [[I35:%.*]] = phi i32 [ [[I33]], [[BB25]] ]

0 commit comments

Comments
 (0)