|
18 | 18 | define i32 @reduce_and4(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) { |
19 | 19 | ; SSE2-LABEL: @reduce_and4( |
20 | 20 | ; SSE2-NEXT: entry: |
21 | | -; SSE2-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> |
22 | | -; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> |
23 | | -; SSE2-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) |
24 | | -; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]]) |
25 | | -; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP3]] |
| 21 | +; SSE2-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> |
| 22 | +; SSE2-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]]) |
| 23 | +; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> |
| 24 | +; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]]) |
| 25 | +; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP3]] |
26 | 26 | ; SSE2-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]] |
27 | 27 | ; SSE2-NEXT: ret i32 [[OP_RDX1]] |
28 | 28 | ; |
29 | 29 | ; SSE42-LABEL: @reduce_and4( |
30 | 30 | ; SSE42-NEXT: entry: |
31 | | -; SSE42-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> |
32 | | -; SSE42-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> |
33 | | -; SSE42-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) |
34 | | -; SSE42-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]]) |
35 | | -; SSE42-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP3]] |
| 31 | +; SSE42-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> |
| 32 | +; SSE42-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]]) |
| 33 | +; SSE42-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> |
| 34 | +; SSE42-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]]) |
| 35 | +; SSE42-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP3]] |
36 | 36 | ; SSE42-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]] |
37 | 37 | ; SSE42-NEXT: ret i32 [[OP_RDX1]] |
38 | 38 | ; |
39 | 39 | ; AVX-LABEL: @reduce_and4( |
40 | 40 | ; AVX-NEXT: entry: |
41 | | -; AVX-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> |
42 | | -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> |
43 | | -; AVX-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) |
44 | | -; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]]) |
45 | | -; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP3]] |
| 41 | +; AVX-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> |
| 42 | +; AVX-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]]) |
| 43 | +; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> |
| 44 | +; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]]) |
| 45 | +; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP3]] |
46 | 46 | ; AVX-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]] |
47 | 47 | ; AVX-NEXT: ret i32 [[OP_RDX1]] |
48 | 48 | ; |
@@ -92,29 +92,29 @@ entry: |
92 | 92 |
|
93 | 93 | define i32 @reduce_and4_transpose(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) { |
94 | 94 | ; SSE2-LABEL: @reduce_and4_transpose( |
95 | | -; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
96 | | -; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
97 | | -; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]]) |
98 | | -; SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) |
99 | | -; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[TMP4]] |
| 95 | +; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
| 96 | +; SSE2-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) |
| 97 | +; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
| 98 | +; SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]]) |
| 99 | +; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP4]] |
100 | 100 | ; SSE2-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]] |
101 | 101 | ; SSE2-NEXT: ret i32 [[OP_RDX1]] |
102 | 102 | ; |
103 | 103 | ; SSE42-LABEL: @reduce_and4_transpose( |
104 | | -; SSE42-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
105 | | -; SSE42-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
106 | | -; SSE42-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]]) |
107 | | -; SSE42-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) |
108 | | -; SSE42-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[TMP4]] |
| 104 | +; SSE42-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
| 105 | +; SSE42-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) |
| 106 | +; SSE42-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
| 107 | +; SSE42-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]]) |
| 108 | +; SSE42-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP4]] |
109 | 109 | ; SSE42-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]] |
110 | 110 | ; SSE42-NEXT: ret i32 [[OP_RDX1]] |
111 | 111 | ; |
112 | 112 | ; AVX-LABEL: @reduce_and4_transpose( |
113 | | -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
114 | | -; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
115 | | -; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]]) |
116 | | -; AVX-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) |
117 | | -; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[TMP4]] |
| 113 | +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
| 114 | +; AVX-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) |
| 115 | +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> |
| 116 | +; AVX-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]]) |
| 117 | +; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP4]] |
118 | 118 | ; AVX-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]] |
119 | 119 | ; AVX-NEXT: ret i32 [[OP_RDX1]] |
120 | 120 | ; |
|
0 commit comments