Closed
Description
Bugzilla Link | 28530 |
Version | trunk |
OS | All |
CC | @aemerson |
Extended Description
$ cat shufsel.ll
; Blend %a and %b with a constant per-lane mask: lanes 0 and 3 (mask = true)
; are taken from %a, lanes 1 and 2 (mask = false) are taken from %b.
define <4 x i32> @foo(<4 x i32> %a, <4 x i32> %b) {
%sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %sel
}
; Two-input shuffle: mask indices 0-3 pick lanes of %a and indices 4-7 pick
; lanes of %b, so <0, 5, 6, 3> yields <a[0], b[1], b[2], a[3]> -- the same
; value the select in @foo produces.
define <4 x i32> @goo(<4 x i32> %a, <4 x i32> %b) {
%shuf = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
ret <4 x i32> %shuf
}
Both functions compute the same result, so I'm guessing that one of the generated code sequences below is generally better than the other (and there may be a better way than either of them):
$ ./llc shufsel.ll -o - -mtriple=aarch64
// Constant-pool entry used by foo: a 4 x 32-bit lane mask with all-ones in
// lanes 0 and 3 and zero in lanes 1 and 2 (the select condition as bits).
.LCPI0_0:
.word 4294967295 // 0xffffffff
.word 0 // 0x0
.word 0 // 0x0
.word 4294967295 // 0xffffffff
// foo: the vector select lowered to a mask load plus a bitwise select.
// Under AAPCS64, v0 and v1 carry the two vector arguments and v0 the result.
foo:
adrp x8, .LCPI0_0 // x8 = page address of the mask constant
ldr q2, [x8, :lo12:.LCPI0_0] // q2 = 128-bit lane mask
bsl v2.16b, v0.16b, v1.16b // v2 = (v2 & v0) | (~v2 & v1)
mov v0.16b, v2.16b // bsl writes its mask register, so copy the result to v0
ret
// goo: the shufflevector lowered to a chain of four EXT instructions.
// EXT Vd, Vn, Vm, #imm takes 16 bytes starting at byte #imm of Vn and
// continuing into the low bytes of Vm. Lane lists below run from lane 0 up,
// with v0 = <a0, a1, a2, a3> and v1 = <b0, b1, b2, b3> on entry.
goo: // @goo
ext v1.16b, v0.16b, v1.16b, #12 // v1 = <a3, b0, b1, b2>
ext v0.16b, v1.16b, v0.16b, #4 // v0 = <b0, b1, b2, a0>
ext v1.16b, v1.16b, v1.16b, #8 // v1 = <b1, b2, a3, b0> (rotate by two lanes)
ext v0.16b, v0.16b, v1.16b, #12 // v0 = <a0, b1, b2, a3>, the requested shuffle
ret
Note that http://reviews.llvm.org/D22114 proposes canonicalizing to the shufflevector form of the IR.