[GISel][AArch64] Added more efficient lowering of Bitreverse #139233

Merged · 4 commits · May 13, 2025
73 changes: 44 additions & 29 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8985,49 +8985,64 @@ static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
auto [Dst, Src] = MI.getFirst2Regs();
- const LLT Ty = MRI.getType(Src);
- unsigned Size = Ty.getScalarSizeInBits();
+ const LLT SrcTy = MRI.getType(Src);
+ unsigned Size = SrcTy.getScalarSizeInBits();
+ unsigned VSize = SrcTy.getSizeInBits();

if (Size >= 8) {
- MachineInstrBuilder BSWAP =
- MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
-
- // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
- // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
- // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
- MachineInstrBuilder Swap4 =
- SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
-
- // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
- // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
- // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
- MachineInstrBuilder Swap2 =
- SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
-
- // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
- // 6|7
- // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
- // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
- SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
+ if (SrcTy.isVector() && (VSize % 8 == 0) &&
+ (LI.isLegal({TargetOpcode::G_BITREVERSE,
+ {LLT::fixed_vector(VSize / 8, 8),
+ LLT::fixed_vector(VSize / 8, 8)}}))) {
+ // If bitreverse is legal for i8 vector of the same size, then cast
+ // to i8 vector type.
+ // e.g. v4s32 -> v16s8
+ LLT VTy = LLT::fixed_vector(VSize / 8, 8);
+ auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
+ auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
+ auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
+ MIRBuilder.buildBitcast(Dst, RBIT);
+ } else {
+ MachineInstrBuilder BSWAP =
+ MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
+
+ // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
+ // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
+ // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
+ MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
+ APInt::getSplat(Size, APInt(8, 0xF0)));
+
+ // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
+ // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
+ // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
+ MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
+ APInt::getSplat(Size, APInt(8, 0xCC)));
+
+ // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
+ // 6|7
+ // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
+ // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
+ SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
+ }
} else {
// Expand bitreverse for types smaller than 8 bits.
MachineInstrBuilder Tmp;
for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
MachineInstrBuilder Tmp2;
if (I < J) {
- auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
- Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
+ auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
+ Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
} else {
- auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
- Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
+ auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
+ Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
}

- auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
- Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
+ auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
+ Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
if (I == 0)
Tmp = Tmp2;
else
- Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
+ Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
}
MIRBuilder.buildCopy(Dst, Tmp);
}
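For readers outside GlobalISel, the two strategies in this hunk can be checked against each other on plain integers. The following is a standalone, illustrative C++ sketch (not part of the patch): bitreverse_fast models the new path (byte-swap, then reverse the bits inside each byte, which is what the per-lane G_BITREVERSE on the <VSize/8 x s8> cast does), and bitreverse_swapn models the pre-existing SwapN expansion; both are compared against a naive reference.

#include <cassert>
#include <cstdint>

// Naive reference: reverse all 32 bits one at a time.
static uint32_t bitreverse_ref(uint32_t v) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i)
    r |= ((v >> i) & 1u) << (31 - i);
  return r;
}

// Models the new fast path: byte-swap the word (G_BSWAP), then reverse the
// bits inside each byte (what the per-lane G_BITREVERSE on <N x s8> does).
static uint32_t bitreverse_fast(uint32_t v) {
  v = __builtin_bswap32(v); // GCC/Clang builtin
  uint32_t r = 0;
  for (int byte = 0; byte < 4; ++byte) {
    uint8_t b = (v >> (8 * byte)) & 0xFF;
    uint8_t rb = 0;
    for (int i = 0; i < 8; ++i)
      rb |= ((b >> i) & 1u) << (7 - i);
    r |= uint32_t(rb) << (8 * byte);
  }
  return r;
}

// Models the pre-existing SwapN expansion: byte-swap, then swap nibbles,
// bit pairs, and single bits using the splatted 0xF0 / 0xCC / 0xAA masks.
static uint32_t bitreverse_swapn(uint32_t v) {
  v = __builtin_bswap32(v);                                // G_BSWAP
  v = ((v & 0xF0F0F0F0u) >> 4) | ((v << 4) & 0xF0F0F0F0u); // SwapN(4)
  v = ((v & 0xCCCCCCCCu) >> 2) | ((v << 2) & 0xCCCCCCCCu); // SwapN(2)
  v = ((v & 0xAAAAAAAAu) >> 1) | ((v << 1) & 0xAAAAAAAAu); // SwapN(1)
  return v;
}

int main() {
  for (uint32_t v : {0x00000000u, 0x00000001u, 0x12345678u, 0xDEADBEEFu}) {
    assert(bitreverse_fast(v) == bitreverse_ref(v));
    assert(bitreverse_swapn(v) == bitreverse_ref(v));
  }
  return 0;
}

On AArch64 the per-byte reversal step corresponds to the vector RBIT instruction on .8b/.16b lanes, which is what makes the cast-to-bytes path cheaper than the mask-and-shift chain.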
5 changes: 4 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -361,12 +361,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)

getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();

- // TODO: Custom lowering for v2s32, v4s32, v2s64.
getActionDefinitionsBuilder(G_BITREVERSE)
.legalFor({s32, s64, v8s8, v16s8})
.widenScalarToNextPow2(0, /*Min = */ 32)
.widenScalarOrEltToNextPow2OrMinSize(0, 8)
.clampScalar(0, s32, s64)
.clampNumElements(0, v8s8, v16s8)
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
+ .moreElementsToNextPow2(0)
.lower();

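The legality check in the new fast path asks whether G_BITREVERSE is legal for the vector-of-bytes type with the same total width as the source. A minimal sketch of that type mapping (a hypothetical standalone helper, not LLVM's LLT API) shows how the vector types the rules above clamp to line up with the legal v8s8/v16s8 forms:

#include <cassert>
#include <utility>

// Hypothetical helper (not LLVM API): given a fixed vector type described as
// {NumElts, EltBits}, return the <TotalBits/8 x s8> type the fast path
// bitcasts to, mirroring LLT::fixed_vector(VSize / 8, 8) in the patch.
static std::pair<unsigned, unsigned> byteVectorFor(unsigned NumElts,
                                                   unsigned EltBits) {
  unsigned VSize = NumElts * EltBits; // total vector size in bits
  assert(VSize % 8 == 0 && "fast path requires a whole number of bytes");
  return {VSize / 8, 8};
}

int main() {
  // 64-bit vectors map to v8s8 and 128-bit vectors to v16s8 -- exactly the
  // types G_BITREVERSE is declared legal for on AArch64.
  assert(byteVectorFor(4, 16) == std::make_pair(8u, 8u));  // v4s16 -> v8s8
  assert(byteVectorFor(2, 32) == std::make_pair(8u, 8u));  // v2s32 -> v8s8
  assert(byteVectorFor(8, 16) == std::make_pair(16u, 8u)); // v8s16 -> v16s8
  assert(byteVectorFor(4, 32) == std::make_pair(16u, 8u)); // v4s32 -> v16s8
  assert(byteVectorFor(2, 64) == std::make_pair(16u, 8u)); // v2s64 -> v16s8
  return 0;
}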
150 changes: 15 additions & 135 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitreverse.mir
@@ -152,33 +152,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %vec:_(<4 x s16>) = COPY $d0
; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<4 x s16>) = G_BSWAP %vec
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -3856
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s16>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<4 x s16>)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s16>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<4 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[LSHR]], [[AND1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -13108
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s16>) = G_AND [[OR]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<4 x s16>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<4 x s16>)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<4 x s16>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<4 x s16>)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[LSHR1]], [[AND3]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -21846
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<4 x s16>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<4 x s16>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<4 x s16>)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<4 x s16>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<4 x s16>)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<4 x s16>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: %bitreverse:_(<4 x s16>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BSWAP]](<4 x s16>)
+ ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<8 x s8>) = G_BITREVERSE [[BITCAST]]
+ ; CHECK-NEXT: %bitreverse:_(<4 x s16>) = G_BITCAST [[BITREVERSE]](<8 x s8>)
; CHECK-NEXT: $d0 = COPY %bitreverse(<4 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%vec:_(<4 x s16>) = COPY $d0
@@ -197,33 +173,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %vec:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x s32>) = G_BSWAP %vec
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -252645136
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s32>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[LSHR]], [[AND1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -858993460
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[OR]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s32>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<2 x s32>)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s32>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<2 x s32>)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<2 x s32>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[LSHR1]], [[AND3]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1431655766
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<2 x s32>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s32>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<2 x s32>)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s32>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<2 x s32>)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<2 x s32>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: %bitreverse:_(<2 x s32>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BSWAP]](<2 x s32>)
+ ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<8 x s8>) = G_BITREVERSE [[BITCAST]]
+ ; CHECK-NEXT: %bitreverse:_(<2 x s32>) = G_BITCAST [[BITREVERSE]](<8 x s8>)
; CHECK-NEXT: $d0 = COPY %bitreverse(<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%vec:_(<2 x s32>) = COPY $d0
@@ -242,33 +194,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x s64>) = G_BSWAP %vec
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1085102592571150096
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s64>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<2 x s64>)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x s64>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<2 x s64>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[LSHR]], [[AND1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -3689348814741910324
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C3]](s64), [[C3]](s64)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s64>) = G_AND [[OR]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s64>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<2 x s64>)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s64>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<2 x s64>)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<2 x s64>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s64>) = G_OR [[LSHR1]], [[AND3]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C4]](s64), [[C4]](s64)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 -6148914691236517206
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C5]](s64), [[C5]](s64)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<2 x s64>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s64>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<2 x s64>)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s64>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<2 x s64>)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<2 x s64>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: %bitreverse:_(<2 x s64>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BSWAP]](<2 x s64>)
+ ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<16 x s8>) = G_BITREVERSE [[BITCAST]]
+ ; CHECK-NEXT: %bitreverse:_(<2 x s64>) = G_BITCAST [[BITREVERSE]](<16 x s8>)
; CHECK-NEXT: $q0 = COPY %bitreverse(<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%vec:_(<2 x s64>) = COPY $q0
@@ -287,33 +215,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %vec:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<4 x s32>) = G_BSWAP %vec
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -252645136
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<4 x s32>)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<4 x s32>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[LSHR]], [[AND1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -858993460
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s32>) = G_AND [[OR]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<4 x s32>)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<4 x s32>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<4 x s32>)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s32>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s32>) = G_OR [[LSHR1]], [[AND3]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1431655766
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<4 x s32>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<4 x s32>)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<4 x s32>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<4 x s32>)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<4 x s32>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: %bitreverse:_(<4 x s32>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BSWAP]](<4 x s32>)
+ ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<16 x s8>) = G_BITREVERSE [[BITCAST]]
+ ; CHECK-NEXT: %bitreverse:_(<4 x s32>) = G_BITCAST [[BITREVERSE]](<16 x s8>)
; CHECK-NEXT: $q0 = COPY %bitreverse(<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%vec:_(<4 x s32>) = COPY $q0
@@ -332,33 +236,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %vec:_(<8 x s16>) = COPY $q0
; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<8 x s16>) = G_BSWAP %vec
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -3856
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<8 x s16>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<8 x s16>)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<8 x s16>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<8 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<8 x s16>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<8 x s16>) = G_OR [[LSHR]], [[AND1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -13108
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<8 x s16>) = G_AND [[OR]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<8 x s16>)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<8 x s16>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<8 x s16>)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<8 x s16>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<8 x s16>) = G_OR [[LSHR1]], [[AND3]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -21846
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<8 x s16>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<8 x s16>)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<8 x s16>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<8 x s16>)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<8 x s16>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: %bitreverse:_(<8 x s16>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BSWAP]](<8 x s16>)
+ ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<16 x s8>) = G_BITREVERSE [[BITCAST]]
+ ; CHECK-NEXT: %bitreverse:_(<8 x s16>) = G_BITCAST [[BITREVERSE]](<16 x s8>)
; CHECK-NEXT: $q0 = COPY %bitreverse(<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%vec:_(<8 x s16>) = COPY $q0