[GISel][AArch64] Added more efficient lowering of Bitreverse #139233
Conversation
@llvm/pr-subscribers-llvm-globalisel

Author: None (jyli0116)

Changes

GlobalISel was previously inefficient at handling bitreverses of vector types. This patch handles vectors with i16, i32, and i64 elements, converting them into i8 bitreverses plus rev instructions.

Patch is 29.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139233.diff

4 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index da0bcdc123e2a..a0ec0068c6275 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8971,67 +8971,25 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
return Legalized;
}
-//{ (Src & Mask) >> N } | { (Src << N) & Mask }
-static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
- MachineInstrBuilder Src, const APInt &Mask) {
- const LLT Ty = Dst.getLLTTy(*B.getMRI());
- MachineInstrBuilder C_N = B.buildConstant(Ty, N);
- MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
- auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
- auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
- return B.buildOr(Dst, LHS, RHS);
-}
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
auto [Dst, Src] = MI.getFirst2Regs();
- const LLT Ty = MRI.getType(Src);
- unsigned Size = Ty.getScalarSizeInBits();
-
- if (Size >= 8) {
- MachineInstrBuilder BSWAP =
- MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
-
- // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
- // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
- // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
- MachineInstrBuilder Swap4 =
- SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
-
- // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
- // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
- // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
- MachineInstrBuilder Swap2 =
- SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
-
- // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
- // 6|7
- // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
- // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
- SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
- } else {
- // Expand bitreverse for types smaller than 8 bits.
- MachineInstrBuilder Tmp;
- for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
- MachineInstrBuilder Tmp2;
- if (I < J) {
- auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
- Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
- } else {
- auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
- Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
- }
+ const LLT SrcTy = MRI.getType(Src);
+ unsigned Size = SrcTy.getScalarSizeInBits();
+ unsigned VSize = SrcTy.getSizeInBits();
- auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
- Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
- if (I == 0)
- Tmp = Tmp2;
- else
- Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
- }
- MIRBuilder.buildCopy(Dst, Tmp);
+ LLT VTy = VSize == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
+
+ if (!LI.isLegal({TargetOpcode::G_BSWAP, {SrcTy, SrcTy}})) {
+ return UnableToLegalize;
}
+ auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
+ auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
+ auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
+ MIRBuilder.buildBitcast(Dst, RBIT);
+
MI.eraseFromParent();
return Legalized;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 28957f2664282..7ff2e55e802c5 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -361,12 +361,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
- // TODO: Custom lowering for v2s32, v4s32, v2s64.
getActionDefinitionsBuilder(G_BITREVERSE)
.legalFor({s32, s64, v8s8, v16s8})
.widenScalarToNextPow2(0, /*Min = */ 32)
+ .widenScalarOrEltToNextPow2OrMinSize(0, 8)
.clampScalar(0, s32, s64)
.clampNumElements(0, v8s8, v16s8)
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0)
.lower();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitreverse.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitreverse.mir
index 607f213f67145..dfd4dd8a43544 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitreverse.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitreverse.mir
@@ -152,33 +152,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %vec:_(<4 x s16>) = COPY $d0
; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<4 x s16>) = G_BSWAP %vec
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -3856
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s16>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<4 x s16>)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s16>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<4 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[LSHR]], [[AND1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -13108
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s16>) = G_AND [[OR]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<4 x s16>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<4 x s16>)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<4 x s16>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<4 x s16>)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[LSHR1]], [[AND3]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -21846
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<4 x s16>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<4 x s16>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<4 x s16>)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<4 x s16>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<4 x s16>)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<4 x s16>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: %bitreverse:_(<4 x s16>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BSWAP]](<4 x s16>)
+ ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<8 x s8>) = G_BITREVERSE [[BITCAST]]
+ ; CHECK-NEXT: %bitreverse:_(<4 x s16>) = G_BITCAST [[BITREVERSE]](<8 x s8>)
; CHECK-NEXT: $d0 = COPY %bitreverse(<4 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%vec:_(<4 x s16>) = COPY $d0
@@ -197,33 +173,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %vec:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x s32>) = G_BSWAP %vec
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -252645136
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s32>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[LSHR]], [[AND1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -858993460
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[OR]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s32>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<2 x s32>)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s32>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<2 x s32>)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<2 x s32>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[LSHR1]], [[AND3]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1431655766
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<2 x s32>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s32>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<2 x s32>)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s32>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<2 x s32>)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<2 x s32>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: %bitreverse:_(<2 x s32>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BSWAP]](<2 x s32>)
+ ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<8 x s8>) = G_BITREVERSE [[BITCAST]]
+ ; CHECK-NEXT: %bitreverse:_(<2 x s32>) = G_BITCAST [[BITREVERSE]](<8 x s8>)
; CHECK-NEXT: $d0 = COPY %bitreverse(<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%vec:_(<2 x s32>) = COPY $d0
@@ -242,33 +194,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x s64>) = G_BSWAP %vec
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1085102592571150096
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s64>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<2 x s64>)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x s64>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<2 x s64>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[LSHR]], [[AND1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -3689348814741910324
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C3]](s64), [[C3]](s64)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s64>) = G_AND [[OR]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s64>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<2 x s64>)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s64>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<2 x s64>)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<2 x s64>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s64>) = G_OR [[LSHR1]], [[AND3]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C4]](s64), [[C4]](s64)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 -6148914691236517206
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C5]](s64), [[C5]](s64)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<2 x s64>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s64>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<2 x s64>)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s64>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<2 x s64>)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<2 x s64>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: %bitreverse:_(<2 x s64>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BSWAP]](<2 x s64>)
+ ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<16 x s8>) = G_BITREVERSE [[BITCAST]]
+ ; CHECK-NEXT: %bitreverse:_(<2 x s64>) = G_BITCAST [[BITREVERSE]](<16 x s8>)
; CHECK-NEXT: $q0 = COPY %bitreverse(<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%vec:_(<2 x s64>) = COPY $q0
@@ -287,33 +215,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %vec:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<4 x s32>) = G_BSWAP %vec
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -252645136
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<4 x s32>)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<4 x s32>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[LSHR]], [[AND1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -858993460
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s32>) = G_AND [[OR]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<4 x s32>)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<4 x s32>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<4 x s32>)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s32>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s32>) = G_OR [[LSHR1]], [[AND3]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1431655766
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<4 x s32>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<4 x s32>)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<4 x s32>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<4 x s32>)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<4 x s32>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: %bitreverse:_(<4 x s32>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BSWAP]](<4 x s32>)
+ ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<16 x s8>) = G_BITREVERSE [[BITCAST]]
+ ; CHECK-NEXT: %bitreverse:_(<4 x s32>) = G_BITCAST [[BITREVERSE]](<16 x s8>)
; CHECK-NEXT: $q0 = COPY %bitreverse(<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%vec:_(<4 x s32>) = COPY $q0
@@ -332,33 +236,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %vec:_(<8 x s16>) = COPY $q0
; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<8 x s16>) = G_BSWAP %vec
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -3856
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<8 x s16>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<8 x s16>)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<8 x s16>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<8 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<8 x s16>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<8 x s16>) = G_OR [[LSHR]], [[AND1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -13108
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<8 x s16>) = G_AND [[OR]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<8 x s16>)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<8 x s16>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<8 x s16>)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<8 x s16>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<8 x s16>) = G_OR [[LSHR1]], [[AND3]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -21846
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<8 x s16>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<8 x s16>)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<8 x s16>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<8 x s16>)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<8 x s16>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: %bitreverse:_(<8 x s16>) = G_OR [[LSHR2]], [[AND5]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BSWAP]](<8 x s16>)
+ ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<16 x s8>) = G_BITREVERSE [[BITCAST]]
+ ; CHECK-NEXT: %bitreverse:_(<8 x s16>) = G_BITCAST [[BITREVERSE]](<16 x s8>)
; CHECK-NEXT: $q0 = COPY %bitreverse(<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%vec:_(<8 x s16>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/bitreverse.ll b/llvm/test/CodeGen/AArch64/bitreverse.ll
index 25fa3c6248ee3..8d9f6996d0b70 100644
--- a/llvm/test/CodeGen/AArch64/bitreverse.ll
+++ b/llvm/test/CodeGen/AArch64/bitreverse.ll
@@ -17,52 +17,10 @@ define <2 x i16> @f(<2 x i16> %a) {
; GISEL-LABEL: f:
; GISEL: // %bb.0:
; GISEL-NEXT: uzp1 v0.4h, v0.4h, v0.4h
-; GISEL-NEXT: mov w8, #61680 // =0xf0f0
-; GISEL-NEXT: dup v1.2s, w8
-; GISEL-NEXT: mov w8, #4 // =0x4
-; GISEL-NEXT: fmov s3, w8
; GISEL-NEXT: rev16 v0.8b, v0.8b
-; GISEL-NEXT: mov v3.h[1], w8
-; GISEL-NEXT: mov w8, #52428 // =0xcccc
-; GISEL-NEXT: ushll v2.4s, v0.4h, #0
-; GISEL-NEXT: neg v4.4h, v3.4h
-; GISEL-NEXT: and v2.8b, v2.8b, v1.8b
-; GISEL-NEXT: uzp1 v2.4h, v2.4h, v0.4h
-; GISEL-NE...
[truncated]
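To see why the new lowering in LegalizerHelper.cpp above is correct: a full bit reversal is equivalent to a byte swap (rev) followed by a bit reversal within each byte, which rbit on v8i8/v16i8 provides per lane. Here is a minimal scalar sketch of that identity in C++ (illustrative only; the helper names are mine, not from the patch):

```cpp
#include <cassert>
#include <cstdint>

// Reverse the bits inside one byte, the per-lane effect of a byte-wise RBIT.
static uint8_t reverseByte(uint8_t b) {
  b = (b & 0xF0) >> 4 | (b & 0x0F) << 4;
  b = (b & 0xCC) >> 2 | (b & 0x33) << 2;
  b = (b & 0xAA) >> 1 | (b & 0x55) << 1;
  return b;
}

// Reference bit reversal: bit i moves to bit 31 - i.
static uint32_t bitreverse32(uint32_t v) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i)
    r |= ((v >> i) & 1u) << (31 - i);
  return r;
}

int main() {
  uint32_t x = 0x12345678u;
  // Step 1: swap the bytes (G_BSWAP / rev). GCC/Clang builtin.
  uint32_t swapped = __builtin_bswap32(x);
  // Step 2: reverse the bits within each byte (byte-wise G_BITREVERSE / rbit).
  uint32_t result = 0;
  for (int i = 0; i < 4; ++i)
    result |= uint32_t(reverseByte(uint8_t(swapped >> (8 * i)))) << (8 * i);
  assert(result == bitreverse32(x));
  return 0;
}
```

Reading the new G_BITREVERSE rules in the AArch64LegalizerInfo.cpp hunk together (my interpretation; the patch does not spell this out): element types narrower than 8 bits widen to s8, vectors are padded to power-of-two element counts and clamped into the v8s8-v16s8, v4s16-v8s16, v2s32-v4s32, and v2s64 ranges, and any type that is not directly legal (s32, s64, v8s8, v16s8) falls through to .lower(), which now emits the G_BSWAP + bitcast + byte-wise G_BITREVERSE sequence checked in the updated MIR tests.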
@llvm/pr-subscribers-backend-aarch64 Author: None (jyli0116) (same changeset summary and truncated patch as above)
✅ With the latest revision this PR passed the C/C++ code formatter.
    LLT VTy = VSize == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);

    if (!LI.isLegal({TargetOpcode::G_BSWAP, {SrcTy, SrcTy}})) {
      return UnableToLegalize;
Could it just use the new lowering when bswap is legal? The smaller-than-i8 case looks especially useful.
Different implementations should be available as standalone helper functions in LegalizerHelper. The default implementation can use heuristics like this to select a reasonable default.
The previous smaller-than-i8 section currently falls back onto SDAG, so instead I used widenScalarOrEltToNextPow2OrMinSize. Do you think it would be better to get the previous smaller-than-i8 section working?
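For context on that question, here is a scalar C++ model of the deleted smaller-than-8-bit expansion (my paraphrase of the removed loop in the diff above, for illustration only):

```cpp
#include <cassert>
#include <cstdint>

// Bit-reverse the low Size bits of v (Size < 8), mirroring the removed
// shift/mask loop: source bit I lands at position J = Size - 1 - I.
static uint8_t bitreverseNarrow(uint8_t v, unsigned Size) {
  uint8_t Result = 0;
  for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
    uint8_t Moved = I < J ? uint8_t(v << (J - I)) : uint8_t(v >> (I - J));
    Result |= Moved & uint8_t(1u << J);
  }
  return Result;
}

int main() {
  assert(bitreverseNarrow(0b110, 3) == 0b011); // 3-bit reverse of 110 is 011.
  assert(bitreverseNarrow(0b101, 3) == 0b101); // Palindromes are unchanged.
  return 0;
}
```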
    LLT VTy = VSize == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
Could this try to generate a vector of the same size as the input, to keep it generic.
    LLT VTy = LLT::fixed_vector(VSize / 8, 8);

    if (LI.isLegal({TargetOpcode::G_BITREVERSE, {VTy, VTy}})) {
Is it worth checking that [Elt]Size % 8 == 0 too?
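(If I read the revised snippet right, such a check would guard against types whose bit width is not a byte multiple, where VSize / 8 rounds down and the bitcast's source and destination sizes would disagree; this is my inference, not stated in the thread.)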
LGTM, thanks
This fixes #120878?
GlobalISel was previously inefficient at handling bitreverses of vector types. This patch handles vectors with i16, i32, and i64 elements, converting them into i8 bitreverses plus rev instructions.