diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 40944e3d43d6b..96fa85179d023 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -4532,7 +4532,9 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
 
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
+
   EVT VT = N->getValueType(0);
+  SDLoc DL(N);
 
   // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
   // Rotate by a constant is a funnel shift in IR which is exanded to
@@ -4558,10 +4560,18 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
         !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
       return false;
 
-    SDValue XOR = N0.getOperand(1);
-    if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
+    if (N0.getOperand(1) != N1.getOperand(1))
       return false;
 
+    SDValue R1, R2;
+    bool IsXOROperand = true;
+    if (N0.getOperand(1).getOpcode() != ISD::XOR) {
+      IsXOROperand = false;
+    } else {
+      R1 = N0.getOperand(1).getOperand(0);
+      R2 = N1.getOperand(1).getOperand(1);
+    }
+
     APInt ShlAmt, ShrAmt;
     if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
         !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
@@ -4570,11 +4580,23 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
     if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
       return false;
 
-    SDLoc DL(N);
+    if (!IsXOROperand) {
+      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
+      SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
+      SDValue MOVIV = SDValue(MOV, 0);
+
+      SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+      SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
+                                                   VT, Zero, MOVIV, ZSub);
+
+      R1 = N1->getOperand(1);
+      R2 = SDValue(SubRegToReg, 0);
+    }
+
     SDValue Imm =
         CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
 
-    SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
+    SDValue Ops[] = {R1, R2, Imm};
     if (auto Opc = SelectOpcodeFromVT(
             VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
                  AArch64::XAR_ZZZI_D})) {
@@ -4591,24 +4613,36 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
       N1->getOpcode() != AArch64ISD::VLSHR)
     return false;
 
-  if (N0->getOperand(0) != N1->getOperand(0) ||
-      N1->getOperand(0)->getOpcode() != ISD::XOR)
+  if (N0->getOperand(0) != N1->getOperand(0))
     return false;
 
-  SDValue XOR = N0.getOperand(0);
-  SDValue R1 = XOR.getOperand(0);
-  SDValue R2 = XOR.getOperand(1);
+  SDValue R1, R2;
+  bool IsXOROperand = true;
+  if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
+    IsXOROperand = false;
+  } else {
+    SDValue XOR = N0.getOperand(0);
+    R1 = XOR.getOperand(0);
+    R2 = XOR.getOperand(1);
+  }
 
   unsigned HsAmt = N0.getConstantOperandVal(1);
   unsigned ShAmt = N1.getConstantOperandVal(1);
 
-  SDLoc DL = SDLoc(N0.getOperand(1));
   SDValue Imm = CurDAG->getTargetConstant(
       ShAmt, DL, N0.getOperand(1).getValueType(), false);
 
   if (ShAmt + HsAmt != 64)
     return false;
 
+  if (!IsXOROperand) {
+    SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
+    SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
+    SDValue MOVIV = SDValue(MOV, 0);
+    R1 = N1->getOperand(0);
+    R2 = MOVIV;
+  }
+
   SDValue Ops[] = {R1, R2, Imm};
   CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
 
diff --git a/llvm/test/CodeGen/AArch64/sve2-xar.ll b/llvm/test/CodeGen/AArch64/sve2-xar.ll
index e5a240b7a53fd..888e94d42f449 100644
--- a/llvm/test/CodeGen/AArch64/sve2-xar.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-xar.ll
@@ -169,19 +169,86 @@ define <vscale x 2 x i64> @xar_nxv2i64_l_neg1(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i64> %z) {
 }
 
 ; OR instead of an XOR.
-define <vscale x 2 x i64> @xar_nxv2i64_l_neg2(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
-; CHECK-LABEL: xar_nxv2i64_l_neg2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    lsr z1.d, z0.d, #4
-; CHECK-NEXT:    lsl z0.d, z0.d, #60
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    ret
+define <vscale x 2 x i64> @xar_nxv2i64_l_neg2_1(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; SVE-LABEL: xar_nxv2i64_l_neg2_1:
+; SVE:       // %bb.0:
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    lsr z1.d, z0.d, #4
+; SVE-NEXT:    lsl z0.d, z0.d, #60
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: xar_nxv2i64_l_neg2_1:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    movi v2.2d, #0000000000000000
+; SVE2-NEXT:    orr z0.d, z0.d, z1.d
+; SVE2-NEXT:    xar z0.d, z0.d, z2.d, #4
+; SVE2-NEXT:    ret
   %a = or <vscale x 2 x i64> %x, %y
   %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 60))
   ret <vscale x 2 x i64> %b
 }
 
+define <vscale x 4 x i32> @xar_nxv2i32_l_neg2_2(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; SVE-LABEL: xar_nxv2i32_l_neg2_2:
+; SVE:       // %bb.0:
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    lsr z1.s, z0.s, #4
+; SVE-NEXT:    lsl z0.s, z0.s, #28
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: xar_nxv2i32_l_neg2_2:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    movi v2.2d, #0000000000000000
+; SVE2-NEXT:    orr z0.d, z0.d, z1.d
+; SVE2-NEXT:    xar z0.s, z0.s, z2.s, #4
+; SVE2-NEXT:    ret
+  %a = or <vscale x 4 x i32> %x, %y
+  %b = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat (i32 60))
+  ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 8 x i16> @xar_nxv2i16_l_neg2_3(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; SVE-LABEL: xar_nxv2i16_l_neg2_3:
+; SVE:       // %bb.0:
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    lsr z1.h, z0.h, #4
+; SVE-NEXT:    lsl z0.h, z0.h, #12
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: xar_nxv2i16_l_neg2_3:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    movi v2.2d, #0000000000000000
+; SVE2-NEXT:    orr z0.d, z0.d, z1.d
+; SVE2-NEXT:    xar z0.h, z0.h, z2.h, #4
+; SVE2-NEXT:    ret
+  %a = or <vscale x 8 x i16> %x, %y
+  %b = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 8 x i16> splat (i16 60))
+  ret <vscale x 8 x i16> %b
+}
+
+define <vscale x 16 x i8> @xar_nxv2i8_l_neg2_4(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; SVE-LABEL: xar_nxv2i8_l_neg2_4:
+; SVE:       // %bb.0:
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    lsr z1.b, z0.b, #4
+; SVE-NEXT:    lsl z0.b, z0.b, #4
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: xar_nxv2i8_l_neg2_4:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    movi v2.2d, #0000000000000000
+; SVE2-NEXT:    orr z0.d, z0.d, z1.d
+; SVE2-NEXT:    xar z0.b, z0.b, z2.b, #4
+; SVE2-NEXT:    ret
+  %a = or <vscale x 16 x i8> %x, %y
+  %b = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> splat (i8 60))
+  ret <vscale x 16 x i8> %b
+}
+
 ; Rotate amount is 0.
 define <vscale x 2 x i64> @xar_nxv2i64_l_neg3(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
 ; CHECK-LABEL: xar_nxv2i64_l_neg3:
diff --git a/llvm/test/CodeGen/AArch64/xar.ll b/llvm/test/CodeGen/AArch64/xar.ll
index d050eaf6646de..e15cb6a696aa5 100644
--- a/llvm/test/CodeGen/AArch64/xar.ll
+++ b/llvm/test/CodeGen/AArch64/xar.ll
@@ -19,4 +19,82 @@ define <2 x i64> @xar(<2 x i64> %x, <2 x i64> %y) {
   ret <2 x i64> %b
 }
 
+define <2 x i64> @xar_instead_of_or1(<2 x i64> %r) {
+; SHA3-LABEL: xar_instead_of_or1:
+; SHA3:       // %bb.0: // %entry
+; SHA3-NEXT:    movi v1.2d, #0000000000000000
+; SHA3-NEXT:    xar v0.2d, v0.2d, v1.2d, #39
+; SHA3-NEXT:    ret
+;
+; NOSHA3-LABEL: xar_instead_of_or1:
+; NOSHA3:       // %bb.0: // %entry
+; NOSHA3-NEXT:    shl v1.2d, v0.2d, #25
+; NOSHA3-NEXT:    usra v1.2d, v0.2d, #39
+; NOSHA3-NEXT:    mov v0.16b, v1.16b
+; NOSHA3-NEXT:    ret
+entry:
+  %or = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %r, <2 x i64> %r, <2 x i64> splat (i64 25))
+  ret <2 x i64> %or
+}
+
+define <4 x i32> @xar_instead_of_or2(<4 x i32> %r) {
+; SHA3-LABEL: xar_instead_of_or2:
+; SHA3:       // %bb.0: // %entry
+; SHA3-NEXT:    shl v1.4s, v0.4s, #25
+; SHA3-NEXT:    usra v1.4s, v0.4s, #7
+; SHA3-NEXT:    mov v0.16b, v1.16b
+; SHA3-NEXT:    ret
+;
+; NOSHA3-LABEL: xar_instead_of_or2:
+; NOSHA3:       // %bb.0: // %entry
+; NOSHA3-NEXT:    shl v1.4s, v0.4s, #25
+; NOSHA3-NEXT:    usra v1.4s, v0.4s, #7
+; NOSHA3-NEXT:    mov v0.16b, v1.16b
+; NOSHA3-NEXT:    ret
+entry:
+  %or = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %r, <4 x i32> %r, <4 x i32> splat (i32 25))
+  ret <4 x i32> %or
+}
+
+define <8 x i16> @xar_instead_of_or3(<8 x i16> %r) {
+; SHA3-LABEL: xar_instead_of_or3:
+; SHA3:       // %bb.0: // %entry
+; SHA3-NEXT:    shl v1.8h, v0.8h, #9
+; SHA3-NEXT:    usra v1.8h, v0.8h, #7
+; SHA3-NEXT:    mov v0.16b, v1.16b
+; SHA3-NEXT:    ret
+;
+; NOSHA3-LABEL: xar_instead_of_or3:
+; NOSHA3:       // %bb.0: // %entry
+; NOSHA3-NEXT:    shl v1.8h, v0.8h, #9
+; NOSHA3-NEXT:    usra v1.8h, v0.8h, #7
+; NOSHA3-NEXT:    mov v0.16b, v1.16b
+; NOSHA3-NEXT:    ret
+entry:
+  %or = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %r, <8 x i16> %r, <8 x i16> splat (i16 25))
+  ret <8 x i16> %or
+}
+
+define <16 x i8> @xar_instead_of_or4(<16 x i8> %r) {
+; SHA3-LABEL: xar_instead_of_or4:
+; SHA3:       // %bb.0: // %entry
+; SHA3-NEXT:    add v1.16b, v0.16b, v0.16b
+; SHA3-NEXT:    usra v1.16b, v0.16b, #7
+; SHA3-NEXT:    mov v0.16b, v1.16b
+; SHA3-NEXT:    ret
+;
+; NOSHA3-LABEL: xar_instead_of_or4:
+; NOSHA3:       // %bb.0: // %entry
+; NOSHA3-NEXT:    add v1.16b, v0.16b, v0.16b
+; NOSHA3-NEXT:    usra v1.16b, v0.16b, #7
+; NOSHA3-NEXT:    mov v0.16b, v1.16b
+; NOSHA3-NEXT:    ret
+entry:
+  %or = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %r, <16 x i8> %r, <16 x i8> splat (i8 25))
+  ret <16 x i8> %or
+}
+
 declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
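
For illustration only, and not part of the patch: a minimal standalone sketch of the case the change targets, namely a constant rotate whose rotated value is not an XOR. With this change such a rotate should be selected as an XAR paired with an all-zero register instead of the lsl/lsr/orr (or shl/usra) expansion, mirroring the updated checks above; the function name, rotate amount, and llc invocation below are illustrative assumptions.

; Hypothetical reproducer; try, e.g.:  llc -mtriple=aarch64 -mattr=+sve2 < reproducer.ll
define <vscale x 2 x i64> @rotl_no_xor(<vscale x 2 x i64> %x) {
  ; Rotate left by 60 with no feeding XOR. With the patch this is expected to
  ; lower to a zeroing movi plus an xar with immediate #4 (rotate right by 4),
  ; as in xar_nxv2i64_l_neg2_1 above.
  %r = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %x, <vscale x 2 x i64> splat (i64 60))
  ret <vscale x 2 x i64> %r
}
declare <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)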