diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 771eee1b3fecf..dba0562dfee4f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22042,6 +22042,30 @@ SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
   return DAG.getNode(TopOpcode, DL, AccVT, BottomNode, ExtOp);
 }
 
+static SDValue combineSVEBitSel(unsigned IID, SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue Op1 = N->getOperand(1);
+  SDValue Op2 = N->getOperand(2);
+  SDValue Op3 = N->getOperand(3);
+
+  switch (IID) {
+  default:
+    llvm_unreachable("Called with wrong intrinsic!");
+  case Intrinsic::aarch64_sve_bsl:
+    return DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, Op1, Op2);
+  case Intrinsic::aarch64_sve_bsl1n:
+    return DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, DAG.getNOT(DL, Op1, VT),
+                       Op2);
+  case Intrinsic::aarch64_sve_bsl2n:
+    return DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, Op1,
+                       DAG.getNOT(DL, Op2, VT));
+  case Intrinsic::aarch64_sve_nbsl:
+    return DAG.getNOT(DL, DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, Op1, Op2),
+                      VT);
+  }
+}
+
 static SDValue performIntrinsicCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const AArch64Subtarget *Subtarget) {
@@ -22364,6 +22388,11 @@ static SDValue performIntrinsicCombine(SDNode *N,
                                  AArch64CC::LAST_ACTIVE);
   case Intrinsic::aarch64_sve_whilelo:
     return tryCombineWhileLo(N, DCI, Subtarget);
+  case Intrinsic::aarch64_sve_bsl:
+  case Intrinsic::aarch64_sve_bsl1n:
+  case Intrinsic::aarch64_sve_bsl2n:
+  case Intrinsic::aarch64_sve_nbsl:
+    return combineSVEBitSel(IID, N, DAG);
   }
   return SDValue();
 }
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index d13728ec930c8..f1342b5ca6ea1 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -545,12 +545,18 @@ def AArch64umulh : PatFrag<(ops node:$op1, node:$op2),
 
 def AArch64bsl   : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
-                            [(int_aarch64_sve_bsl node:$Op1, node:$Op2, node:$Op3),
-                             (AArch64bsp node:$Op3, node:$Op1, node:$Op2)]>;
+                            [(AArch64bsp node:$Op3, node:$Op1, node:$Op2),
+                             (or (and node:$Op1, node:$Op3), (and node:$Op2, (vnot node:$Op3)))]>;
 
-def AArch64nbsl  : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
-                            [(int_aarch64_sve_nbsl node:$Op1, node:$Op2, node:$Op3),
-                             (vnot (AArch64bsp node:$Op3, node:$Op1, node:$Op2))]>;
+def AArch64bsl1n : PatFrag<(ops node:$Op1, node:$Op2, node:$Op3),
+                           (AArch64bsl (vnot node:$Op1), node:$Op2, node:$Op3)>;
+
+def AArch64bsl2n : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
+                            [(AArch64bsl node:$Op1, (vnot node:$Op2), node:$Op3),
+                             (or (and node:$Op1, node:$Op3), (vnot (or node:$Op2, node:$Op3)))]>;
+
+def AArch64nbsl  : PatFrag<(ops node:$Op1, node:$Op2, node:$Op3),
+                           (vnot (AArch64bsl node:$Op1, node:$Op2, node:$Op3))>;
 
 let Predicates = [HasSVE] in {
@@ -3923,8 +3929,8 @@ let Predicates = [HasSVE2_or_SME] in {
   defm EOR3_ZZZZ  : sve2_int_bitwise_ternary_op<0b000, "eor3",  AArch64eor3>;
   defm BCAX_ZZZZ  : sve2_int_bitwise_ternary_op<0b010, "bcax",  AArch64bcax>;
   defm BSL_ZZZZ   : sve2_int_bitwise_ternary_op<0b001, "bsl",   AArch64bsl>;
-  defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>;
-  defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>;
+  defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", AArch64bsl1n>;
+  defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", AArch64bsl2n>;
   defm NBSL_ZZZZ  : sve2_int_bitwise_ternary_op<0b111, "nbsl",  AArch64nbsl>;
 
   // SVE2 bitwise xor and rotate right by immediate
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index ef7d4abe5c5f4..e524c5d6b453e 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -93,3 +93,209 @@ define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
   %4 = xor <vscale x 2 x i64> %3, splat(i64 -1)
   ret <vscale x 2 x i64> %4
 }
+
+; Test BSL/NBSL/BSL1N/BSL2N code generation for:
+;   #define BSL(x,y,z)   ( ((x) & (z)) | ( (y) & ~(z)))
+;   #define NBSL(x,y,z)  (~(((x) & (z)) | ( (y) & ~(z))))
+;   #define BSL1N(x,y,z) ( (~(x) & (z)) | ( (y) & ~(z)))
+;   #define BSL2N(x,y,z) ( ((x) & (z)) | (~(y) & ~(z)))
+
+define <vscale x 16 x i8> @codegen_bsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
+; CHECK-LABEL: codegen_bsl_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 16 x i8> %2, %0
+  %5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
+  %6 = and <vscale x 16 x i8> %1, %5
+  %7 = or <vscale x 16 x i8> %4, %6
+  ret <vscale x 16 x i8> %7
+}
+
+define <vscale x 16 x i8> @codegen_nbsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
+; CHECK-LABEL: codegen_nbsl_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 16 x i8> %2, %0
+  %5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
+  %6 = and <vscale x 16 x i8> %1, %5
+  %7 = or <vscale x 16 x i8> %4, %6
+  %8 = xor <vscale x 16 x i8> %7, splat (i8 -1)
+  ret <vscale x 16 x i8> %8
+}
+
+define <vscale x 16 x i8> @codegen_bsl1n_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
+; CHECK-LABEL: codegen_bsl1n_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = xor <vscale x 16 x i8> %0, splat (i8 -1)
+  %5 = and <vscale x 16 x i8> %2, %4
+  %6 = xor <vscale x 16 x i8> %2, splat (i8 -1)
+  %7 = and <vscale x 16 x i8> %1, %6
+  %8 = or <vscale x 16 x i8> %5, %7
+  ret <vscale x 16 x i8> %8
+}
+
+define <vscale x 16 x i8> @codegen_bsl2n_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
+; CHECK-LABEL: codegen_bsl2n_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 16 x i8> %2, %0
+  %5 = or <vscale x 16 x i8> %2, %1
+  %6 = xor <vscale x 16 x i8> %5, splat (i8 -1)
+  %7 = or <vscale x 16 x i8> %4, %6
+  ret <vscale x 16 x i8> %7
+}
+
+define <vscale x 8 x i16> @codegen_bsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
+; CHECK-LABEL: codegen_bsl_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 8 x i16> %2, %0
+  %5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
+  %6 = and <vscale x 8 x i16> %1, %5
+  %7 = or <vscale x 8 x i16> %4, %6
+  ret <vscale x 8 x i16> %7
+}
+
+define <vscale x 8 x i16> @codegen_nbsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
+; CHECK-LABEL: codegen_nbsl_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 8 x i16> %2, %0
+  %5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
+  %6 = and <vscale x 8 x i16> %1, %5
+  %7 = or <vscale x 8 x i16> %4, %6
+  %8 = xor <vscale x 8 x i16> %7, splat (i16 -1)
+  ret <vscale x 8 x i16> %8
+}
+
+define <vscale x 8 x i16> @codegen_bsl1n_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
+; CHECK-LABEL: codegen_bsl1n_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = xor <vscale x 8 x i16> %0, splat (i16 -1)
+  %5 = and <vscale x 8 x i16> %2, %4
+  %6 = xor <vscale x 8 x i16> %2, splat (i16 -1)
+  %7 = and <vscale x 8 x i16> %1, %6
+  %8 = or <vscale x 8 x i16> %5, %7
+  ret <vscale x 8 x i16> %8
+}
+
+define <vscale x 8 x i16> @codegen_bsl2n_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
+; CHECK-LABEL: codegen_bsl2n_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 8 x i16> %2, %0
+  %5 = or <vscale x 8 x i16> %2, %1
+  %6 = xor <vscale x 8 x i16> %5, splat (i16 -1)
+  %7 = or <vscale x 8 x i16> %4, %6
+  ret <vscale x 8 x i16> %7
+}
+
+define <vscale x 4 x i32> @codegen_bsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
+; CHECK-LABEL: codegen_bsl_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 4 x i32> %2, %0
+  %5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
+  %6 = and <vscale x 4 x i32> %1, %5
+  %7 = or <vscale x 4 x i32> %4, %6
+  ret <vscale x 4 x i32> %7
+}
+
+define <vscale x 4 x i32> @codegen_nbsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
+; CHECK-LABEL: codegen_nbsl_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 4 x i32> %2, %0
+  %5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
+  %6 = and <vscale x 4 x i32> %1, %5
+  %7 = or <vscale x 4 x i32> %4, %6
+  %8 = xor <vscale x 4 x i32> %7, splat (i32 -1)
+  ret <vscale x 4 x i32> %8
+}
+
+define <vscale x 4 x i32> @codegen_bsl1n_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
+; CHECK-LABEL: codegen_bsl1n_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = xor <vscale x 4 x i32> %0, splat (i32 -1)
+  %5 = and <vscale x 4 x i32> %2, %4
+  %6 = xor <vscale x 4 x i32> %2, splat (i32 -1)
+  %7 = and <vscale x 4 x i32> %1, %6
+  %8 = or <vscale x 4 x i32> %5, %7
+  ret <vscale x 4 x i32> %8
+}
+
+define <vscale x 4 x i32> @codegen_bsl2n_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
+; CHECK-LABEL: codegen_bsl2n_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 4 x i32> %2, %0
+  %5 = or <vscale x 4 x i32> %2, %1
+  %6 = xor <vscale x 4 x i32> %5, splat (i32 -1)
+  %7 = or <vscale x 4 x i32> %4, %6
+  ret <vscale x 4 x i32> %7
+}
+
+define <vscale x 2 x i64> @codegen_bsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
+; CHECK-LABEL: codegen_bsl_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 2 x i64> %2, %0
+  %5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
+  %6 = and <vscale x 2 x i64> %1, %5
+  %7 = or <vscale x 2 x i64> %4, %6
+  ret <vscale x 2 x i64> %7
+}
+
+define <vscale x 2 x i64> @codegen_nbsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
+; CHECK-LABEL: codegen_nbsl_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 2 x i64> %2, %0
+  %5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
+  %6 = and <vscale x 2 x i64> %1, %5
+  %7 = or <vscale x 2 x i64> %4, %6
+  %8 = xor <vscale x 2 x i64> %7, splat (i64 -1)
+  ret <vscale x 2 x i64> %8
+}
+
+define <vscale x 2 x i64> @codegen_bsl1n_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
+; CHECK-LABEL: codegen_bsl1n_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = xor <vscale x 2 x i64> %0, splat (i64 -1)
+  %5 = and <vscale x 2 x i64> %2, %4
+  %6 = xor <vscale x 2 x i64> %2, splat (i64 -1)
+  %7 = and <vscale x 2 x i64> %1, %6
+  %8 = or <vscale x 2 x i64> %5, %7
+  ret <vscale x 2 x i64> %8
+}
+
+define <vscale x 2 x i64> @codegen_bsl2n_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
+; CHECK-LABEL: codegen_bsl2n_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 2 x i64> %2, %0
+  %5 = or <vscale x 2 x i64> %2, %1
+  %6 = xor <vscale x 2 x i64> %5, splat (i64 -1)
+  %7 = or <vscale x 2 x i64> %4, %6
+  ret <vscale x 2 x i64> %7
+}
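
Reviewer aid, not part of the patch: the standalone C++ sketch below exhaustively
verifies on 8-bit scalars the identities the patch relies on. BSP models the
AArch64ISD::BSP node, a bitwise select with the mask in its first operand,
i.e. (a & m) | (b & ~m), as implied by the AArch64bsl PatFrag above; the
BSL/NBSL/BSL1N/BSL2N helpers mirror the macros from the test comment. All names
here are local to the sketch.

  // Exhaustive check of the bit-select identities used by combineSVEBitSel
  // and the AArch64bsl* PatFrags, on 8-bit scalars standing in for SVE lanes.
  #include <cassert>
  #include <cstdint>

  // AArch64ISD::BSP semantics: take bits of a where m is 1, bits of b where m is 0.
  static uint8_t BSP(uint8_t m, uint8_t a, uint8_t b) { return (a & m) | (b & ~m); }

  // The four operations, written exactly as in the test comment's macros.
  static uint8_t BSL(uint8_t x, uint8_t y, uint8_t z)   { return (x & z) | (y & ~z); }
  static uint8_t NBSL(uint8_t x, uint8_t y, uint8_t z)  { return ~((x & z) | (y & ~z)); }
  static uint8_t BSL1N(uint8_t x, uint8_t y, uint8_t z) { return (~x & z) | (y & ~z); }
  static uint8_t BSL2N(uint8_t x, uint8_t y, uint8_t z) { return (x & z) | (~y & ~z); }

  int main() {
    for (unsigned x = 0; x < 256; ++x)
      for (unsigned y = 0; y < 256; ++y)
        for (unsigned z = 0; z < 256; ++z) {
          uint8_t X = x, Y = y, Z = z;
          // The four lowerings emitted by combineSVEBitSel.
          assert(BSL(X, Y, Z)   == BSP(Z, X, Y));
          assert(BSL1N(X, Y, Z) == BSP(Z, uint8_t(~X), Y));
          assert(BSL2N(X, Y, Z) == BSP(Z, X, uint8_t(~Y)));
          assert(NBSL(X, Y, Z)  == uint8_t(~BSP(Z, X, Y)));
          // De Morgan: the second form listed by the AArch64bsl2n PatFrags.
          assert(BSL2N(X, Y, Z) == uint8_t((X & Z) | ~(Y | Z)));
        }
    return 0;
  }

The last assert is why AArch64bsl2n carries a second pattern,
(or (and node:$Op1, node:$Op3), (vnot (or node:$Op2, node:$Op3))): IR
canonicalization folds (~y & ~z) into ~(y | z), which is the shape that reaches
ISel in the codegen_bsl2n tests above.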