diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4dacd2273306e..d224caa33896c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2710,330 +2710,6 @@ AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, return AArch64::createFastISel(funcInfo, libInfo); } -const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { -#define MAKE_CASE(V) \ - case V: \ - return #V; - switch ((AArch64ISD::NodeType)Opcode) { - case AArch64ISD::FIRST_NUMBER: - break; - MAKE_CASE(AArch64ISD::ALLOCATE_ZA_BUFFER) - MAKE_CASE(AArch64ISD::INIT_TPIDR2OBJ) - MAKE_CASE(AArch64ISD::GET_SME_SAVE_SIZE) - MAKE_CASE(AArch64ISD::ALLOC_SME_SAVE_BUFFER) - MAKE_CASE(AArch64ISD::COALESCER_BARRIER) - MAKE_CASE(AArch64ISD::VG_SAVE) - MAKE_CASE(AArch64ISD::VG_RESTORE) - MAKE_CASE(AArch64ISD::SMSTART) - MAKE_CASE(AArch64ISD::SMSTOP) - MAKE_CASE(AArch64ISD::COND_SMSTART) - MAKE_CASE(AArch64ISD::COND_SMSTOP) - MAKE_CASE(AArch64ISD::RESTORE_ZA) - MAKE_CASE(AArch64ISD::RESTORE_ZT) - MAKE_CASE(AArch64ISD::SAVE_ZT) - MAKE_CASE(AArch64ISD::CALL) - MAKE_CASE(AArch64ISD::ADRP) - MAKE_CASE(AArch64ISD::ADR) - MAKE_CASE(AArch64ISD::ADDlow) - MAKE_CASE(AArch64ISD::AUTH_CALL) - MAKE_CASE(AArch64ISD::AUTH_TC_RETURN) - MAKE_CASE(AArch64ISD::AUTH_CALL_RVMARKER) - MAKE_CASE(AArch64ISD::LOADgot) - MAKE_CASE(AArch64ISD::RET_GLUE) - MAKE_CASE(AArch64ISD::BRCOND) - MAKE_CASE(AArch64ISD::CSEL) - MAKE_CASE(AArch64ISD::CSINV) - MAKE_CASE(AArch64ISD::CSNEG) - MAKE_CASE(AArch64ISD::CSINC) - MAKE_CASE(AArch64ISD::THREAD_POINTER) - MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ) - MAKE_CASE(AArch64ISD::TLSDESC_AUTH_CALLSEQ) - MAKE_CASE(AArch64ISD::PROBED_ALLOCA) - MAKE_CASE(AArch64ISD::ABDS_PRED) - MAKE_CASE(AArch64ISD::ABDU_PRED) - MAKE_CASE(AArch64ISD::HADDS_PRED) - MAKE_CASE(AArch64ISD::HADDU_PRED) - MAKE_CASE(AArch64ISD::MUL_PRED) - MAKE_CASE(AArch64ISD::MULHS_PRED) - MAKE_CASE(AArch64ISD::MULHU_PRED) - MAKE_CASE(AArch64ISD::RHADDS_PRED) - MAKE_CASE(AArch64ISD::RHADDU_PRED) - MAKE_CASE(AArch64ISD::SDIV_PRED) - MAKE_CASE(AArch64ISD::SHL_PRED) - MAKE_CASE(AArch64ISD::SMAX_PRED) - MAKE_CASE(AArch64ISD::SMIN_PRED) - MAKE_CASE(AArch64ISD::SRA_PRED) - MAKE_CASE(AArch64ISD::SRL_PRED) - MAKE_CASE(AArch64ISD::UDIV_PRED) - MAKE_CASE(AArch64ISD::UMAX_PRED) - MAKE_CASE(AArch64ISD::UMIN_PRED) - MAKE_CASE(AArch64ISD::SRAD_MERGE_OP1) - MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FCVTX_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU) - 
MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO) - MAKE_CASE(AArch64ISD::ADC) - MAKE_CASE(AArch64ISD::SBC) - MAKE_CASE(AArch64ISD::ADDS) - MAKE_CASE(AArch64ISD::SUBS) - MAKE_CASE(AArch64ISD::ADCS) - MAKE_CASE(AArch64ISD::SBCS) - MAKE_CASE(AArch64ISD::ANDS) - MAKE_CASE(AArch64ISD::CCMP) - MAKE_CASE(AArch64ISD::CCMN) - MAKE_CASE(AArch64ISD::FCCMP) - MAKE_CASE(AArch64ISD::FCMP) - MAKE_CASE(AArch64ISD::STRICT_FCMP) - MAKE_CASE(AArch64ISD::STRICT_FCMPE) - MAKE_CASE(AArch64ISD::FCVTXN) - MAKE_CASE(AArch64ISD::SME_ZA_LDR) - MAKE_CASE(AArch64ISD::SME_ZA_STR) - MAKE_CASE(AArch64ISD::DUP) - MAKE_CASE(AArch64ISD::DUPLANE8) - MAKE_CASE(AArch64ISD::DUPLANE16) - MAKE_CASE(AArch64ISD::DUPLANE32) - MAKE_CASE(AArch64ISD::DUPLANE64) - MAKE_CASE(AArch64ISD::DUPLANE128) - MAKE_CASE(AArch64ISD::MOVI) - MAKE_CASE(AArch64ISD::MOVIshift) - MAKE_CASE(AArch64ISD::MOVIedit) - MAKE_CASE(AArch64ISD::MOVImsl) - MAKE_CASE(AArch64ISD::FMOV) - MAKE_CASE(AArch64ISD::MVNIshift) - MAKE_CASE(AArch64ISD::MVNImsl) - MAKE_CASE(AArch64ISD::BICi) - MAKE_CASE(AArch64ISD::ORRi) - MAKE_CASE(AArch64ISD::BSP) - MAKE_CASE(AArch64ISD::ZIP1) - MAKE_CASE(AArch64ISD::ZIP2) - MAKE_CASE(AArch64ISD::UZP1) - MAKE_CASE(AArch64ISD::UZP2) - MAKE_CASE(AArch64ISD::TRN1) - MAKE_CASE(AArch64ISD::TRN2) - MAKE_CASE(AArch64ISD::REV16) - MAKE_CASE(AArch64ISD::REV32) - MAKE_CASE(AArch64ISD::REV64) - MAKE_CASE(AArch64ISD::EXT) - MAKE_CASE(AArch64ISD::SPLICE) - MAKE_CASE(AArch64ISD::VSHL) - MAKE_CASE(AArch64ISD::VLSHR) - MAKE_CASE(AArch64ISD::VASHR) - MAKE_CASE(AArch64ISD::VSLI) - MAKE_CASE(AArch64ISD::VSRI) - MAKE_CASE(AArch64ISD::FCMEQ) - MAKE_CASE(AArch64ISD::FCMGE) - MAKE_CASE(AArch64ISD::FCMGT) - MAKE_CASE(AArch64ISD::SADDV) - MAKE_CASE(AArch64ISD::UADDV) - MAKE_CASE(AArch64ISD::UADDLV) - MAKE_CASE(AArch64ISD::SADDLV) - MAKE_CASE(AArch64ISD::SADDWT) - MAKE_CASE(AArch64ISD::SADDWB) - MAKE_CASE(AArch64ISD::UADDWT) - MAKE_CASE(AArch64ISD::UADDWB) - MAKE_CASE(AArch64ISD::SDOT) - MAKE_CASE(AArch64ISD::UDOT) - MAKE_CASE(AArch64ISD::USDOT) - MAKE_CASE(AArch64ISD::SMINV) - MAKE_CASE(AArch64ISD::UMINV) - MAKE_CASE(AArch64ISD::SMAXV) - MAKE_CASE(AArch64ISD::UMAXV) - MAKE_CASE(AArch64ISD::SADDV_PRED) - MAKE_CASE(AArch64ISD::UADDV_PRED) - MAKE_CASE(AArch64ISD::SMAXV_PRED) - MAKE_CASE(AArch64ISD::UMAXV_PRED) - MAKE_CASE(AArch64ISD::SMINV_PRED) - MAKE_CASE(AArch64ISD::UMINV_PRED) - MAKE_CASE(AArch64ISD::ORV_PRED) - MAKE_CASE(AArch64ISD::EORV_PRED) - MAKE_CASE(AArch64ISD::ANDV_PRED) - MAKE_CASE(AArch64ISD::CLASTA_N) - MAKE_CASE(AArch64ISD::CLASTB_N) - MAKE_CASE(AArch64ISD::LASTA) - MAKE_CASE(AArch64ISD::LASTB) - MAKE_CASE(AArch64ISD::REINTERPRET_CAST) - MAKE_CASE(AArch64ISD::LS64_BUILD) - MAKE_CASE(AArch64ISD::LS64_EXTRACT) - MAKE_CASE(AArch64ISD::TBL) - MAKE_CASE(AArch64ISD::FADD_PRED) - MAKE_CASE(AArch64ISD::FADDA_PRED) - MAKE_CASE(AArch64ISD::FADDV_PRED) - MAKE_CASE(AArch64ISD::FDIV_PRED) - MAKE_CASE(AArch64ISD::FMA_PRED) - MAKE_CASE(AArch64ISD::FMAX_PRED) - MAKE_CASE(AArch64ISD::FMAXV_PRED) - MAKE_CASE(AArch64ISD::FMAXNM_PRED) - MAKE_CASE(AArch64ISD::FMAXNMV_PRED) - MAKE_CASE(AArch64ISD::FMIN_PRED) - MAKE_CASE(AArch64ISD::FMINV_PRED) - MAKE_CASE(AArch64ISD::FMINNM_PRED) - MAKE_CASE(AArch64ISD::FMINNMV_PRED) - MAKE_CASE(AArch64ISD::FMUL_PRED) - MAKE_CASE(AArch64ISD::FSUB_PRED) - MAKE_CASE(AArch64ISD::RDSVL) - MAKE_CASE(AArch64ISD::BIC) - MAKE_CASE(AArch64ISD::CBZ) - MAKE_CASE(AArch64ISD::CBNZ) - MAKE_CASE(AArch64ISD::TBZ) - MAKE_CASE(AArch64ISD::TBNZ) - MAKE_CASE(AArch64ISD::TC_RETURN) - MAKE_CASE(AArch64ISD::PREFETCH) - MAKE_CASE(AArch64ISD::SITOF) - 
MAKE_CASE(AArch64ISD::UITOF) - MAKE_CASE(AArch64ISD::NVCAST) - MAKE_CASE(AArch64ISD::MRS) - MAKE_CASE(AArch64ISD::SQSHL_I) - MAKE_CASE(AArch64ISD::UQSHL_I) - MAKE_CASE(AArch64ISD::SRSHR_I) - MAKE_CASE(AArch64ISD::URSHR_I) - MAKE_CASE(AArch64ISD::SQSHLU_I) - MAKE_CASE(AArch64ISD::WrapperLarge) - MAKE_CASE(AArch64ISD::LD2post) - MAKE_CASE(AArch64ISD::LD3post) - MAKE_CASE(AArch64ISD::LD4post) - MAKE_CASE(AArch64ISD::ST2post) - MAKE_CASE(AArch64ISD::ST3post) - MAKE_CASE(AArch64ISD::ST4post) - MAKE_CASE(AArch64ISD::LD1x2post) - MAKE_CASE(AArch64ISD::LD1x3post) - MAKE_CASE(AArch64ISD::LD1x4post) - MAKE_CASE(AArch64ISD::ST1x2post) - MAKE_CASE(AArch64ISD::ST1x3post) - MAKE_CASE(AArch64ISD::ST1x4post) - MAKE_CASE(AArch64ISD::LD1DUPpost) - MAKE_CASE(AArch64ISD::LD2DUPpost) - MAKE_CASE(AArch64ISD::LD3DUPpost) - MAKE_CASE(AArch64ISD::LD4DUPpost) - MAKE_CASE(AArch64ISD::LD1LANEpost) - MAKE_CASE(AArch64ISD::LD2LANEpost) - MAKE_CASE(AArch64ISD::LD3LANEpost) - MAKE_CASE(AArch64ISD::LD4LANEpost) - MAKE_CASE(AArch64ISD::ST2LANEpost) - MAKE_CASE(AArch64ISD::ST3LANEpost) - MAKE_CASE(AArch64ISD::ST4LANEpost) - MAKE_CASE(AArch64ISD::SMULL) - MAKE_CASE(AArch64ISD::UMULL) - MAKE_CASE(AArch64ISD::PMULL) - MAKE_CASE(AArch64ISD::FRECPE) - MAKE_CASE(AArch64ISD::FRECPS) - MAKE_CASE(AArch64ISD::FRSQRTE) - MAKE_CASE(AArch64ISD::FRSQRTS) - MAKE_CASE(AArch64ISD::STG) - MAKE_CASE(AArch64ISD::STZG) - MAKE_CASE(AArch64ISD::ST2G) - MAKE_CASE(AArch64ISD::STZ2G) - MAKE_CASE(AArch64ISD::SUNPKHI) - MAKE_CASE(AArch64ISD::SUNPKLO) - MAKE_CASE(AArch64ISD::UUNPKHI) - MAKE_CASE(AArch64ISD::UUNPKLO) - MAKE_CASE(AArch64ISD::INSR) - MAKE_CASE(AArch64ISD::PTEST) - MAKE_CASE(AArch64ISD::PTEST_ANY) - MAKE_CASE(AArch64ISD::PTRUE) - MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO) - MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO) - MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO) - MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO) - MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO) - MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO) - MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO) - MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1Q_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1Q_INDEX_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO) - 
MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) - MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO) - MAKE_CASE(AArch64ISD::SST1Q_PRED) - MAKE_CASE(AArch64ISD::SST1Q_INDEX_PRED) - MAKE_CASE(AArch64ISD::ST1_PRED) - MAKE_CASE(AArch64ISD::SST1_PRED) - MAKE_CASE(AArch64ISD::SST1_SCALED_PRED) - MAKE_CASE(AArch64ISD::SST1_SXTW_PRED) - MAKE_CASE(AArch64ISD::SST1_UXTW_PRED) - MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED) - MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED) - MAKE_CASE(AArch64ISD::SST1_IMM_PRED) - MAKE_CASE(AArch64ISD::SSTNT1_PRED) - MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED) - MAKE_CASE(AArch64ISD::LDP) - MAKE_CASE(AArch64ISD::LDIAPP) - MAKE_CASE(AArch64ISD::LDNP) - MAKE_CASE(AArch64ISD::STP) - MAKE_CASE(AArch64ISD::STILP) - MAKE_CASE(AArch64ISD::STNP) - MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::REVH_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::REVW_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::REVD_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::ADDP) - MAKE_CASE(AArch64ISD::SADDLP) - MAKE_CASE(AArch64ISD::UADDLP) - MAKE_CASE(AArch64ISD::CALL_RVMARKER) - MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL) - MAKE_CASE(AArch64ISD::CALL_BTI) - MAKE_CASE(AArch64ISD::MRRS) - MAKE_CASE(AArch64ISD::MSRR) - MAKE_CASE(AArch64ISD::RSHRNB_I) - MAKE_CASE(AArch64ISD::CTTZ_ELTS) - MAKE_CASE(AArch64ISD::CALL_ARM64EC_TO_X64) - MAKE_CASE(AArch64ISD::URSHR_I_PRED) - MAKE_CASE(AArch64ISD::CB) - } -#undef MAKE_CASE - return nullptr; -} - MachineBasicBlock * AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -23311,12 +22987,6 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG, static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG) { unsigned Opc = N->getOpcode(); - assert(((Opc >= AArch64ISD::GLD1_MERGE_ZERO && // unsigned gather loads - Opc <= AArch64ISD::GLD1_IMM_MERGE_ZERO) || - (Opc >= AArch64ISD::GLD1S_MERGE_ZERO && // signed gather loads - Opc <= AArch64ISD::GLD1S_IMM_MERGE_ZERO)) && - "Invalid opcode."); - const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO || Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO; const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO || diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index b59526bf01888..1924d20f67f49 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -23,498 +23,6 @@ namespace llvm { -namespace AArch64ISD { - -// For predicated nodes where the result is a vector, the operation is -// controlled by a governing predicate and the inactive lanes are explicitly -// defined with a value, please stick the following naming convention: -// -// _MERGE_OP The result value is a vector with inactive lanes equal -// to source operand OP. -// -// _MERGE_ZERO The result value is a vector with inactive lanes -// actively zeroed. -// -// _MERGE_PASSTHRU The result value is a vector with inactive lanes equal -// to the last source operand which only purpose is being -// a passthru value. 
-// -// For other cases where no explicit action is needed to set the inactive lanes, -// or when the result is not a vector and it is needed or helpful to -// distinguish a node from similar unpredicated nodes, use: -// -// _PRED -// -enum NodeType : unsigned { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses. - CALL, // Function call. - - // Pseudo for a OBJC call that gets emitted together with a special `mov - // x29, x29` marker instruction. - CALL_RVMARKER, - - CALL_BTI, // Function call followed by a BTI instruction. - - // Function call, authenticating the callee value first: - // AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands. - AUTH_CALL, - // AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc, - // operands. - AUTH_TC_RETURN, - - // Authenticated variant of CALL_RVMARKER. - AUTH_CALL_RVMARKER, - - COALESCER_BARRIER, - - VG_SAVE, - VG_RESTORE, - - SMSTART, - SMSTOP, - COND_SMSTART, - COND_SMSTOP, - RESTORE_ZA, - RESTORE_ZT, - SAVE_ZT, - - // A call with the callee in x16, i.e. "blr x16". - CALL_ARM64EC_TO_X64, - - // Produces the full sequence of instructions for getting the thread pointer - // offset of a variable into X0, using the TLSDesc model. - TLSDESC_CALLSEQ, - TLSDESC_AUTH_CALLSEQ, - ADRP, // Page address of a TargetGlobalAddress operand. - ADR, // ADR - ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand. - LOADgot, // Load from automatically generated descriptor (e.g. Global - // Offset Table, TLS record). - RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand. - BRCOND, // Conditional branch instruction; "b.cond". - CSEL, - CSINV, // Conditional select invert. - CSNEG, // Conditional select negate. - CSINC, // Conditional select increment. - - // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on - // ELF. - THREAD_POINTER, - ADC, - SBC, // adc, sbc instructions - - // To avoid stack clash, allocation is performed by block and each block is - // probed. - PROBED_ALLOCA, - - // Predicated instructions where inactive lanes produce undefined results. - ABDS_PRED, - ABDU_PRED, - FADD_PRED, - FDIV_PRED, - FMA_PRED, - FMAX_PRED, - FMAXNM_PRED, - FMIN_PRED, - FMINNM_PRED, - FMUL_PRED, - FSUB_PRED, - HADDS_PRED, - HADDU_PRED, - MUL_PRED, - MULHS_PRED, - MULHU_PRED, - RHADDS_PRED, - RHADDU_PRED, - SDIV_PRED, - SHL_PRED, - SMAX_PRED, - SMIN_PRED, - SRA_PRED, - SRL_PRED, - UDIV_PRED, - UMAX_PRED, - UMIN_PRED, - - // Unpredicated vector instructions - BIC, - - SRAD_MERGE_OP1, - - // Predicated instructions with the result of inactive lanes provided by the - // last operand. - FABS_MERGE_PASSTHRU, - FCEIL_MERGE_PASSTHRU, - FFLOOR_MERGE_PASSTHRU, - FNEARBYINT_MERGE_PASSTHRU, - FNEG_MERGE_PASSTHRU, - FRECPX_MERGE_PASSTHRU, - FRINT_MERGE_PASSTHRU, - FROUND_MERGE_PASSTHRU, - FROUNDEVEN_MERGE_PASSTHRU, - FSQRT_MERGE_PASSTHRU, - FTRUNC_MERGE_PASSTHRU, - FP_ROUND_MERGE_PASSTHRU, - FP_EXTEND_MERGE_PASSTHRU, - UINT_TO_FP_MERGE_PASSTHRU, - SINT_TO_FP_MERGE_PASSTHRU, - FCVTX_MERGE_PASSTHRU, - FCVTZU_MERGE_PASSTHRU, - FCVTZS_MERGE_PASSTHRU, - SIGN_EXTEND_INREG_MERGE_PASSTHRU, - ZERO_EXTEND_INREG_MERGE_PASSTHRU, - ABS_MERGE_PASSTHRU, - NEG_MERGE_PASSTHRU, - - SETCC_MERGE_ZERO, - - // Arithmetic instructions which write flags. - ADDS, - SUBS, - ADCS, - SBCS, - ANDS, - - // Conditional compares. 
Operands: left,right,falsecc,cc,flags - CCMP, - CCMN, - FCCMP, - - // Floating point comparison - FCMP, - - // Scalar-to-vector duplication - DUP, - DUPLANE8, - DUPLANE16, - DUPLANE32, - DUPLANE64, - DUPLANE128, - - // Vector immedate moves - MOVI, - MOVIshift, - MOVIedit, - MOVImsl, - FMOV, - MVNIshift, - MVNImsl, - - // Vector immediate ops - BICi, - ORRi, - - // Vector bitwise select: similar to ISD::VSELECT but not all bits within an - // element must be identical. - BSP, - - // Vector shuffles - ZIP1, - ZIP2, - UZP1, - UZP2, - TRN1, - TRN2, - REV16, - REV32, - REV64, - EXT, - SPLICE, - - // Vector shift by scalar - VSHL, - VLSHR, - VASHR, - - // Vector shift by scalar (again) - SQSHL_I, - UQSHL_I, - SQSHLU_I, - SRSHR_I, - URSHR_I, - URSHR_I_PRED, - - // Vector narrowing shift by immediate (bottom) - RSHRNB_I, - - // Vector shift by constant and insert - VSLI, - VSRI, - - // Vector comparisons - FCMEQ, - FCMGE, - FCMGT, - - // Round wide FP to narrow FP with inexact results to odd. - FCVTXN, - - // Vector across-lanes addition - // Only the lower result lane is defined. - SADDV, - UADDV, - - // Unsigned sum Long across Vector - UADDLV, - SADDLV, - - // Wide adds - SADDWT, - SADDWB, - UADDWT, - UADDWB, - - // Add Pairwise of two vectors - ADDP, - // Add Long Pairwise - SADDLP, - UADDLP, - - // udot/sdot/usdot instructions - UDOT, - SDOT, - USDOT, - - // Vector across-lanes min/max - // Only the lower result lane is defined. - SMINV, - UMINV, - SMAXV, - UMAXV, - - SADDV_PRED, - UADDV_PRED, - SMAXV_PRED, - UMAXV_PRED, - SMINV_PRED, - UMINV_PRED, - ORV_PRED, - EORV_PRED, - ANDV_PRED, - - // Compare-and-branch - CBZ, - CBNZ, - TBZ, - TBNZ, - - // Tail calls - TC_RETURN, - - // Custom prefetch handling - PREFETCH, - - // {s|u}int to FP within a FP register. - SITOF, - UITOF, - - /// Natural vector cast. ISD::BITCAST is not natural in the big-endian - /// world w.r.t vectors; which causes additional REV instructions to be - /// generated to compensate for the byte-swapping. But sometimes we do - /// need to re-interpret the data in SIMD vector registers in big-endian - /// mode without emitting such REV instructions. - NVCAST, - - MRS, // MRS, also sets the flags via a glue. - - SMULL, - UMULL, - - PMULL, - - // Reciprocal estimates and steps. - FRECPE, - FRECPS, - FRSQRTE, - FRSQRTS, - - SUNPKHI, - SUNPKLO, - UUNPKHI, - UUNPKLO, - - CLASTA_N, - CLASTB_N, - LASTA, - LASTB, - TBL, - - // Floating-point reductions. - FADDA_PRED, - FADDV_PRED, - FMAXV_PRED, - FMAXNMV_PRED, - FMINV_PRED, - FMINNMV_PRED, - - INSR, - PTEST, - PTEST_ANY, - PTRUE, - - CTTZ_ELTS, - - BITREVERSE_MERGE_PASSTHRU, - BSWAP_MERGE_PASSTHRU, - REVH_MERGE_PASSTHRU, - REVW_MERGE_PASSTHRU, - CTLZ_MERGE_PASSTHRU, - CTPOP_MERGE_PASSTHRU, - DUP_MERGE_PASSTHRU, - - // Cast between vectors of the same element type but differ in length. - REINTERPRET_CAST, - - // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa - LS64_BUILD, - LS64_EXTRACT, - - LD1_MERGE_ZERO, - LD1S_MERGE_ZERO, - LDNF1_MERGE_ZERO, - LDNF1S_MERGE_ZERO, - LDFF1_MERGE_ZERO, - LDFF1S_MERGE_ZERO, - LD1RQ_MERGE_ZERO, - LD1RO_MERGE_ZERO, - - // Unsigned gather loads. 
- GLD1_MERGE_ZERO, - GLD1_SCALED_MERGE_ZERO, - GLD1_UXTW_MERGE_ZERO, - GLD1_SXTW_MERGE_ZERO, - GLD1_UXTW_SCALED_MERGE_ZERO, - GLD1_SXTW_SCALED_MERGE_ZERO, - GLD1_IMM_MERGE_ZERO, - GLD1Q_MERGE_ZERO, - GLD1Q_INDEX_MERGE_ZERO, - - // Signed gather loads - GLD1S_MERGE_ZERO, - GLD1S_SCALED_MERGE_ZERO, - GLD1S_UXTW_MERGE_ZERO, - GLD1S_SXTW_MERGE_ZERO, - GLD1S_UXTW_SCALED_MERGE_ZERO, - GLD1S_SXTW_SCALED_MERGE_ZERO, - GLD1S_IMM_MERGE_ZERO, - - // Unsigned gather loads. - GLDFF1_MERGE_ZERO, - GLDFF1_SCALED_MERGE_ZERO, - GLDFF1_UXTW_MERGE_ZERO, - GLDFF1_SXTW_MERGE_ZERO, - GLDFF1_UXTW_SCALED_MERGE_ZERO, - GLDFF1_SXTW_SCALED_MERGE_ZERO, - GLDFF1_IMM_MERGE_ZERO, - - // Signed gather loads. - GLDFF1S_MERGE_ZERO, - GLDFF1S_SCALED_MERGE_ZERO, - GLDFF1S_UXTW_MERGE_ZERO, - GLDFF1S_SXTW_MERGE_ZERO, - GLDFF1S_UXTW_SCALED_MERGE_ZERO, - GLDFF1S_SXTW_SCALED_MERGE_ZERO, - GLDFF1S_IMM_MERGE_ZERO, - - // Non-temporal gather loads - GLDNT1_MERGE_ZERO, - GLDNT1_INDEX_MERGE_ZERO, - GLDNT1S_MERGE_ZERO, - - // Contiguous masked store. - ST1_PRED, - - // Scatter store - SST1_PRED, - SST1_SCALED_PRED, - SST1_UXTW_PRED, - SST1_SXTW_PRED, - SST1_UXTW_SCALED_PRED, - SST1_SXTW_SCALED_PRED, - SST1_IMM_PRED, - SST1Q_PRED, - SST1Q_INDEX_PRED, - - // Non-temporal scatter store - SSTNT1_PRED, - SSTNT1_INDEX_PRED, - - // SME - RDSVL, - REVD_MERGE_PASSTHRU, - ALLOCATE_ZA_BUFFER, - INIT_TPIDR2OBJ, - - // Needed for __arm_agnostic("sme_za_state") - GET_SME_SAVE_SIZE, - ALLOC_SME_SAVE_BUFFER, - - // Asserts that a function argument (i32) is zero-extended to i8 by - // the caller - ASSERT_ZEXT_BOOL, - - // 128-bit system register accesses - // lo64, hi64, chain = MRRS(chain, sysregname) - MRRS, - // chain = MSRR(chain, sysregname, lo64, hi64) - MSRR, - - // Strict (exception-raising) floating point comparison - FIRST_STRICTFP_OPCODE, - STRICT_FCMP = FIRST_STRICTFP_OPCODE, - STRICT_FCMPE, - LAST_STRICTFP_OPCODE = STRICT_FCMPE, - - // NEON Load/Store with post-increment base updates - FIRST_MEMORY_OPCODE, - LD2post = FIRST_MEMORY_OPCODE, - LD3post, - LD4post, - ST2post, - ST3post, - ST4post, - LD1x2post, - LD1x3post, - LD1x4post, - ST1x2post, - ST1x3post, - ST1x4post, - LD1DUPpost, - LD2DUPpost, - LD3DUPpost, - LD4DUPpost, - LD1LANEpost, - LD2LANEpost, - LD3LANEpost, - LD4LANEpost, - ST2LANEpost, - ST3LANEpost, - ST4LANEpost, - - STG, - STZG, - ST2G, - STZ2G, - - LDP, - LDIAPP, - LDNP, - STP, - STILP, - STNP, - LAST_MEMORY_OPCODE = STNP, - - // SME ZA loads and stores - SME_ZA_LDR, - SME_ZA_STR, - - // Compare-and-branch - CB, -}; - -} // end namespace AArch64ISD - namespace AArch64 { /// Possible values of current rounding mode, which is specified in bits /// 23:22 of FPCR. @@ -621,8 +129,6 @@ class AArch64TargetLowering : public TargetLowering { /// Provide custom lowering hooks for some operations. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - const char *getTargetNodeName(unsigned Opcode) const override; - SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; /// This method returns a target specific FastISel object, or null if the diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 61055a66e8858..f8b5e31d9efd8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -704,10 +704,15 @@ def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{ }]>; // Node definitions. 
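+// The SDNode definitions in this section now drive the -gen-sd-node-info +// emitter: the generated AArch64GenSDNodeInfo.inc enum, node-name table and +// opcode classification (memory, strict-FP) are derived from these defs, so +// every AArch64ISD opcode needs a corresponding def here.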
+// Compare-and-branch def AArch64CB : SDNode<"AArch64ISD::CB", SDT_AArch64cb, [SDNPHasChain]>; +// Page address of a TargetGlobalAddress operand. def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>; +// Add the low 12 bits of a TargetGlobalAddress operand. def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>; +// Load from automatically generated descriptor (e.g. Global Offset Table, TLS +// record). def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>; def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDCallSeqStart<[ SDTCisVT<0, i32>, @@ -722,21 +727,27 @@ def AArch64call : SDNode<"AArch64ISD::CALL", [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +// Function call followed by a BTI instruction. def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI", SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +// Pseudo for an OBJC call that gets emitted together with a special `mov +// x29, x29` marker instruction. def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER", SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +// A call with the callee in x16, i.e. "blr x16". def AArch64call_arm64ec_to_x64 : SDNode<"AArch64ISD::CALL_ARM64EC_TO_X64", SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +// Function call, authenticating the callee value first: +// AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands. def AArch64authcall : SDNode<"AArch64ISD::AUTH_CALL", SDTypeProfile<0, -1, [SDTCisPtrTy<0>, SDTCisVT<1, i32>, @@ -745,6 +756,8 @@ def AArch64authcall : SDNode<"AArch64ISD::AUTH_CALL", [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +// AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc, +// operands. def AArch64authtcret: SDNode<"AArch64ISD::AUTH_TC_RETURN", SDTypeProfile<0, 5, [SDTCisPtrTy<0>, SDTCisVT<2, i32>, @@ -752,6 +765,7 @@ def AArch64authtcret: SDNode<"AArch64ISD::AUTH_TC_RETURN", SDTCisVT<4, i64>]>, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +// Authenticated variant of CALL_RVMARKER. def AArch64authcall_rvmarker : SDNode<"AArch64ISD::AUTH_CALL_RVMARKER", SDTypeProfile<0, -1, [SDTCisPtrTy<0>, SDTCisVT<1, i32>, @@ -762,6 +776,7 @@ def AArch64authcall_rvmarker : SDNode<"AArch64ISD::AUTH_CALL_RVMARKER", [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +// Conditional branch instruction; "b.cond". def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, [SDNPHasChain]>; def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, @@ -775,13 +790,19 @@ def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz, def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>; +// Conditional select invert. def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>; +// Conditional select negate. def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>; +// Conditional select increment. def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>; +// Return with a glue operand. Operand 0 is the chain operand. def AArch64retglue : SDNode<"AArch64ISD::RET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >; def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>; + +// Arithmetic instructions which write flags.
def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, [SDNPCommutative]>; def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; @@ -790,21 +811,31 @@ def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; +// Conditional compares. Operands: left, right, falsecc, cc, flags def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; +// Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on +// ELF. def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; +// Floating point comparison def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; -def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp, - [SDNPHasChain]>; -def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp, - [SDNPHasChain]>; + +let IsStrictFP = true in { + // Strict (exception-raising) floating point comparison + def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp, + [SDNPHasChain]>; + def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp, + [SDNPHasChain]>; +} + def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), [(AArch64strict_fcmp node:$lhs, node:$rhs), (AArch64fcmp node:$lhs, node:$rhs)]>; +// Scalar-to-vector duplication def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; @@ -814,6 +845,7 @@ def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>; def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>; +// Vector shuffles def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; @@ -821,6 +853,7 @@ def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>; def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; +// Vector immediate moves def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>; def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; @@ -834,6 +867,9 @@ def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64Rev>; def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64Rev>; def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; +// Vector shift by scalar +def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; +def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; def AArch64vashr_exact : PatFrag<(ops node:$lhs, node:$rhs), @@ -841,16 +877,18 @@ def AArch64vashr_exact : PatFrag<(ops node:$lhs, node:$rhs), return N->getFlags().hasExact(); }]>; -def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; -def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; +// Vector shift by scalar (again) def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", 
SDT_AArch64vshift>; def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; + def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>; def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>; +// Vector bitwise select: similar to ISD::VSELECT but not all bits within an +// element must be identical. def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>; def AArch64cmeq : PatFrag<(ops node:$lhs, node:$rhs), @@ -864,6 +902,7 @@ def AArch64cmhi : PatFrag<(ops node:$lhs, node:$rhs), def AArch64cmhs : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUGE)>; +// Vector comparisons def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>; def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>; def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>; @@ -898,6 +937,7 @@ def AArch64fcmlez : PatFrag<(ops node:$lhs), def AArch64fcmltz : PatFrag<(ops node:$lhs), (AArch64fcmgt immAllZerosV, node:$lhs)>; +// Round wide FP to narrow FP with inexact results to odd. def AArch64fcvtxn_n: SDNode<"AArch64ISD::FCVTXN", SDTFPRoundOp>; def AArch64fcvtxnsdr: PatFrags<(ops node:$Rn), [(f32 (int_aarch64_sisd_fcvtxn (f64 node:$Rn))), @@ -908,18 +948,24 @@ def AArch64fcvtxnv: PatFrags<(ops node:$Rn), //def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>; +// Vector immediate ops def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>; def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>; +// Tail calls def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +// Custom prefetch handling def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, [SDNPHasChain, SDNPSideEffect]>; +// {s|u}int to FP within a FP register. def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; +// Produces the full sequence of instructions for getting the thread pointer +// offset of a variable into X0, using the TLSDesc model. def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ", SDT_AArch64TLSDescCallSeq, [SDNPOutGlue, SDNPHasChain, SDNPVariadic]>; @@ -931,6 +977,11 @@ def AArch64tlsdesc_auth_callseq : SDNode<"AArch64ISD::TLSDESC_AUTH_CALLSEQ", def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", SDT_AArch64WrapperLarge>; +/// Natural vector cast. ISD::BITCAST is not natural in the big-endian +/// world w.r.t vectors; which causes additional REV instructions to be +/// generated to compensate for the byte-swapping. But sometimes we do +/// need to re-interpret the data in SIMD vector registers in big-endian +/// mode without emitting such REV instructions. def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>; def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, @@ -942,21 +993,30 @@ def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull, def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull, [SDNPCommutative]>; +// Reciprocal estimates and steps. 
def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>; def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>; def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>; def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>; +// udot/sdot/usdot instructions def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>; def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>; def AArch64usdot : SDNode<"AArch64ISD::USDOT", SDT_AArch64Dot>; +// Vector across-lanes addition +// Only the lower result lane is defined. def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>; def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>; + +// Vector across-lanes min/max +// Only the lower result lane is defined. def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>; def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; + +// Unsigned/signed sum long across vector def AArch64uaddlv : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>; def AArch64saddlv : SDNode<"AArch64ISD::SADDLV", SDT_AArch64uaddlp>; @@ -967,7 +1027,9 @@ def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs), [(abds node:$lhs, node:$rhs), (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>; +// Add Pairwise of two vectors def AArch64addp_n : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>; +// Add Long Pairwise def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>; def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>; def AArch64addp : PatFrags<(ops node:$Rn, node:$Rm), @@ -1029,18 +1091,35 @@ def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDN def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>; +// To avoid stack clash, allocation is performed by block and each block is +// probed. def AArch64probedalloca : SDNode<"AArch64ISD::PROBED_ALLOCA", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPMayStore]>; +// MRS, also sets the flags via a glue. def AArch64mrs : SDNode<"AArch64ISD::MRS", SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>, [SDNPHasChain]>; +// 128-bit system register accesses +// lo64, hi64, chain = MRRS(chain, sysregname) +def AArch64mrrs : SDNode<"AArch64ISD::MRRS", + SDTypeProfile<2, 1, [SDTCisVT<0, i64>, + SDTCisVT<1, i64>]>, + [SDNPHasChain]>; + +// chain = MSRR(chain, sysregname, lo64, hi64) +def AArch64msrr : SDNode<"AArch64ISD::MSRR", + SDTypeProfile<0, 3, [SDTCisVT<1, i64>, + SDTCisVT<2, i64>]>, + [SDNPHasChain]>; + def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>; +// Vector narrowing shift by immediate (bottom) def AArch64rshrnb : SDNode<"AArch64ISD::RSHRNB_I", SD_AArch64rshrnb>; def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i), [(AArch64rshrnb node:$rs, node:$i), @@ -1049,6 +1128,72 @@ def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i), def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>, []>; +// NEON Load/Store with post-increment base updates. +// TODO: Complete SDTypeProfile constraints.
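+// (Rough shape of the profiles below, counting typed values but not the +// chain: LDn nodes produce their data vectors plus an i64 written-back base +// address; STn nodes produce only the written-back base. Operands are any +// source vectors, a lane index for the LANE variants, the base address, and +// the post-increment amount.)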
+def AArch64ld2post : SDNode<"AArch64ISD::LD2post", SDTypeProfile<3, 2, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64ld3post : SDNode<"AArch64ISD::LD3post", SDTypeProfile<4, 2, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64ld4post : SDNode<"AArch64ISD::LD4post", SDTypeProfile<5, 2, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64st2post : SDNode<"AArch64ISD::ST2post", SDTypeProfile<1, 4, []>, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def AArch64st3post : SDNode<"AArch64ISD::ST3post", SDTypeProfile<1, 5, []>, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def AArch64st4post : SDNode<"AArch64ISD::ST4post", SDTypeProfile<1, 6, []>, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def AArch64ld1x2post : SDNode<"AArch64ISD::LD1x2post", SDTypeProfile<3, 2, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64ld1x3post : SDNode<"AArch64ISD::LD1x3post", SDTypeProfile<4, 2, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64ld1x4post : SDNode<"AArch64ISD::LD1x4post", SDTypeProfile<5, 2, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64st1x2post : SDNode<"AArch64ISD::ST1x2post", SDTypeProfile<1, 4, []>, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def AArch64st1x3post : SDNode<"AArch64ISD::ST1x3post", SDTypeProfile<1, 5, []>, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def AArch64st1x4post : SDNode<"AArch64ISD::ST1x4post", SDTypeProfile<1, 6, []>, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def AArch64ld1duppost : SDNode<"AArch64ISD::LD1DUPpost", SDTypeProfile<2, 2, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64ld2duppost : SDNode<"AArch64ISD::LD2DUPpost", SDTypeProfile<3, 2, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64ld3duppost : SDNode<"AArch64ISD::LD3DUPpost", SDTypeProfile<4, 2, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64ld4duppost : SDNode<"AArch64ISD::LD4DUPpost", SDTypeProfile<5, 2, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64ld1lanepost : SDNode<"AArch64ISD::LD1LANEpost", SDTypeProfile<2, 4, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64ld2lanepost : SDNode<"AArch64ISD::LD2LANEpost", SDTypeProfile<3, 5, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64ld3lanepost : SDNode<"AArch64ISD::LD3LANEpost", SDTypeProfile<4, 6, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64ld4lanepost : SDNode<"AArch64ISD::LD4LANEpost", SDTypeProfile<5, 7, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64st2lanepost : SDNode<"AArch64ISD::ST2LANEpost", SDTypeProfile<1, 5, []>, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def AArch64st3lanepost : SDNode<"AArch64ISD::ST3LANEpost", SDTypeProfile<1, 6, []>, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def AArch64st4lanepost : SDNode<"AArch64ISD::ST4LANEpost", SDTypeProfile<1, 7, []>, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// Non-temporal scatter store +def AArch64sstnt1_index_pred + : SDNode<"AArch64ISD::SSTNT1_INDEX_PRED", SDTypeProfile<0, 5, []>, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +// Scatter store +def AArch64sst1q_index_pred + : SDNode<"AArch64ISD::SST1Q_INDEX_PRED", SDTypeProfile<0, 5, []>, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +// Non-temporal gather loads +def AArch64gldnt1_index_merge_zero + : SDNode<"AArch64ISD::GLDNT1_INDEX_MERGE_ZERO", SDTypeProfile<1, 4, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +// Unsigned gather loads.
+def AArch64gld1q_index_merge_zero + : SDNode<"AArch64ISD::GLD1Q_INDEX_MERGE_ZERO", SDTypeProfile<1, 4, []>, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + // Match add node and also treat an 'or' node is as an 'add' if the or'ed operands // have no common bits. def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs), @@ -5697,14 +5842,14 @@ def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))), (v2i64 (REV64v16i8 (v2i64 V128:$Rn)))>; // Patterns for funnel shifts to be matched to equivalent REV instructions -def : Pat<(v2i64 (or (v2i64 (AArch64vshl (v2i64 V128:$Rn), (i32 32))), - (v2i64 (AArch64vlshr (v2i64 V128:$Rn), (i32 32))))), +def : Pat<(v2i64 (or (v2i64 (AArch64vshl (v2i64 V128:$Rn), (i32 32))), + (v2i64 (AArch64vlshr (v2i64 V128:$Rn), (i32 32))))), (v2i64 (REV64v4i32 (v2i64 V128:$Rn)))>; -def : Pat<(v4i32 (or (v4i32 (AArch64vshl (v4i32 V128:$Rn), (i32 16))), - (v4i32 (AArch64vlshr (v4i32 V128:$Rn), (i32 16))))), +def : Pat<(v4i32 (or (v4i32 (AArch64vshl (v4i32 V128:$Rn), (i32 16))), + (v4i32 (AArch64vlshr (v4i32 V128:$Rn), (i32 16))))), (v4i32 (REV32v8i16 (v4i32 V128:$Rn)))>; -def : Pat<(v2i32 (or (v2i32 (AArch64vshl (v2i32 V64:$Rn), (i32 16))), - (v2i32 (AArch64vlshr (v2i32 V64:$Rn), (i32 16))))), +def : Pat<(v2i32 (or (v2i32 (AArch64vshl (v2i32 V64:$Rn), (i32 16))), + (v2i32 (AArch64vlshr (v2i32 V64:$Rn), (i32 16))))), (v2i32 (REV32v4i16 (v2i32 V64:$Rn)))>; //===----------------------------------------------------------------------===// @@ -10268,6 +10413,8 @@ def StoreSwiftAsyncContext : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset), []>, Sched<[]>; +// Asserts that a function argument (i32) is zero-extended to i8 by +// the caller. def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>; def : Pat<(AArch64AssertZExtBool GPR32:$op), (i32 GPR32:$op)>; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index e7482da001074..61abc3c3c479a 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -61,6 +61,7 @@ let usesCustomInserter = 1 in { } // Nodes to allocate a save buffer for SME. +// Needed for __arm_agnostic("sme_za_state"). def AArch64SMESaveSize : SDNode<"AArch64ISD::GET_SME_SAVE_SIZE", SDTypeProfile<1, 0, [SDTCisInt<0>]>, [SDNPHasChain]>; let usesCustomInserter = 1, Defs = [X0] in { diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 020051bbadea5..a40ef56f30486 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -10,16 +10,35 @@ // //===----------------------------------------------------------------------===// -// For predicated nodes where the entire operation is controlled by a governing -// predicate, please stick to a similar naming convention as used for the -// ISD nodes: +// For predicated nodes where the result is a vector, the operation is +// controlled by a governing predicate and the inactive lanes are explicitly +// defined with a value, please stick to the following naming convention for +// ISD nodes: // -// SDNode <=> AArch64ISD -// ------------------------------- -// _m <=> _MERGE_OP -// _mt <=> _MERGE_PASSTHRU -// _z <=> _MERGE_ZERO -// _p <=> _PRED +// _MERGE_OP The result value is a vector with inactive lanes equal +// to source operand OP. +// +// _MERGE_ZERO The result value is a vector with inactive lanes +// actively zeroed.
+// +// _MERGE_PASSTHRU The result value is a vector with inactive lanes equal +// to the last source operand, whose only purpose is to be +// a passthru value. +// +// For other cases where no explicit action is needed to set the inactive lanes, +// or when the result is not a vector and it is needed or helpful to +// distinguish a node from similar unpredicated nodes, use: +// +// _PRED +// +// The TableGen definition names should be based on the ISD node's name: +// +// TableGen SDNode <=> AArch64ISD +// -------------------------------------------- +// _m <=> _MERGE_OP +// _mt <=> _MERGE_PASSTHRU +// _z <=> _MERGE_ZERO +// _p <=> _PRED // // Given the context of this file, it is not strictly necessary to use _p to // distinguish predicated from unpredicated nodes given that most SVE @@ -66,6 +85,7 @@ def SDT_AArch64_GATHER_VS : SDTypeProfile<1, 4, [ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> ]>; +// Unsigned gather loads. def AArch64ld1_gather_z : SDNode<"AArch64ISD::GLD1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; def AArch64ld1_gather_scaled_z : SDNode<"AArch64ISD::GLD1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; def AArch64ld1_gather_uxtw_z : SDNode<"AArch64ISD::GLD1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; @@ -74,6 +94,7 @@ def AArch64ld1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1_UXTW_SCALED_MERGE def AArch64ld1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; def AArch64ld1_gather_imm_z : SDNode<"AArch64ISD::GLD1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; +// Signed gather loads def AArch64ld1s_gather_z : SDNode<"AArch64ISD::GLD1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; def AArch64ld1s_gather_scaled_z : SDNode<"AArch64ISD::GLD1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; def AArch64ld1s_gather_uxtw_z : SDNode<"AArch64ISD::GLD1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; @@ -82,6 +103,7 @@ def AArch64ld1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_UXTW_SCALED_MER def AArch64ld1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; def AArch64ld1s_gather_imm_z : SDNode<"AArch64ISD::GLD1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; +// Unsigned first-faulting gather loads. def AArch64ldff1_gather_z : SDNode<"AArch64ISD::GLDFF1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; def AArch64ldff1_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; def AArch64ldff1_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; @@ -90,6 +112,7 @@ def AArch64ldff1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_UXTW_SCALED_M def AArch64ldff1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; def AArch64ldff1_gather_imm_z : SDNode<"AArch64ISD::GLDFF1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; +// Signed first-faulting gather loads.
def AArch64ldff1s_gather_z : SDNode<"AArch64ISD::GLDFF1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; def AArch64ldff1s_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; def AArch64ldff1s_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; @@ -98,6 +121,7 @@ def AArch64ldff1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED def AArch64ldff1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; def AArch64ldff1s_gather_imm_z : SDNode<"AArch64ISD::GLDFF1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; +// Non-temporal gather loads def AArch64ldnt1_gather_z : SDNode<"AArch64ISD::GLDNT1_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; def AArch64ldnt1s_gather_z : SDNode<"AArch64ISD::GLDNT1S_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; // Gather vector base + scalar offset @@ -110,6 +134,7 @@ def SDT_AArch64_ST1 : SDTypeProfile<0, 4, [ SDTCVecEltisVT<2,i1>, SDTCisSameNumEltsAs<0,2> ]>; +// Contiguous masked store. def AArch64st1 : SDNode<"AArch64ISD::ST1_PRED", SDT_AArch64_ST1, [SDNPHasChain, SDNPMayStore]>; // Scatter stores - node definitions @@ -124,6 +149,7 @@ def SDT_AArch64_SCATTER_VS : SDTypeProfile<0, 5, [ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> ]>; +// Scatter store def AArch64st1_scatter : SDNode<"AArch64ISD::SST1_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; @@ -132,6 +158,7 @@ def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED_PRED", def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>; +// Non-temporal scatter store def AArch64stnt1_scatter : SDNode<"AArch64ISD::SSTNT1_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>; // Scatter vector base + scalar offset @@ -152,6 +179,11 @@ def sve_cntw_imm_neg : ComplexPattern">; def sve_cntd_imm_neg : ComplexPattern">; def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>; +def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, + [SDTCisVec<1>, SDTCVecEltisVT<1,i1>, SDTCisVec<3>, SDTCisSameNumEltsAs<1,3>]>; + +// Floating-point reductions. 
+def AArch64fadda_p_node : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>; def AArch64faddv_p : SDNode<"AArch64ISD::FADDV_PRED", SDT_AArch64Reduce>; def AArch64fmaxv_p : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>; def AArch64fmaxnmv_p : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>; @@ -361,11 +393,8 @@ def AArch64fcvtx_mt : SDNode<"AArch64ISD::FCVTX_MERGE_PASSTHRU", SDT_AArch64FCV def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>; -def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, - [SDTCisVec<1>, SDTCVecEltisVT<1,i1>, SDTCisVec<3>, SDTCisSameNumEltsAs<1,3>]>; def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; -def AArch64fadda_p_node : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>; def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3), [(AArch64fadda_p_node node:$op1, node:$op2, node:$op3), @@ -390,6 +419,7 @@ def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED def AArch64splice : SDNode<"AArch64ISD::SPLICE", SDT_AArch64Arith>; +// Cast between vectors of the same element type that differ in length. def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>; let HasOneUse = 1 in @@ -430,10 +460,12 @@ def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [ SDTCisSameAs<0,1>, SDTCisSameAs<1,2> ]>; +// Unpredicated vector instructions def AArch64bic_node : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>; def SDT_AArch64addw : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>; +// Wide adds def AArch64saddwt : SDNode<"AArch64ISD::SADDWT", SDT_AArch64addw>; def AArch64saddwb : SDNode<"AArch64ISD::SADDWB", SDT_AArch64addw>; def AArch64uaddwt : SDNode<"AArch64ISD::UADDWT", SDT_AArch64addw>; diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 2273e1c0ffa6e..0d368b7c280c8 100644 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -10,9 +10,14 @@ // //===----------------------------------------------------------------------===// +#include "AArch64SelectionDAGInfo.h" #include "AArch64TargetMachine.h" #include "Utils/AArch64SMEAttributes.h" +#define GET_SDNODE_DESC +#include "AArch64GenSDNodeInfo.inc" +#undef GET_SDNODE_DESC + using namespace llvm; #define DEBUG_TYPE "aarch64-selectiondag-info" @@ -23,22 +28,15 @@ static cl::opt "to lower to librt functions"), cl::init(true)); -bool AArch64SelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= AArch64ISD::FIRST_MEMORY_OPCODE && - Opcode <= AArch64ISD::LAST_MEMORY_OPCODE; -} - -bool AArch64SelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const { - return Opcode >= AArch64ISD::FIRST_STRICTFP_OPCODE && - Opcode <= AArch64ISD::LAST_STRICTFP_OPCODE; -} +AArch64SelectionDAGInfo::AArch64SelectionDAGInfo() + : SelectionDAGGenTargetInfo(AArch64GenSDNodeInfo) {} void AArch64SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, const SDNode *N) const { #ifndef NDEBUG switch (N->getOpcode()) { default: - break; + return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); case AArch64ISD::SADDWT: case AArch64ISD::SADDWB: case AArch64ISD::UADDWT: diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h 
b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h index 9c11833b3f67e..42c2797ebdd17 100644 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -14,14 +14,17 @@ #define LLVM_LIB_TARGET_AARCH64_AARCH64SELECTIONDAGINFO_H #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/IR/RuntimeLibcalls.h" + +#define GET_SDNODE_ENUM +#include "AArch64GenSDNodeInfo.inc" +#undef GET_SDNODE_ENUM namespace llvm { -class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo { +class AArch64SelectionDAGInfo : public SelectionDAGGenTargetInfo { public: - bool isTargetMemoryOpcode(unsigned Opcode) const override; - - bool isTargetStrictFPOpcode(unsigned Opcode) const override; + AArch64SelectionDAGInfo(); void verifyTargetNode(const SelectionDAG &DAG, const SDNode *N) const override; @@ -60,6 +63,6 @@ class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo { SDValue Src, SDValue Size, RTLIB::Libcall LC) const; }; -} +} // namespace llvm #endif diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt index 583003f2f46e6..9cf6f8a86b7d6 100644 --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -24,6 +24,7 @@ tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM AArch64GenRegisterBank.inc -gen-register-bank) tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM AArch64GenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM AArch64GenSystemOperands.inc -gen-searchable-tables) tablegen(LLVM AArch64GenExegesis.inc -gen-exegesis) diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 97e1da7df56b7..b3005d5120229 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -73,6 +73,7 @@ def FILL_PPR_FROM_ZPR_SLOT_PSEUDO : } def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; +// SME ZA loads and stores def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore, [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>; def AArch64SMEStr : SDNode<"AArch64ISD::SME_ZA_STR", SDTZALoadStore, @@ -285,7 +286,7 @@ class SME2_Tile_VG4_Multi_Pat : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))), - (!cast(name) $base, $offset)>; + (!cast(name) $base, $offset)>; class SME2_Tile_Movaz_Pat : Pat<(out_vt (intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)))), @@ -2337,7 +2338,7 @@ multiclass sme2_int_mla_long_array_vg2_single op, SDPat multiclass sme2_fp_mla_long_array_vg4_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType zpr_ty, SDPatternOperator intrinsic, list uses=[]> { - def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty, + def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty, vector_ty, mnemonic, "vgx4">, SMEPseudo2Instr { let Uses = uses; } @@ -5437,7 +5438,7 @@ multiclass sme2p1_zero_matrix { def : SME2_Zero_Matrix_Pat; def : SME2_Zero_Matrix_Pat; def : SME2_Zero_Matrix_Pat; -} +} //===----------------------------------------------------------------------===// // SME2.1 lookup table expand two non-contiguous registers diff --git a/llvm/unittests/CodeGen/CMakeLists.txt 
b/llvm/unittests/CodeGen/CMakeLists.txt index d1677cdaeceac..8b025219c46cf 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -20,7 +20,6 @@ set(LLVM_LINK_COMPONENTS ) add_llvm_unittest(CodeGenTests - AArch64SelectionDAGTest.cpp AllocationOrderTest.cpp AMDGPUMetadataTest.cpp AsmPrinterDwarfTest.cpp diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp similarity index 97% rename from llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp rename to llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp index cf92bdc281637..f06f03bb35a5d 100644 --- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp @@ -1,12 +1,11 @@ -//===- llvm/unittest/CodeGen/AArch64SelectionDAGTest.cpp -------------------------===// -// +//===----------------------------------------------------------------------===// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "../lib/Target/AArch64/AArch64ISelLowering.h" +#include "AArch64SelectionDAGInfo.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/AsmParser/Parser.h" @@ -27,8 +26,9 @@ namespace llvm { class AArch64SelectionDAGTest : public testing::Test { protected: static void SetUpTestCase() { - InitializeAllTargets(); - InitializeAllTargetMCs(); + LLVMInitializeAArch64TargetInfo(); + LLVMInitializeAArch64Target(); + LLVMInitializeAArch64TargetMC(); } void SetUp() override { @@ -37,18 +37,11 @@ class AArch64SelectionDAGTest : public testing::Test { Triple TargetTriple("aarch64--"); std::string Error; const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error); - // FIXME: These tests do not depend on AArch64 specifically, but we have to - // initialize a target. A skeleton Target for unittests would allow us to - // always run these tests. 
- if (!T) - GTEST_SKIP(); TargetOptions Options; TM = std::unique_ptr( T->createTargetMachine(TargetTriple, "", "+sve", Options, std::nullopt, std::nullopt, CodeGenOptLevel::Aggressive)); - if (!TM) - GTEST_SKIP(); SMDiagnostic SMError; M = parseAssemblyString(Assembly, SMError, Context); @@ -144,7 +137,8 @@ TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_SIGN_EXTEND_VECTOR_INREG) { EXPECT_EQ(DAG->ComputeNumSignBits(Op, DemandedElts), 15u); } -TEST_F(AArch64SelectionDAGTest, ComputeNumSignBitsSVE_SIGN_EXTEND_VECTOR_INREG) { +TEST_F(AArch64SelectionDAGTest, + ComputeNumSignBitsSVE_SIGN_EXTEND_VECTOR_INREG) { SDLoc Loc; auto Int8VT = EVT::getIntegerVT(Context, 8); auto Int16VT = EVT::getIntegerVT(Context, 16); @@ -453,7 +447,7 @@ TEST_F(AArch64SelectionDAGTest, isSplatValue_Scalable_SPLAT_VECTOR) { EXPECT_TRUE(DAG->isSplatValue(Op, /*AllowUndefs=*/false)); APInt UndefElts; - APInt DemandedElts(1,1); + APInt DemandedElts(1, 1); EXPECT_TRUE(DAG->isSplatValue(Op, DemandedElts, UndefElts)); } @@ -492,7 +486,8 @@ TEST_F(AArch64SelectionDAGTest, getSplatSourceVector_Fixed_BUILD_VECTOR) { EXPECT_EQ(SplatIdx, 0); } -TEST_F(AArch64SelectionDAGTest, getSplatSourceVector_Fixed_ADD_of_BUILD_VECTOR) { +TEST_F(AArch64SelectionDAGTest, + getSplatSourceVector_Fixed_ADD_of_BUILD_VECTOR) { TargetLowering TL(*TM); SDLoc Loc; @@ -525,7 +520,8 @@ TEST_F(AArch64SelectionDAGTest, getSplatSourceVector_Scalable_SPLAT_VECTOR) { EXPECT_EQ(SplatIdx, 0); } -TEST_F(AArch64SelectionDAGTest, getSplatSourceVector_Scalable_ADD_of_SPLAT_VECTOR) { +TEST_F(AArch64SelectionDAGTest, + getSplatSourceVector_Scalable_ADD_of_SPLAT_VECTOR) { TargetLowering TL(*TM); SDLoc Loc; @@ -560,7 +556,7 @@ TEST_F(AArch64SelectionDAGTest, getRepeatedSequence_Patterns) { // Build some repeating sequences. SmallVector Pattern1111, Pattern1133, Pattern0123; - for(int I = 0; I != 4; ++I) { + for (int I = 0; I != 4; ++I) { Pattern1111.append(4, Val1); Pattern1133.append(2, Val1); Pattern1133.append(2, Val3); @@ -597,7 +593,7 @@ TEST_F(AArch64SelectionDAGTest, getRepeatedSequence_Patterns) { cast(DAG->getBuildVector(VecVT, Loc, Pattern1111)); auto *BV1133 = cast(DAG->getBuildVector(VecVT, Loc, Pattern1133)); - auto *BV0123= + auto *BV0123 = cast(DAG->getBuildVector(VecVT, Loc, Pattern0123)); auto *BV022 = cast(DAG->getBuildVector(VecVT, Loc, Pattern022)); diff --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt index 449888838acdc..67eb508e9bab8 100644 --- a/llvm/unittests/Target/AArch64/CMakeLists.txt +++ b/llvm/unittests/Target/AArch64/CMakeLists.txt @@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS AArch64Desc AArch64Info AArch64Utils + Analysis AsmParser CodeGen CodeGenTypes @@ -30,5 +31,6 @@ add_llvm_target_unittest(AArch64Tests SMEAttributesTest.cpp AArch64RegisterInfoTest.cpp AArch64SVESchedPseudoTest.cpp + AArch64SelectionDAGTest.cpp Immediates.cpp )
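Taken together, the patch follows the usual -gen-sd-node-info adoption shape: a new tablegen() rule produces AArch64GenSDNodeInfo.inc, the header pulls in the generated AArch64ISD enum, the .cpp pulls in the generated node descriptions, and the SelectionDAGGenTargetInfo base class answers the node-name, memory-opcode and strict-FP queries that the deleted hand-written code used to handle. A condensed C++ sketch of that shape, using only names that appear in this patch (illustrative only, not the verbatim files):

// CMakeLists.txt:
//   tablegen(LLVM AArch64GenSDNodeInfo.inc -gen-sd-node-info)

// AArch64SelectionDAGInfo.h:
#define GET_SDNODE_ENUM
#include "AArch64GenSDNodeInfo.inc" // generated AArch64ISD::NodeType enum
#undef GET_SDNODE_ENUM

namespace llvm {
class AArch64SelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
  AArch64SelectionDAGInfo();
  // No getTargetNodeName/isTargetMemoryOpcode/isTargetStrictFPOpcode
  // overrides: the generated table answers those queries in the base class.
};
} // namespace llvm

// AArch64SelectionDAGInfo.cpp:
#define GET_SDNODE_DESC
#include "AArch64GenSDNodeInfo.inc" // generated node descriptions
#undef GET_SDNODE_DESC

llvm::AArch64SelectionDAGInfo::AArch64SelectionDAGInfo()
    : SelectionDAGGenTargetInfo(AArch64GenSDNodeInfo) {}

Unrecognized opcodes in verifyTargetNode now fall through to SelectionDAGGenTargetInfo::verifyTargetNode, which checks each node against the type constraints recorded in the generated descriptions; the hand-written checks remain only for the SADDWB/SADDWT family.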