diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d205c25e73056..fb78467ac83f4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14628,8 +14628,8 @@ static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
-  // Perform this optimization only in the zba extension.
-  if (!Subtarget.hasStdExtZba())
+  // Perform this optimization only in the zba/xandesperf extension.
+  if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXAndesPerf())
     return SDValue();
 
   // Skip for vector types and larger types.
@@ -15536,8 +15536,9 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
   if (VT != Subtarget.getXLenVT())
     return SDValue();
 
-  const bool HasShlAdd =
-      Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
+  const bool HasShlAdd = Subtarget.hasStdExtZba() ||
+                         Subtarget.hasVendorXTHeadBa() ||
+                         Subtarget.hasVendorXAndesPerf();
 
   ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
   if (!CNode)
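Illustration (not part of the patch): with HasShlAdd now true for XAndesPerf, expandMul decomposes small constant multiplies into shift-add pairs that later select to nds.lea.*. A minimal sketch of the effect, mirroring the addmul6 case checked in rv32zba.ll below, and reading nds.lea.h as rd = rs1 + (rs2 << 1):

  define i32 @addmul6(i32 %a, i32 %b) {
    %c = mul i32 %a, 6        ; 6*%a + %b == ((%a + (%a << 1)) << 1) + %b
    %d = add i32 %c, %b
    ret i32 %d
  }
  ; expected with -mattr=+m,+xandesperf (rv32):
  ;   nds.lea.h a0, a0, a0    ; a0 = a + (a << 1) = 3*a
  ;   nds.lea.h a0, a1, a0    ; a0 = b + (3*a << 1) = 6*a + b
  ;   ret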
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
index 2ec768435259c..4e01b93d76e80 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
@@ -135,6 +135,16 @@ class NDSRVInstRR<bits<7> funct7, string opcodestr>
   let mayStore = 0;
 }
 
+class NDSRVInstLEA<bits<7> funct7, string opcodestr>
+    : RVInstR<funct7, 0b000, OPC_CUSTOM_2, (outs GPR:$rd),
+              (ins GPR:$rs2, GPR:$rs1), opcodestr,
+              "$rd, $rs1, $rs2">,
+      Sched<[WriteIALU, ReadIALU, ReadIALU]> {
+  let hasSideEffects = 0;
+  let mayLoad = 0;
+  let mayStore = 0;
+}
+
 // GP: ADDI, LB, LBU
 class NDSRVInstLBGP<bits<2> funct2, string opcodestr>
     : RVInst<(outs GPR:$rd), (ins simm18:$imm18),
@@ -321,9 +331,9 @@ def NDS_BNEC : NDSRVInstBC<0b110, "nds.bnec">;
 def NDS_BFOS : NDSRVInstBFO<0b011, "nds.bfos">;
 def NDS_BFOZ : NDSRVInstBFO<0b010, "nds.bfoz">;
 
-def NDS_LEA_H : NDSRVInstRR<0b0000101, "nds.lea.h">;
-def NDS_LEA_W : NDSRVInstRR<0b0000110, "nds.lea.w">;
-def NDS_LEA_D : NDSRVInstRR<0b0000111, "nds.lea.d">;
+def NDS_LEA_H : NDSRVInstLEA<0b0000101, "nds.lea.h">;
+def NDS_LEA_W : NDSRVInstLEA<0b0000110, "nds.lea.w">;
+def NDS_LEA_D : NDSRVInstLEA<0b0000111, "nds.lea.d">;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def NDS_ADDIGP : NDSRVInstLBGP<0b01, "nds.addigp">;
@@ -345,10 +355,10 @@ def NDS_FLMISM : NDSRVInstRR<0b0010011, "nds.flmism">;
 } // Predicates = [HasVendorXAndesPerf]
 
 let Predicates = [HasVendorXAndesPerf, IsRV64] in {
-def NDS_LEA_B_ZE : NDSRVInstRR<0b0001000, "nds.lea.b.ze">;
-def NDS_LEA_H_ZE : NDSRVInstRR<0b0001001, "nds.lea.h.ze">;
-def NDS_LEA_W_ZE : NDSRVInstRR<0b0001010, "nds.lea.w.ze">;
-def NDS_LEA_D_ZE : NDSRVInstRR<0b0001011, "nds.lea.d.ze">;
+def NDS_LEA_B_ZE : NDSRVInstLEA<0b0001000, "nds.lea.b.ze">;
+def NDS_LEA_H_ZE : NDSRVInstLEA<0b0001001, "nds.lea.h.ze">;
+def NDS_LEA_W_ZE : NDSRVInstLEA<0b0001010, "nds.lea.w.ze">;
+def NDS_LEA_D_ZE : NDSRVInstLEA<0b0001011, "nds.lea.d.ze">;
 
 def NDS_LWUGP : NDSRVInstLWGP<0b110, "nds.lwugp">;
 def NDS_LDGP : NDSRVInstLDGP<0b011, "nds.ldgp">;
@@ -356,3 +366,32 @@ def NDS_LDGP : NDSRVInstLDGP<0b011, "nds.ldgp">;
 def NDS_SDGP : NDSRVInstSDGP<0b111, "nds.sdgp">;
 } // Predicates = [HasVendorXAndesPerf, IsRV64]
 } // DecoderNamespace = "XAndes"
+
+// Patterns
+
+let Predicates = [HasVendorXAndesPerf] in {
+
+defm : ShxAddPat<1, NDS_LEA_H>;
+defm : ShxAddPat<2, NDS_LEA_W>;
+defm : ShxAddPat<3, NDS_LEA_D>;
+
+def : CSImm12MulBy4Pat<NDS_LEA_W>;
+def : CSImm12MulBy8Pat<NDS_LEA_D>;
+} // Predicates = [HasVendorXAndesPerf]
+
+let Predicates = [HasVendorXAndesPerf, IsRV64] in {
+
+defm : ADD_UWPat<NDS_LEA_B_ZE>;
+
+defm : ShxAdd_UWPat<1, NDS_LEA_H_ZE>;
+defm : ShxAdd_UWPat<2, NDS_LEA_W_ZE>;
+defm : ShxAdd_UWPat<3, NDS_LEA_D_ZE>;
+
+defm : Sh1Add_UWPat<NDS_LEA_H_ZE>;
+defm : Sh2Add_UWPat<NDS_LEA_W_ZE>;
+defm : Sh3Add_UWPat<NDS_LEA_D_ZE>;
+
+def : Sh1AddPat<NDS_LEA_H>;
+def : Sh2AddPat<NDS_LEA_W>;
+def : Sh3AddPat<NDS_LEA_D>;
+} // Predicates = [HasVendorXAndesPerf, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 2ce909c5d0e21..f78a534431fc0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -659,10 +659,7 @@ def : Pat<(i32 (and GPR:$rs, 0xFFFF)), (PACK GPR:$rs, (XLenVT X0))>;
 let Predicates = [HasStdExtZbkb, NoStdExtZbb, IsRV64] in
 def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (PACKW GPR:$rs, (XLenVT X0))>;
 
-let Predicates = [HasStdExtZba] in {
-
-foreach i = {1,2,3} in {
-  defvar shxadd = !cast<Instruction>("SH"#i#"ADD");
+multiclass ShxAddPat<int i, Instruction shxadd> {
   def : Pat<(XLenVT (add_like_non_imm12 (shl GPR:$rs1, (XLenVT i)), GPR:$rs2)),
             (shxadd GPR:$rs1, GPR:$rs2)>;
   def : Pat<(XLenVT (riscv_shl_add GPR:$rs1, (XLenVT i), GPR:$rs2)),
             (shxadd GPR:$rs1, GPR:$rs2)>;
@@ -674,15 +671,90 @@ foreach i = {1,2,3} in {
             (shxadd pat:$rs1, GPR:$rs2)>;
 }
 
-def : Pat<(add_like (XLenVT GPR:$r), CSImm12MulBy4:$i),
-          (SH2ADD (XLenVT (ADDI (XLenVT X0), CSImm12MulBy4:$i)),
+class CSImm12MulBy4Pat<Instruction sh2add>
+  : Pat<(add_like (XLenVT GPR:$r), CSImm12MulBy4:$i),
+        (sh2add (XLenVT (ADDI (XLenVT X0), CSImm12MulBy4:$i)),
           GPR:$r)>;
-def : Pat<(add_like (XLenVT GPR:$r), CSImm12MulBy8:$i),
-          (SH3ADD (XLenVT (ADDI (XLenVT X0), CSImm12MulBy8:$i)),
+
+class CSImm12MulBy8Pat<Instruction sh3add>
+  : Pat<(add_like (XLenVT GPR:$r), CSImm12MulBy8:$i),
+        (sh3add (XLenVT (ADDI (XLenVT X0), CSImm12MulBy8:$i)),
           GPR:$r)>;
+
+let Predicates = [HasStdExtZba] in {
+foreach i = {1,2,3} in {
+  defvar shxadd = !cast<Instruction>("SH"#i#"ADD");
+  defm : ShxAddPat<i, shxadd>;
+}
+
+def : CSImm12MulBy4Pat<SH2ADD>;
+def : CSImm12MulBy8Pat<SH3ADD>;
 } // Predicates = [HasStdExtZba]
 
+multiclass ADD_UWPat<Instruction add_uw> {
+  def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFFF), GPR:$rs2)),
+            (add_uw GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (add_uw GPR:$rs, (XLenVT X0))>;
+}
+
+multiclass ShxAdd_UWPat<int i, Instruction shxadd_uw> {
+  def : Pat<(i64 (add_like_non_imm12 (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 i)),
+                                     (XLenVT GPR:$rs2))),
+            (shxadd_uw GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(i64 (riscv_shl_add (and GPR:$rs1, 0xFFFFFFFF), (i64 i), GPR:$rs2)),
+            (shxadd_uw GPR:$rs1, GPR:$rs2)>;
+
+  defvar pat = !cast<ComplexPattern>("sh"#i#"add_uw_op");
+  // More complex cases use a ComplexPattern.
+  def : Pat<(i64 (add_like_non_imm12 pat:$rs1, (XLenVT GPR:$rs2))),
+            (shxadd_uw pat:$rs1, GPR:$rs2)>;
+}
+
+multiclass Sh1Add_UWPat<Instruction sh1add_uw> {
+  def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF),
+                                     (XLenVT GPR:$rs2))),
+            (sh1add_uw GPR:$rs1, GPR:$rs2)>;
+  // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
+  def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x1FFFFFFFE),
+                                     (XLenVT GPR:$rs2))),
+            (sh1add_uw (XLenVT (SRLI GPR:$rs1, 1)), GPR:$rs2)>;
+}
+
+multiclass Sh2Add_UWPat<Instruction sh2add_uw> {
+  def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF),
+                                     (XLenVT GPR:$rs2))),
+            (sh2add_uw GPR:$rs1, GPR:$rs2)>;
+  // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
+  def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC),
+                                     (XLenVT GPR:$rs2))),
+            (sh2add_uw (XLenVT (SRLI GPR:$rs1, 2)), GPR:$rs2)>;
+}
+
+multiclass Sh3Add_UWPat<Instruction sh3add_uw> {
+  def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFF8),
+                                     (XLenVT GPR:$rs2))),
+            (sh3add_uw (XLenVT (SRLIW GPR:$rs1, 3)), GPR:$rs2)>;
+  // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
+  def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8),
+                                     (XLenVT GPR:$rs2))),
+            (sh3add_uw (XLenVT (SRLI GPR:$rs1, 3)), GPR:$rs2)>;
+}
+
+class Sh1AddPat<Instruction sh1add>
+  : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFFE),
+                                 (XLenVT GPR:$rs2))),
+        (sh1add (XLenVT (SRLIW GPR:$rs1, 1)), GPR:$rs2)>;
+
+class Sh2AddPat<Instruction sh2add>
+  : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFFC),
+                                 (XLenVT GPR:$rs2))),
+        (sh2add (XLenVT (SRLIW GPR:$rs1, 2)), GPR:$rs2)>;
+
+class Sh3AddPat<Instruction sh3add>
+  : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFF8),
+                                 (XLenVT GPR:$rs2))),
+        (sh3add (XLenVT (SRLIW GPR:$rs1, 3)), GPR:$rs2)>;
+
 let Predicates = [HasStdExtZba, IsRV64] in {
 def : Pat<(i64 (shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt)),
           (SLLI_UW GPR:$rs1, uimm5:$shamt)>;
@@ -691,47 +763,21 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt)),
 def : Pat<(i64 (and GPR:$rs1, Shifted32OnesMask:$mask)),
           (SLLI_UW (XLenVT (SRLI GPR:$rs1, Shifted32OnesMask:$mask)),
                    Shifted32OnesMask:$mask)>;
-def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFFF), GPR:$rs2)),
-          (ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, (XLenVT X0))>;
 
-foreach i = {1,2,3} in {
-  defvar shxadd_uw = !cast<Instruction>("SH"#i#"ADD_UW");
-  def : Pat<(i64 (add_like_non_imm12 (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 i)), (XLenVT GPR:$rs2))),
-            (shxadd_uw GPR:$rs1, GPR:$rs2)>;
-  def : Pat<(i64 (riscv_shl_add (and GPR:$rs1, 0xFFFFFFFF), (i64 i), GPR:$rs2)),
-            (shxadd_uw GPR:$rs1, GPR:$rs2)>;
-}
-
-def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), (XLenVT GPR:$rs2))),
-          (SH1ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), (XLenVT GPR:$rs2))),
-          (SH2ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), (XLenVT GPR:$rs2))),
-          (SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
+defm : ADD_UWPat<ADD_UW>;
 
-// More complex cases use a ComplexPattern.
 foreach i = {1,2,3} in {
-  defvar pat = !cast<ComplexPattern>("sh"#i#"add_uw_op");
-  def : Pat<(i64 (add_like_non_imm12 pat:$rs1, (XLenVT GPR:$rs2))),
-            (!cast<Instruction>("SH"#i#"ADD_UW") pat:$rs1, GPR:$rs2)>;
+  defvar shxadd_uw = !cast<Instruction>("SH"#i#"ADD_UW");
+  defm : ShxAdd_UWPat<i, shxadd_uw>;
 }
 
-def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFFE), (XLenVT GPR:$rs2))),
-          (SH1ADD (XLenVT (SRLIW GPR:$rs1, 1)), GPR:$rs2)>;
-def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFFC), (XLenVT GPR:$rs2))),
-          (SH2ADD (XLenVT (SRLIW GPR:$rs1, 2)), GPR:$rs2)>;
-def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFF8), (XLenVT GPR:$rs2))),
-          (SH3ADD (XLenVT (SRLIW GPR:$rs1, 3)), GPR:$rs2)>;
-
-// Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
-def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x1FFFFFFFE), (XLenVT GPR:$rs2))),
-          (SH1ADD_UW (XLenVT (SRLI GPR:$rs1, 1)), GPR:$rs2)>;
-def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC), (XLenVT GPR:$rs2))),
-          (SH2ADD_UW (XLenVT (SRLI GPR:$rs1, 2)), GPR:$rs2)>;
-def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8), (XLenVT GPR:$rs2))),
-          (SH3ADD_UW (XLenVT (SRLI GPR:$rs1, 3)), GPR:$rs2)>;
+defm : Sh1Add_UWPat<SH1ADD_UW>;
+defm : Sh2Add_UWPat<SH2ADD_UW>;
+defm : Sh3Add_UWPat<SH3ADD_UW>;
+
+def : Sh1AddPat<SH1ADD>;
+def : Sh2AddPat<SH2ADD>;
+def : Sh3AddPat<SH3ADD>;
 } // Predicates = [HasStdExtZba, IsRV64]
 
 let Predicates = [HasStdExtZbcOrZbkc] in {
diff --git a/llvm/test/CodeGen/RISCV/rv32zba.ll b/llvm/test/CodeGen/RISCV/rv32zba.ll
index fec156ac2be27..240e3ac5d1461 100644
--- a/llvm/test/CodeGen/RISCV/rv32zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zba.ll
@@ -3,6 +3,8 @@
 ; RUN: | FileCheck %s -check-prefixes=CHECK,RV32I
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+zba -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s -check-prefixes=CHECK,RV32ZBA
+; RUN: llc -mtriple=riscv32 -mattr=+m,+xandesperf -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=CHECK,RV32XANDESPERF
 
 define signext i16 @sh1add(i64 %0, ptr %1) {
 ; RV32I-LABEL: sh1add:
@@ -17,6 +19,12 @@ define signext i16 @sh1add(i64 %0, ptr %1) {
 ; RV32ZBA-NEXT: sh1add a0, a0, a2
 ; RV32ZBA-NEXT: lh a0, 0(a0)
 ; RV32ZBA-NEXT: ret
+;
+; RV32XANDESPERF-LABEL: sh1add:
+; RV32XANDESPERF: # %bb.0:
+; RV32XANDESPERF-NEXT: nds.lea.h a0, a2, a0
+; RV32XANDESPERF-NEXT: lh a0, 0(a0)
+; RV32XANDESPERF-NEXT: ret
 %3 = getelementptr inbounds i16, ptr %1, i64 %0
 %4 = load i16, ptr %3
 ret i16 %4
@@ -35,6 +43,12 @@ define i32 @sh2add(i64 %0, ptr %1) {
 ; RV32ZBA-NEXT: sh2add a0, a0, a2
 ; RV32ZBA-NEXT: lw a0, 0(a0)
 ; RV32ZBA-NEXT: ret
+;
+; RV32XANDESPERF-LABEL: sh2add:
+; RV32XANDESPERF: # %bb.0:
+; RV32XANDESPERF-NEXT: nds.lea.w a0, a2, a0
+; RV32XANDESPERF-NEXT: lw a0, 0(a0)
+; RV32XANDESPERF-NEXT: ret
 %3 = getelementptr inbounds i32, ptr %1, i64 %0
 %4 = load i32, ptr %3
 ret i32 %4
@@ -55,6 +69,13 @@ define i64 @sh3add(i64 %0, ptr %1) {
 ; RV32ZBA-NEXT: lw a0, 0(a1)
 ; RV32ZBA-NEXT: lw a1, 4(a1)
 ; RV32ZBA-NEXT: ret
+;
+; RV32XANDESPERF-LABEL: sh3add:
+; RV32XANDESPERF: # %bb.0:
+; RV32XANDESPERF-NEXT: nds.lea.d a1, a2, a0
+; RV32XANDESPERF-NEXT: lw a0, 0(a1)
+; RV32XANDESPERF-NEXT: lw a1, 4(a1)
+; RV32XANDESPERF-NEXT: ret
 %3 = getelementptr inbounds i64, ptr %1, i64 %0
 %4 = load i64, ptr %3
 ret i64 %4
@@ -74,6 +95,12 @@ define i32 @addmul6(i32 %a, i32 %b) {
 ; RV32ZBA-NEXT: sh1add a0, a0, a0
 ; RV32ZBA-NEXT: sh1add a0, a0, a1
 ; RV32ZBA-NEXT: ret
+;
+; RV32XANDESPERF-LABEL: addmul6:
+; RV32XANDESPERF: # %bb.0:
+; RV32XANDESPERF-NEXT: nds.lea.h a0, a0, a0
+; RV32XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV32XANDESPERF-NEXT: ret
 %c = mul i32 %a, 6
 %d = add i32 %c, %b
 ret i32 %d
@@ -92,6 +119,12 @@ define i32 @addmul10(i32 %a, i32 %b) {
 ; RV32ZBA-NEXT: sh2add a0, a0, a0
 ; RV32ZBA-NEXT: sh1add a0, a0, a1
 ; RV32ZBA-NEXT: ret
+;
+; RV32XANDESPERF-LABEL: addmul10:
+; RV32XANDESPERF: # %bb.0:
+; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a0
+; RV32XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV32XANDESPERF-NEXT: ret
 %c = mul i32 %a, 10
 %d = add i32 %c, %b
 ret i32 %d
@@ -111,6 +144,12 @@ define i32 @addmul12(i32 %a, i32 %b) {
 ; RV32ZBA-NEXT: sh1add a0, a0, a0
 ; RV32ZBA-NEXT: sh2add a0, a0, a1
 ; RV32ZBA-NEXT: ret
+;
+; RV32XANDESPERF-LABEL: addmul12:
+; RV32XANDESPERF: # %bb.0:
+; RV32XANDESPERF-NEXT: nds.lea.h a0, a0, a0
+; RV32XANDESPERF-NEXT: nds.lea.w a0, a1, a0
+; RV32XANDESPERF-NEXT: ret
 %c = mul
i32 %a, 12 %d = add i32 %c, %b ret i32 %d @@ -129,6 +168,12 @@ define i32 @addmul18(i32 %a, i32 %b) { ; RV32ZBA-NEXT: sh3add a0, a0, a0 ; RV32ZBA-NEXT: sh1add a0, a0, a1 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: addmul18: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 18 %d = add i32 %c, %b ret i32 %d @@ -147,6 +192,12 @@ define i32 @addmul20(i32 %a, i32 %b) { ; RV32ZBA-NEXT: sh2add a0, a0, a0 ; RV32ZBA-NEXT: sh2add a0, a0, a1 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: addmul20: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.w a0, a1, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 20 %d = add i32 %c, %b ret i32 %d @@ -166,6 +217,12 @@ define i32 @addmul24(i32 %a, i32 %b) { ; RV32ZBA-NEXT: sh1add a0, a0, a0 ; RV32ZBA-NEXT: sh3add a0, a0, a1 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: addmul24: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.d a0, a1, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 24 %d = add i32 %c, %b ret i32 %d @@ -184,6 +241,12 @@ define i32 @addmul36(i32 %a, i32 %b) { ; RV32ZBA-NEXT: sh3add a0, a0, a0 ; RV32ZBA-NEXT: sh2add a0, a0, a1 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: addmul36: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.w a0, a1, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 36 %d = add i32 %c, %b ret i32 %d @@ -202,6 +265,12 @@ define i32 @addmul40(i32 %a, i32 %b) { ; RV32ZBA-NEXT: sh2add a0, a0, a0 ; RV32ZBA-NEXT: sh3add a0, a0, a1 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: addmul40: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.d a0, a1, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 40 %d = add i32 %c, %b ret i32 %d @@ -220,6 +289,12 @@ define i32 @addmul72(i32 %a, i32 %b) { ; RV32ZBA-NEXT: sh3add a0, a0, a0 ; RV32ZBA-NEXT: sh3add a0, a0, a1 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: addmul72: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.d a0, a1, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 72 %d = add i32 %c, %b ret i32 %d @@ -238,6 +313,12 @@ define i32 @mul96(i32 %a) { ; RV32ZBA-NEXT: sh1add a0, a0, a0 ; RV32ZBA-NEXT: slli a0, a0, 5 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul96: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV32XANDESPERF-NEXT: slli a0, a0, 5 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 96 ret i32 %c } @@ -254,6 +335,12 @@ define i32 @mul160(i32 %a) { ; RV32ZBA-NEXT: sh2add a0, a0, a0 ; RV32ZBA-NEXT: slli a0, a0, 5 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul160: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV32XANDESPERF-NEXT: slli a0, a0, 5 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 160 ret i32 %c } @@ -270,6 +357,12 @@ define i32 @mul288(i32 %a) { ; RV32ZBA-NEXT: sh3add a0, a0, a0 ; RV32ZBA-NEXT: slli a0, a0, 5 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul288: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV32XANDESPERF-NEXT: slli a0, a0, 5 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 288 ret i32 %c } @@ -286,6 +379,12 @@ define i32 @mul258(i32 %a) { ; RV32ZBA-NEXT: slli a1, a0, 8 ; RV32ZBA-NEXT: sh1add a0, a0, a1 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul258: +; 
RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: slli a1, a0, 8 +; RV32XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 258 ret i32 %c } @@ -302,6 +401,12 @@ define i32 @mul260(i32 %a) { ; RV32ZBA-NEXT: slli a1, a0, 8 ; RV32ZBA-NEXT: sh2add a0, a0, a1 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul260: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: slli a1, a0, 8 +; RV32XANDESPERF-NEXT: nds.lea.w a0, a1, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 260 ret i32 %c } @@ -318,6 +423,12 @@ define i32 @mul264(i32 %a) { ; RV32ZBA-NEXT: slli a1, a0, 8 ; RV32ZBA-NEXT: sh3add a0, a0, a1 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul264: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: slli a1, a0, 8 +; RV32XANDESPERF-NEXT: nds.lea.d a0, a1, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 264 ret i32 %c } @@ -334,6 +445,12 @@ define i32 @mul11(i32 %a) { ; RV32ZBA-NEXT: sh2add a1, a0, a0 ; RV32ZBA-NEXT: sh1add a0, a1, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul11: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.w a1, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.h a0, a0, a1 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 11 ret i32 %c } @@ -350,6 +467,12 @@ define i32 @mul19(i32 %a) { ; RV32ZBA-NEXT: sh3add a1, a0, a0 ; RV32ZBA-NEXT: sh1add a0, a1, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul19: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.d a1, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.h a0, a0, a1 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 19 ret i32 %c } @@ -366,6 +489,12 @@ define i32 @mul13(i32 %a) { ; RV32ZBA-NEXT: sh1add a1, a0, a0 ; RV32ZBA-NEXT: sh2add a0, a1, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul13: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.h a1, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 13 ret i32 %c } @@ -382,6 +511,12 @@ define i32 @mul21(i32 %a) { ; RV32ZBA-NEXT: sh2add a1, a0, a0 ; RV32ZBA-NEXT: sh2add a0, a1, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul21: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.w a1, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 21 ret i32 %c } @@ -398,6 +533,12 @@ define i32 @mul37(i32 %a) { ; RV32ZBA-NEXT: sh3add a1, a0, a0 ; RV32ZBA-NEXT: sh2add a0, a1, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul37: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.d a1, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 37 ret i32 %c } @@ -414,6 +555,12 @@ define i32 @mul25(i32 %a) { ; RV32ZBA-NEXT: sh2add a0, a0, a0 ; RV32ZBA-NEXT: sh2add a0, a0, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul25: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 25 ret i32 %c } @@ -430,6 +577,12 @@ define i32 @mul41(i32 %a) { ; RV32ZBA-NEXT: sh2add a1, a0, a0 ; RV32ZBA-NEXT: sh3add a0, a1, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul41: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.w a1, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.d a0, a0, a1 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 41 ret i32 %c } @@ -446,6 +599,12 @@ define i32 @mul73(i32 %a) { ; RV32ZBA-NEXT: sh3add a1, a0, a0 ; RV32ZBA-NEXT: sh3add a0, a1, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul73: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.d 
a1, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.d a0, a0, a1 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 73 ret i32 %c } @@ -462,6 +621,12 @@ define i32 @mul27(i32 %a) { ; RV32ZBA-NEXT: sh1add a0, a0, a0 ; RV32ZBA-NEXT: sh3add a0, a0, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul27: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 27 ret i32 %c } @@ -478,6 +643,12 @@ define i32 @mul45(i32 %a) { ; RV32ZBA-NEXT: sh2add a0, a0, a0 ; RV32ZBA-NEXT: sh3add a0, a0, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul45: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 45 ret i32 %c } @@ -494,6 +665,12 @@ define i32 @mul81(i32 %a) { ; RV32ZBA-NEXT: sh3add a0, a0, a0 ; RV32ZBA-NEXT: sh3add a0, a0, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul81: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV32XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 81 ret i32 %c } @@ -511,6 +688,12 @@ define i32 @mul4098(i32 %a) { ; RV32ZBA-NEXT: slli a1, a0, 12 ; RV32ZBA-NEXT: sh1add a0, a0, a1 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul4098: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: slli a1, a0, 12 +; RV32XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 4098 ret i32 %c } @@ -528,6 +711,12 @@ define i32 @mul4100(i32 %a) { ; RV32ZBA-NEXT: slli a1, a0, 12 ; RV32ZBA-NEXT: sh2add a0, a0, a1 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul4100: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: slli a1, a0, 12 +; RV32XANDESPERF-NEXT: nds.lea.w a0, a1, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 4100 ret i32 %c } @@ -545,6 +734,12 @@ define i32 @mul4104(i32 %a) { ; RV32ZBA-NEXT: slli a1, a0, 12 ; RV32ZBA-NEXT: sh3add a0, a0, a1 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul4104: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: slli a1, a0, 12 +; RV32XANDESPERF-NEXT: nds.lea.d a0, a1, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, 4104 ret i32 %c } @@ -562,6 +757,12 @@ define i32 @add4104(i32 %a) { ; RV32ZBA-NEXT: li a1, 1026 ; RV32ZBA-NEXT: sh2add a0, a1, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: add4104: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: li a1, 1026 +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; RV32XANDESPERF-NEXT: ret %c = add i32 %a, 4104 ret i32 %c } @@ -579,6 +780,12 @@ define i32 @add8208(i32 %a) { ; RV32ZBA-NEXT: li a1, 1026 ; RV32ZBA-NEXT: sh3add a0, a1, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: add8208: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: li a1, 1026 +; RV32XANDESPERF-NEXT: nds.lea.d a0, a0, a1 +; RV32XANDESPERF-NEXT: ret %c = add i32 %a, 8208 ret i32 %c } @@ -606,6 +813,12 @@ define i32 @addshl_5_6(i32 %a, i32 %b) { ; RV32ZBA-NEXT: sh1add a0, a1, a0 ; RV32ZBA-NEXT: slli a0, a0, 5 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: addshl_5_6: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.h a0, a0, a1 +; RV32XANDESPERF-NEXT: slli a0, a0, 5 +; RV32XANDESPERF-NEXT: ret %c = shl i32 %a, 5 %d = shl i32 %b, 6 %e = add i32 %c, %d @@ -625,6 +838,12 @@ define i32 @addshl_5_7(i32 %a, i32 %b) { ; RV32ZBA-NEXT: sh2add a0, a1, a0 ; RV32ZBA-NEXT: slli a0, a0, 5 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: addshl_5_7: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; 
RV32XANDESPERF-NEXT: slli a0, a0, 5 +; RV32XANDESPERF-NEXT: ret %c = shl i32 %a, 5 %d = shl i32 %b, 7 %e = add i32 %c, %d @@ -644,6 +863,12 @@ define i32 @addshl_5_8(i32 %a, i32 %b) { ; RV32ZBA-NEXT: sh3add a0, a1, a0 ; RV32ZBA-NEXT: slli a0, a0, 5 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: addshl_5_8: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.d a0, a0, a1 +; RV32XANDESPERF-NEXT: slli a0, a0, 5 +; RV32XANDESPERF-NEXT: ret %c = shl i32 %a, 5 %d = shl i32 %b, 8 %e = add i32 %c, %d @@ -665,6 +890,13 @@ define i32 @srli_1_sh2add(ptr %0, i32 %1) { ; RV32ZBA-NEXT: sh2add a0, a1, a0 ; RV32ZBA-NEXT: lw a0, 0(a0) ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: srli_1_sh2add: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: srli a1, a1, 1 +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; RV32XANDESPERF-NEXT: lw a0, 0(a0) +; RV32XANDESPERF-NEXT: ret %3 = lshr i32 %1, 1 %4 = getelementptr inbounds i32, ptr %0, i32 %3 %5 = load i32, ptr %4, align 4 @@ -688,6 +920,14 @@ define i64 @srli_2_sh3add(ptr %0, i32 %1) { ; RV32ZBA-NEXT: lw a0, 0(a1) ; RV32ZBA-NEXT: lw a1, 4(a1) ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: srli_2_sh3add: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: srli a1, a1, 2 +; RV32XANDESPERF-NEXT: nds.lea.d a1, a0, a1 +; RV32XANDESPERF-NEXT: lw a0, 0(a1) +; RV32XANDESPERF-NEXT: lw a1, 4(a1) +; RV32XANDESPERF-NEXT: ret %3 = lshr i32 %1, 2 %4 = getelementptr inbounds i64, ptr %0, i32 %3 %5 = load i64, ptr %4, align 8 @@ -709,6 +949,13 @@ define signext i16 @srli_2_sh1add(ptr %0, i32 %1) { ; RV32ZBA-NEXT: sh1add a0, a1, a0 ; RV32ZBA-NEXT: lh a0, 0(a0) ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: srli_2_sh1add: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: srli a1, a1, 2 +; RV32XANDESPERF-NEXT: nds.lea.h a0, a0, a1 +; RV32XANDESPERF-NEXT: lh a0, 0(a0) +; RV32XANDESPERF-NEXT: ret %3 = lshr i32 %1, 2 %4 = getelementptr inbounds i16, ptr %0, i32 %3 %5 = load i16, ptr %4, align 2 @@ -730,6 +977,13 @@ define i32 @srli_3_sh2add(ptr %0, i32 %1) { ; RV32ZBA-NEXT: sh2add a0, a1, a0 ; RV32ZBA-NEXT: lw a0, 0(a0) ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: srli_3_sh2add: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: srli a1, a1, 3 +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; RV32XANDESPERF-NEXT: lw a0, 0(a0) +; RV32XANDESPERF-NEXT: ret %3 = lshr i32 %1, 3 %4 = getelementptr inbounds i32, ptr %0, i32 %3 %5 = load i32, ptr %4, align 4 @@ -753,6 +1007,14 @@ define i64 @srli_4_sh3add(ptr %0, i32 %1) { ; RV32ZBA-NEXT: lw a0, 0(a1) ; RV32ZBA-NEXT: lw a1, 4(a1) ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: srli_4_sh3add: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: srli a1, a1, 4 +; RV32XANDESPERF-NEXT: nds.lea.d a1, a0, a1 +; RV32XANDESPERF-NEXT: lw a0, 0(a1) +; RV32XANDESPERF-NEXT: lw a1, 4(a1) +; RV32XANDESPERF-NEXT: ret %3 = lshr i32 %1, 4 %4 = getelementptr inbounds i64, ptr %0, i32 %3 %5 = load i64, ptr %4, align 8 @@ -791,6 +1053,12 @@ define i32 @mul_neg3(i32 %a) { ; RV32ZBA-NEXT: sh1add a0, a0, a0 ; RV32ZBA-NEXT: neg a0, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul_neg3: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV32XANDESPERF-NEXT: neg a0, a0 +; RV32XANDESPERF-NEXT: ret %c = mul i32 %a, -3 ret i32 %c } @@ -818,6 +1086,12 @@ define i32 @mul_neg5(i32 %a) { ; RV32ZBA-NEXT: sh2add a0, a0, a0 ; RV32ZBA-NEXT: neg a0, a0 ; RV32ZBA-NEXT: ret +; +; RV32XANDESPERF-LABEL: mul_neg5: +; RV32XANDESPERF: # %bb.0: +; RV32XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV32XANDESPERF-NEXT: neg a0, a0 +; 
RV32XANDESPERF-NEXT: ret %c = mul i32 %a, -5 ret i32 %c } diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 9760821832b37..c02ecb3178900 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -7,6 +7,8 @@ ; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB,RV64ZBAZBBNOZBS ; RUN: llc -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbs -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB,RV64ZBAZBBZBS +; RUN: llc -mtriple=riscv64 -mattr=+m,+xandesperf -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=CHECK,RV64XANDESPERF define i64 @slliuw(i64 %a) nounwind { ; RV64I-LABEL: slliuw: @@ -19,6 +21,12 @@ define i64 @slliuw(i64 %a) nounwind { ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: slli.uw a0, a0, 1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: slliuw: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a0, a0, 32 +; RV64XANDESPERF-NEXT: srli a0, a0, 31 +; RV64XANDESPERF-NEXT: ret %conv1 = shl i64 %a, 1 %shl = and i64 %conv1, 8589934590 ret i64 %shl @@ -41,6 +49,15 @@ define i128 @slliuw_2(i32 signext %0, ptr %1) { ; RV64ZBA-NEXT: ld a0, 0(a1) ; RV64ZBA-NEXT: ld a1, 8(a1) ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: slliuw_2: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a0, a0, 32 +; RV64XANDESPERF-NEXT: srli a0, a0, 28 +; RV64XANDESPERF-NEXT: add a1, a1, a0 +; RV64XANDESPERF-NEXT: ld a0, 0(a1) +; RV64XANDESPERF-NEXT: ld a1, 8(a1) +; RV64XANDESPERF-NEXT: ret %3 = zext i32 %0 to i64 %4 = getelementptr inbounds i128, ptr %1, i64 %3 %5 = load i128, ptr %4 @@ -59,6 +76,11 @@ define i64 @adduw(i64 %a, i64 %b) nounwind { ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: add.uw a0, a1, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: adduw: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.b.ze a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %and = and i64 %b, 4294967295 %add = add i64 %and, %a ret i64 %add @@ -78,6 +100,12 @@ define signext i8 @adduw_2(i32 signext %0, ptr %1) { ; RV64ZBA-NEXT: add.uw a0, a0, a1 ; RV64ZBA-NEXT: lb a0, 0(a0) ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: adduw_2: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.b.ze a0, a1, a0 +; RV64XANDESPERF-NEXT: lb a0, 0(a0) +; RV64XANDESPERF-NEXT: ret %3 = zext i32 %0 to i64 %4 = getelementptr inbounds i8, ptr %1, i64 %3 %5 = load i8, ptr %4 @@ -95,6 +123,11 @@ define i64 @zextw_i64(i64 %a) nounwind { ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: zext.w a0, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: zextw_i64: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.b.ze a0, zero, a0 +; RV64XANDESPERF-NEXT: ret %and = and i64 %a, 4294967295 ret i64 %and } @@ -114,6 +147,12 @@ define i64 @zextw_demandedbits_i64(i64 %0) { ; RV64ZBA-NEXT: ori a0, a0, 1 ; RV64ZBA-NEXT: zext.w a0, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: zextw_demandedbits_i64: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: ori a0, a0, 1 +; RV64XANDESPERF-NEXT: nds.lea.b.ze a0, zero, a0 +; RV64XANDESPERF-NEXT: ret %2 = and i64 %0, 4294967294 %3 = or i64 %2, 1 ret i64 %3 @@ -132,6 +171,12 @@ define signext i16 @sh1add(i64 %0, ptr %1) { ; RV64ZBA-NEXT: sh1add a0, a0, a1 ; RV64ZBA-NEXT: lh a0, 0(a0) ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh1add: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: lh a0, 0(a0) +; RV64XANDESPERF-NEXT: ret %3 = getelementptr inbounds i16, ptr %1, i64 %0 %4 = load i16, ptr %3 ret i16 %4 @@ -150,6 +195,12 @@ define signext 
i32 @sh2add(i64 %0, ptr %1) { ; RV64ZBA-NEXT: sh2add a0, a0, a1 ; RV64ZBA-NEXT: lw a0, 0(a0) ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh2add: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a1, a0 +; RV64XANDESPERF-NEXT: lw a0, 0(a0) +; RV64XANDESPERF-NEXT: ret %3 = getelementptr inbounds i32, ptr %1, i64 %0 %4 = load i32, ptr %3 ret i32 %4 @@ -168,6 +219,12 @@ define i64 @sh3add(i64 %0, ptr %1) { ; RV64ZBA-NEXT: sh3add a0, a0, a1 ; RV64ZBA-NEXT: ld a0, 0(a0) ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh3add: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a0, a1, a0 +; RV64XANDESPERF-NEXT: ld a0, 0(a0) +; RV64XANDESPERF-NEXT: ret %3 = getelementptr inbounds i64, ptr %1, i64 %0 %4 = load i64, ptr %3 ret i64 %4 @@ -187,6 +244,12 @@ define signext i16 @sh1adduw(i32 signext %0, ptr %1) { ; RV64ZBA-NEXT: sh1add.uw a0, a0, a1 ; RV64ZBA-NEXT: lh a0, 0(a0) ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh1adduw: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h.ze a0, a1, a0 +; RV64XANDESPERF-NEXT: lh a0, 0(a0) +; RV64XANDESPERF-NEXT: ret %3 = zext i32 %0 to i64 %4 = getelementptr inbounds i16, ptr %1, i64 %3 %5 = load i16, ptr %4 @@ -205,6 +268,11 @@ define i64 @sh1adduw_2(i64 %0, i64 %1) { ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sh1add.uw a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh1adduw_2: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h.ze a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %3 = shl i64 %0, 1 %4 = and i64 %3, 8589934590 %5 = add i64 %4, %1 @@ -223,6 +291,11 @@ define i64 @sh1adduw_3(i64 %0, i64 %1) { ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sh1add.uw a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh1adduw_3: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h.ze a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %3 = shl i64 %0, 1 %4 = and i64 %3, 8589934590 %5 = or disjoint i64 %4, %1 @@ -243,6 +316,12 @@ define signext i32 @sh2adduw(i32 signext %0, ptr %1) { ; RV64ZBA-NEXT: sh2add.uw a0, a0, a1 ; RV64ZBA-NEXT: lw a0, 0(a0) ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh2adduw: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w.ze a0, a1, a0 +; RV64XANDESPERF-NEXT: lw a0, 0(a0) +; RV64XANDESPERF-NEXT: ret %3 = zext i32 %0 to i64 %4 = getelementptr inbounds i32, ptr %1, i64 %3 %5 = load i32, ptr %4 @@ -261,6 +340,11 @@ define i64 @sh2adduw_2(i64 %0, i64 %1) { ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sh2add.uw a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh2adduw_2: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w.ze a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %3 = shl i64 %0, 2 %4 = and i64 %3, 17179869180 %5 = add i64 %4, %1 @@ -279,6 +363,11 @@ define i64 @sh2adduw_3(i64 %0, i64 %1) { ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sh2add.uw a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh2adduw_3: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w.ze a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %3 = shl i64 %0, 2 %4 = and i64 %3, 17179869180 %5 = or disjoint i64 %4, %1 @@ -299,6 +388,12 @@ define i64 @sh3adduw(i32 signext %0, ptr %1) { ; RV64ZBA-NEXT: sh3add.uw a0, a0, a1 ; RV64ZBA-NEXT: ld a0, 0(a0) ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh3adduw: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d.ze a0, a1, a0 +; RV64XANDESPERF-NEXT: ld a0, 0(a0) +; RV64XANDESPERF-NEXT: ret %3 = zext i32 %0 to i64 %4 = getelementptr inbounds i64, ptr %1, i64 %3 %5 = load i64, ptr %4 @@ -315,8 +410,17 @@ define i64 @sh3adduw_2(i64 %0, i64 %1) { ; ; 
RV64ZBA-LABEL: sh3adduw_2: ; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a0, a0, 3 +; RV64ZBA-NEXT: srli a0, a0, 3 ; RV64ZBA-NEXT: sh3add.uw a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh3adduw_2: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a0, a0, 3 +; RV64XANDESPERF-NEXT: srli a0, a0, 3 +; RV64XANDESPERF-NEXT: nds.lea.d.ze a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %3 = shl i64 %0, 3 %4 = and i64 %3, 34359738360 %5 = add i64 %4, %1 @@ -333,8 +437,17 @@ define i64 @sh3adduw_3(i64 %0, i64 %1) { ; ; RV64ZBA-LABEL: sh3adduw_3: ; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a0, a0, 3 +; RV64ZBA-NEXT: srli a0, a0, 3 ; RV64ZBA-NEXT: sh3add.uw a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh3adduw_3: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a0, a0, 3 +; RV64XANDESPERF-NEXT: srli a0, a0, 3 +; RV64XANDESPERF-NEXT: nds.lea.d.ze a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %3 = shl i64 %0, 3 %4 = and i64 %3, 34359738360 %5 = or disjoint i64 %4, %1 @@ -363,6 +476,14 @@ define i64 @sh2add_extra_sext(i32 %x, i32 %y, i32 %z) { ; RV64ZBA-NEXT: sraiw a0, a0, 2 ; RV64ZBA-NEXT: mul a0, a1, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh2add_extra_sext: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a1, a0 +; RV64XANDESPERF-NEXT: sllw a1, a2, a0 +; RV64XANDESPERF-NEXT: sraiw a0, a0, 2 +; RV64XANDESPERF-NEXT: mul a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %a = shl i32 %x, 2 %b = add i32 %a, %y %c = shl i32 %z, %b @@ -387,6 +508,12 @@ define i64 @addmul6(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh1add a0, a0, a0 ; RV64ZBA-NEXT: sh1add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul6: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 6 %d = add i64 %c, %b ret i64 %d @@ -406,6 +533,12 @@ define i64 @disjointormul6(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh1add a0, a0, a0 ; RV64ZBA-NEXT: sh1add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: disjointormul6: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 6 %d = or disjoint i64 %c, %b ret i64 %d @@ -424,6 +557,12 @@ define i64 @addmul10(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: sh1add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul10: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 10 %d = add i64 %c, %b ret i64 %d @@ -443,6 +582,12 @@ define i64 @addmul12(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh1add a0, a0, a0 ; RV64ZBA-NEXT: sh2add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul12: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 12 %d = add i64 %c, %b ret i64 %d @@ -461,6 +606,12 @@ define i64 @addmul18(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: sh1add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul18: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 18 %d = add i64 %c, %b ret i64 %d @@ -479,6 +630,12 @@ define i64 @addmul20(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: sh2add a0, a0, a1 ; RV64ZBA-NEXT: ret 
+; +; RV64XANDESPERF-LABEL: addmul20: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 20 %d = add i64 %c, %b ret i64 %d @@ -510,6 +667,12 @@ define i64 @addmul24(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh1add a0, a0, a0 ; RV64ZBA-NEXT: sh3add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul24: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 24 %d = add i64 %c, %b ret i64 %d @@ -528,6 +691,12 @@ define i64 @addmul36(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: sh2add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul36: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 36 %d = add i64 %c, %b ret i64 %d @@ -546,6 +715,12 @@ define i64 @addmul40(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: sh3add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul40: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 40 %d = add i64 %c, %b ret i64 %d @@ -564,6 +739,12 @@ define i64 @addmul72(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: sh3add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul72: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 72 %d = add i64 %c, %b ret i64 %d @@ -582,6 +763,13 @@ define i64 @mul50(i64 %a) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: slli a0, a0, 1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul50: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 50 ret i64 %c } @@ -600,6 +788,13 @@ define i64 @addmul50(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: sh1add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul50: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 50 %d = add i64 %c, %b ret i64 %d @@ -618,6 +813,13 @@ define i64 @mul100(i64 %a) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: slli a0, a0, 2 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul100: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 2 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 100 ret i64 %c } @@ -636,6 +838,13 @@ define i64 @addmul100(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: sh2add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul100: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 100 %d = add i64 %c, %b ret i64 %d @@ -654,6 +863,13 @@ define i64 @mul162(i64 %a) { ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: slli a0, a0, 1 ; RV64ZBA-NEXT: ret +; +; 
RV64XANDESPERF-LABEL: mul162: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 162 ret i64 %c } @@ -672,6 +888,13 @@ define i64 @addmul162(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: sh1add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul162: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 162 %d = add i64 %c, %b ret i64 %d @@ -690,6 +913,13 @@ define i64 @mul180(i64 %a) { ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: slli a0, a0, 2 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul180: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 2 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 180 ret i64 %c } @@ -708,6 +938,13 @@ define i64 @addmul180(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: sh2add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul180: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 180 %d = add i64 %c, %b ret i64 %d @@ -728,6 +965,14 @@ define i64 @add255mul180(i64 %a) { ; RV64ZBA-NEXT: slli a0, a0, 2 ; RV64ZBA-NEXT: addi a0, a0, 255 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: add255mul180: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 2 +; RV64XANDESPERF-NEXT: addi a0, a0, 255 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 180 %d = add i64 %c, 255 ret i64 %d @@ -746,6 +991,13 @@ define i64 @mul200(i64 %a) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: slli a0, a0, 3 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul200: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 3 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 200 ret i64 %c } @@ -764,6 +1016,13 @@ define i64 @addmul200(i64 %a, i64 %b) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: sh3add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul200: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 200 %d = add i64 %c, %b ret i64 %d @@ -806,6 +1065,12 @@ define i64 @mul96(i64 %a) { ; RV64ZBA-NEXT: sh1add a0, a0, a0 ; RV64ZBA-NEXT: slli a0, a0, 5 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul96: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 5 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 96 ret i64 %c } @@ -823,6 +1088,13 @@ define i64 @mul119(i64 %a) { ; RV64ZBA-NEXT: slli a0, a0, 7 ; RV64ZBA-NEXT: sub a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul119: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a1, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 7 +; RV64XANDESPERF-NEXT: sub a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 119 ret i64 %c } @@ -840,6 +1112,13 @@ define i64 @mul123(i64 
%a) { ; RV64ZBA-NEXT: slli a0, a0, 7 ; RV64ZBA-NEXT: sub a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul123: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a1, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 7 +; RV64XANDESPERF-NEXT: sub a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 123 ret i64 %c } @@ -857,6 +1136,13 @@ define i64 @mul125(i64 %a) { ; RV64ZBA-NEXT: slli a0, a0, 7 ; RV64ZBA-NEXT: sub a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul125: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a1, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 7 +; RV64XANDESPERF-NEXT: sub a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 125 ret i64 %c } @@ -874,6 +1160,13 @@ define i64 @mul131(i64 %a) { ; RV64ZBA-NEXT: slli a0, a0, 7 ; RV64ZBA-NEXT: add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul131: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a1, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 7 +; RV64XANDESPERF-NEXT: add a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 131 ret i64 %c } @@ -891,6 +1184,13 @@ define i64 @mul133(i64 %a) { ; RV64ZBA-NEXT: slli a0, a0, 7 ; RV64ZBA-NEXT: add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul133: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a1, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 7 +; RV64XANDESPERF-NEXT: add a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 133 ret i64 %c } @@ -908,6 +1208,13 @@ define i64 @mul137(i64 %a) { ; RV64ZBA-NEXT: slli a0, a0, 7 ; RV64ZBA-NEXT: add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul137: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a1, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 7 +; RV64XANDESPERF-NEXT: add a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 137 ret i64 %c } @@ -924,6 +1231,12 @@ define i64 @mul160(i64 %a) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: slli a0, a0, 5 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul160: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 5 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 160 ret i64 %c } @@ -940,6 +1253,12 @@ define i64 @mul288(i64 %a) { ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: slli a0, a0, 5 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul288: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 5 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 288 ret i64 %c } @@ -958,6 +1277,13 @@ define i64 @zext_mul68(i32 signext %a) { ; RV64ZBA-NEXT: slli.uw a1, a0, 6 ; RV64ZBA-NEXT: sh2add.uw a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: zext_mul68: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a1, a0, 32 +; RV64XANDESPERF-NEXT: srli a1, a1, 26 +; RV64XANDESPERF-NEXT: nds.lea.w.ze a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %b = zext i32 %a to i64 %c = mul i64 %b, 68 ret i64 %c @@ -977,6 +1303,13 @@ define i64 @zext_mul96(i32 signext %a) { ; RV64ZBA-NEXT: slli.uw a0, a0, 5 ; RV64ZBA-NEXT: sh1add a0, a0, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: zext_mul96: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a0, a0, 32 +; RV64XANDESPERF-NEXT: srli a0, a0, 27 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV64XANDESPERF-NEXT: ret %b = zext i32 %a to i64 %c = mul i64 %b, 96 ret i64 %c @@ -996,6 +1329,13 @@ define i64 @zext_mul160(i32 signext %a) { ; RV64ZBA-NEXT: slli.uw a0, a0, 5 ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; 
RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: zext_mul160: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a0, a0, 32 +; RV64XANDESPERF-NEXT: srli a0, a0, 27 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: ret %b = zext i32 %a to i64 %c = mul i64 %b, 160 ret i64 %c @@ -1015,6 +1355,13 @@ define i64 @zext_mul288(i32 signext %a) { ; RV64ZBA-NEXT: slli.uw a0, a0, 5 ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: zext_mul288: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a0, a0, 32 +; RV64XANDESPERF-NEXT: srli a0, a0, 27 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: ret %b = zext i32 %a to i64 %c = mul i64 %b, 288 ret i64 %c @@ -1034,6 +1381,12 @@ define i64 @zext_mul12884901888(i32 signext %a) { ; RV64ZBA-NEXT: sh1add a0, a0, a0 ; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: zext_mul12884901888: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 32 +; RV64XANDESPERF-NEXT: ret %b = zext i32 %a to i64 %c = mul i64 %b, 12884901888 ret i64 %c @@ -1053,6 +1406,12 @@ define i64 @zext_mul21474836480(i32 signext %a) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: zext_mul21474836480: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 32 +; RV64XANDESPERF-NEXT: ret %b = zext i32 %a to i64 %c = mul i64 %b, 21474836480 ret i64 %c @@ -1072,6 +1431,12 @@ define i64 @zext_mul38654705664(i32 signext %a) { ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: zext_mul38654705664: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: slli a0, a0, 32 +; RV64XANDESPERF-NEXT: ret %b = zext i32 %a to i64 %c = mul i64 %b, 38654705664 ret i64 %c @@ -1123,6 +1488,13 @@ define i64 @sh1adduw_imm(i32 signext %0) { ; RV64ZBA-NEXT: slli.uw a0, a0, 1 ; RV64ZBA-NEXT: addi a0, a0, 11 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh1adduw_imm: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a0, a0, 32 +; RV64XANDESPERF-NEXT: srli a0, a0, 31 +; RV64XANDESPERF-NEXT: addi a0, a0, 11 +; RV64XANDESPERF-NEXT: ret %a = zext i32 %0 to i64 %b = shl i64 %a, 1 %c = add i64 %b, 11 @@ -1142,6 +1514,13 @@ define i64 @sh2adduw_imm(i32 signext %0) { ; RV64ZBA-NEXT: slli.uw a0, a0, 2 ; RV64ZBA-NEXT: addi a0, a0, -12 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh2adduw_imm: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a0, a0, 32 +; RV64XANDESPERF-NEXT: srli a0, a0, 30 +; RV64XANDESPERF-NEXT: addi a0, a0, -12 +; RV64XANDESPERF-NEXT: ret %a = zext i32 %0 to i64 %b = shl i64 %a, 2 %c = add i64 %b, -12 @@ -1161,6 +1540,13 @@ define i64 @sh3adduw_imm(i32 signext %0) { ; RV64ZBA-NEXT: slli.uw a0, a0, 3 ; RV64ZBA-NEXT: addi a0, a0, 13 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: sh3adduw_imm: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a0, a0, 32 +; RV64XANDESPERF-NEXT: srli a0, a0, 29 +; RV64XANDESPERF-NEXT: addi a0, a0, 13 +; RV64XANDESPERF-NEXT: ret %a = zext i32 %0 to i64 %b = shl i64 %a, 3 %c = add i64 %b, 13 @@ -1180,6 +1566,12 @@ define i64 @adduw_imm(i32 signext %0) nounwind { ; RV64ZBA-NEXT: zext.w a0, a0 ; RV64ZBA-NEXT: addi a0, a0, 5 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: adduw_imm: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.b.ze a0, zero, a0 +; 
RV64XANDESPERF-NEXT: addi a0, a0, 5 +; RV64XANDESPERF-NEXT: ret %a = zext i32 %0 to i64 %b = add i64 %a, 5 ret i64 %b @@ -1197,6 +1589,12 @@ define i64 @mul258(i64 %a) { ; RV64ZBA-NEXT: slli a1, a0, 8 ; RV64ZBA-NEXT: sh1add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul258: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a1, a0, 8 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 258 ret i64 %c } @@ -1213,6 +1611,12 @@ define i64 @mul260(i64 %a) { ; RV64ZBA-NEXT: slli a1, a0, 8 ; RV64ZBA-NEXT: sh2add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul260: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a1, a0, 8 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 260 ret i64 %c } @@ -1229,6 +1633,12 @@ define i64 @mul264(i64 %a) { ; RV64ZBA-NEXT: slli a1, a0, 8 ; RV64ZBA-NEXT: sh3add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul264: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a1, a0, 8 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 264 ret i64 %c } @@ -1246,6 +1656,13 @@ define i64 @imm_zextw() nounwind { ; RV64ZBA-NEXT: li a0, -2 ; RV64ZBA-NEXT: zext.w a0, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: imm_zextw: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: li a0, 1 +; RV64XANDESPERF-NEXT: slli a0, a0, 32 +; RV64XANDESPERF-NEXT: addi a0, a0, -2 +; RV64XANDESPERF-NEXT: ret ret i64 4294967294 ; -2 in 32 bits. } @@ -1261,6 +1678,12 @@ define i64 @mul11(i64 %a) { ; RV64ZBA-NEXT: sh2add a1, a0, a0 ; RV64ZBA-NEXT: sh1add a0, a1, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul11: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a1, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 11 ret i64 %c } @@ -1277,6 +1700,12 @@ define i64 @mul19(i64 %a) { ; RV64ZBA-NEXT: sh3add a1, a0, a0 ; RV64ZBA-NEXT: sh1add a0, a1, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul19: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a1, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 19 ret i64 %c } @@ -1293,6 +1722,12 @@ define i64 @mul13(i64 %a) { ; RV64ZBA-NEXT: sh1add a1, a0, a0 ; RV64ZBA-NEXT: sh2add a0, a1, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul13: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a1, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 13 ret i64 %c } @@ -1309,6 +1744,12 @@ define i64 @mul21(i64 %a) { ; RV64ZBA-NEXT: sh2add a1, a0, a0 ; RV64ZBA-NEXT: sh2add a0, a1, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul21: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a1, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 21 ret i64 %c } @@ -1325,6 +1766,12 @@ define i64 @mul37(i64 %a) { ; RV64ZBA-NEXT: sh3add a1, a0, a0 ; RV64ZBA-NEXT: sh2add a0, a1, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul37: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a1, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 37 ret i64 %c } @@ -1341,6 +1788,12 @@ define i64 @mul25(i64 %a) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul25: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; 
RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 25 ret i64 %c } @@ -1357,6 +1810,12 @@ define i64 @mul41(i64 %a) { ; RV64ZBA-NEXT: sh2add a1, a0, a0 ; RV64ZBA-NEXT: sh3add a0, a1, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul41: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a1, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 41 ret i64 %c } @@ -1373,6 +1832,12 @@ define i64 @mul73(i64 %a) { ; RV64ZBA-NEXT: sh3add a1, a0, a0 ; RV64ZBA-NEXT: sh3add a0, a1, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul73: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a1, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 73 ret i64 %c } @@ -1389,6 +1854,12 @@ define i64 @mul27(i64 %a) { ; RV64ZBA-NEXT: sh1add a0, a0, a0 ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul27: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 27 ret i64 %c } @@ -1405,6 +1876,12 @@ define i64 @mul45(i64 %a) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul45: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 45 ret i64 %c } @@ -1421,6 +1898,12 @@ define i64 @mul81(i64 %a) { ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: sh3add a0, a0, a0 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul81: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 81 ret i64 %c } @@ -1438,6 +1921,12 @@ define i64 @mul4098(i64 %a) { ; RV64ZBA-NEXT: slli a1, a0, 12 ; RV64ZBA-NEXT: sh1add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul4098: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a1, a0, 12 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 4098 ret i64 %c } @@ -1455,6 +1944,12 @@ define i64 @mul4100(i64 %a) { ; RV64ZBA-NEXT: slli a1, a0, 12 ; RV64ZBA-NEXT: sh2add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul4100: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a1, a0, 12 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 4100 ret i64 %c } @@ -1472,6 +1967,12 @@ define i64 @mul4104(i64 %a) { ; RV64ZBA-NEXT: slli a1, a0, 12 ; RV64ZBA-NEXT: sh3add a0, a0, a1 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mul4104: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: slli a1, a0, 12 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 4104 ret i64 %c } @@ -1489,6 +1990,12 @@ define signext i32 @mulw192(i32 signext %a) { ; RV64ZBA-NEXT: sh1add a0, a0, a0 ; RV64ZBA-NEXT: slliw a0, a0, 6 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mulw192: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0 +; RV64XANDESPERF-NEXT: slliw a0, a0, 6 +; RV64XANDESPERF-NEXT: ret %c = mul i32 %a, 192 ret i32 %c } @@ -1505,6 +2012,12 @@ define signext i32 @mulw320(i32 signext %a) { ; RV64ZBA-NEXT: sh2add a0, a0, a0 ; RV64ZBA-NEXT: slliw a0, a0, 6 ; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: mulw320: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, 
+; RV64XANDESPERF-NEXT: slliw a0, a0, 6
+; RV64XANDESPERF-NEXT: ret
%c = mul i32 %a, 320
ret i32 %c
}
@@ -1521,6 +2034,12 @@ define signext i32 @mulw576(i32 signext %a) {
; RV64ZBA-NEXT: sh3add a0, a0, a0
; RV64ZBA-NEXT: slliw a0, a0, 6
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: mulw576:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0
+; RV64XANDESPERF-NEXT: slliw a0, a0, 6
+; RV64XANDESPERF-NEXT: ret
%c = mul i32 %a, 576
ret i32 %c
}
@@ -1538,6 +2057,12 @@ define i64 @add4104(i64 %a) {
; RV64ZBA-NEXT: li a1, 1026
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: add4104:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: li a1, 1026
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
%c = add i64 %a, 4104
ret i64 %c
}
@@ -1555,6 +2080,12 @@ define i64 @add4104_2(i64 %a) {
; RV64ZBA-NEXT: li a1, 1026
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: add4104_2:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: li a1, 1026
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
%c = or disjoint i64 %a, 4104
ret i64 %c
}
@@ -1572,6 +2103,12 @@ define i64 @add8208(i64 %a) {
; RV64ZBA-NEXT: li a1, 1026
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: add8208:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: li a1, 1026
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
%c = add i64 %a, 8208
ret i64 %c
}
@@ -1611,6 +2148,12 @@ define signext i32 @addshl32_5_6(i32 signext %a, i32 signext %b) {
; RV64ZBA-NEXT: sh1add a0, a1, a0
; RV64ZBA-NEXT: slliw a0, a0, 5
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addshl32_5_6:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a1
+; RV64XANDESPERF-NEXT: slliw a0, a0, 5
+; RV64XANDESPERF-NEXT: ret
%c = shl i32 %a, 5
%d = shl i32 %b, 6
%e = add i32 %c, %d
@@ -1630,6 +2173,12 @@ define i64 @addshl64_5_6(i64 %a, i64 %b) {
; RV64ZBA-NEXT: sh1add a0, a1, a0
; RV64ZBA-NEXT: slli a0, a0, 5
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addshl64_5_6:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a1
+; RV64XANDESPERF-NEXT: slli a0, a0, 5
+; RV64XANDESPERF-NEXT: ret
%c = shl i64 %a, 5
%d = shl i64 %b, 6
%e = add i64 %c, %d
@@ -1649,6 +2198,12 @@ define signext i32 @addshl32_5_7(i32 signext %a, i32 signext %b) {
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: slliw a0, a0, 5
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addshl32_5_7:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; RV64XANDESPERF-NEXT: slliw a0, a0, 5
+; RV64XANDESPERF-NEXT: ret
%c = shl i32 %a, 5
%d = shl i32 %b, 7
%e = add i32 %c, %d
@@ -1668,6 +2223,12 @@ define i64 @addshl64_5_7(i64 %a, i64 %b) {
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: slli a0, a0, 5
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addshl64_5_7:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; RV64XANDESPERF-NEXT: slli a0, a0, 5
+; RV64XANDESPERF-NEXT: ret
%c = shl i64 %a, 5
%d = shl i64 %b, 7
%e = add i64 %c, %d
@@ -1687,6 +2248,12 @@ define signext i32 @addshl32_5_8(i32 signext %a, i32 signext %b) {
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: slliw a0, a0, 5
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addshl32_5_8:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: slliw a0, a0, 5
+; RV64XANDESPERF-NEXT: ret
%c = shl i32 %a, 5
%d = shl i32 %b, 8
%e = add i32 %c, %d
@@ -1706,6 +2273,12 @@ define i64 @addshl64_5_8(i64 %a, i64 %b) {
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: slli a0, a0, 5
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addshl64_5_8:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: slli a0, a0, 5
+; RV64XANDESPERF-NEXT: ret
%c = shl i64 %a, 5
%d = shl i64 %b, 8
%e = add i64 %c, %d
@@ -1734,6 +2307,13 @@ define zeroext i32 @sext_ashr_zext_i8(i8 %a) nounwind {
; RV64ZBAZBB-NEXT: slli a0, a0, 23
; RV64ZBAZBB-NEXT: srli a0, a0, 32
; RV64ZBAZBB-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: sext_ashr_zext_i8:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a0, a0, 56
+; RV64XANDESPERF-NEXT: srai a0, a0, 31
+; RV64XANDESPERF-NEXT: srli a0, a0, 32
+; RV64XANDESPERF-NEXT: ret
%ext = sext i8 %a to i32
%1 = ashr i32 %ext, 9
ret i32 %1
@@ -1753,6 +2333,12 @@ define i64 @sh6_sh3_add1(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
; RV64ZBA-NEXT: sh3add a1, a1, a2
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: sh6_sh3_add1:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: nds.lea.d a1, a2, a1
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
entry:
%shl = shl i64 %z, 3
%shl1 = shl i64 %y, 6
@@ -1775,6 +2361,13 @@ define i64 @sh6_sh3_add2(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
; RV64ZBA-NEXT: sh3add a1, a1, a2
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: sh6_sh3_add2:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: slli a1, a1, 6
+; RV64XANDESPERF-NEXT: add a0, a1, a0
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT: ret
entry:
%shl = shl i64 %z, 3
%shl1 = shl i64 %y, 6
@@ -1797,6 +2390,12 @@ define i64 @sh6_sh3_add3(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
; RV64ZBA-NEXT: sh3add a1, a1, a2
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: sh6_sh3_add3:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: nds.lea.d a1, a2, a1
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
entry:
%shl = shl i64 %z, 3
%shl1 = shl i64 %y, 6
@@ -1820,6 +2419,13 @@ define i64 @sh6_sh3_add4(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
; RV64ZBA-NEXT: sh3add a0, a2, a0
; RV64ZBA-NEXT: add a0, a0, a1
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: sh6_sh3_add4:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: slli a1, a1, 6
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT: add a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
entry:
%shl = shl i64 %z, 3
%shl1 = shl i64 %y, 6
@@ -1850,6 +2456,13 @@ define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind {
; RV64ZBAZBB-NEXT: slli a0, a0, 23
; RV64ZBAZBB-NEXT: srli a0, a0, 32
; RV64ZBAZBB-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: sext_ashr_zext_i16:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a0, a0, 48
+; RV64XANDESPERF-NEXT: srai a0, a0, 25
+; RV64XANDESPERF-NEXT: srli a0, a0, 32
+; RV64XANDESPERF-NEXT: ret
%ext = sext i16 %a to i32
%1 = ashr i32 %ext, 9
ret i32 %1
@@ -1874,6 +2487,13 @@ define signext i16 @sh1adduw_ptrdiff(i64 %diff, ptr %baseptr) {
; RV64ZBA-NEXT: sh1add.uw a0, a0, a1
; RV64ZBA-NEXT: lh a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: sh1adduw_ptrdiff:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srli a0, a0, 1
+; RV64XANDESPERF-NEXT: nds.lea.h.ze a0, a1, a0
+; RV64XANDESPERF-NEXT: lh a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%ptrdiff = lshr exact i64 %diff, 1
%cast = and i64 %ptrdiff, 4294967295
%ptr = getelementptr inbounds i16, ptr %baseptr, i64 %cast
@@ -1898,6 +2518,13 @@ define signext i32 @sh2adduw_ptrdiff(i64 %diff, ptr %baseptr) {
; RV64ZBA-NEXT: sh2add.uw a0, a0, a1
; RV64ZBA-NEXT: lw a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: sh2adduw_ptrdiff:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srli a0, a0, 2
+; RV64XANDESPERF-NEXT: nds.lea.w.ze a0, a1, a0
+; RV64XANDESPERF-NEXT: lw a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%ptrdiff = lshr exact i64 %diff, 2
%cast = and i64 %ptrdiff, 4294967295
%ptr = getelementptr inbounds i32, ptr %baseptr, i64 %cast
@@ -1922,6 +2549,13 @@ define i64 @sh3adduw_ptrdiff(i64 %diff, ptr %baseptr) {
; RV64ZBA-NEXT: sh3add.uw a0, a0, a1
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: sh3adduw_ptrdiff:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srli a0, a0, 3
+; RV64XANDESPERF-NEXT: nds.lea.d.ze a0, a1, a0
+; RV64XANDESPERF-NEXT: ld a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%ptrdiff = lshr exact i64 %diff, 3
%cast = and i64 %ptrdiff, 4294967295
%ptr = getelementptr inbounds i64, ptr %baseptr, i64 %cast
@@ -1944,6 +2578,13 @@ define signext i16 @srliw_1_sh1add(ptr %0, i32 signext %1) {
; RV64ZBA-NEXT: sh1add a0, a1, a0
; RV64ZBA-NEXT: lh a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srliw_1_sh1add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a1, a1, 1
+; RV64XANDESPERF-NEXT: nds.lea.h.ze a0, a0, a1
+; RV64XANDESPERF-NEXT: lh a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i32 %1, 1
%4 = zext i32 %3 to i64
%5 = getelementptr inbounds i16, ptr %0, i64 %4
@@ -1971,6 +2612,17 @@ define i128 @slliuw_ptrdiff(i64 %diff, ptr %baseptr) {
; RV64ZBA-NEXT: ld a0, 0(a1)
; RV64ZBA-NEXT: ld a1, 8(a1)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: slliuw_ptrdiff:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: li a2, 1
+; RV64XANDESPERF-NEXT: slli a2, a2, 36
+; RV64XANDESPERF-NEXT: addi a2, a2, -16
+; RV64XANDESPERF-NEXT: and a0, a0, a2
+; RV64XANDESPERF-NEXT: add a1, a1, a0
+; RV64XANDESPERF-NEXT: ld a0, 0(a1)
+; RV64XANDESPERF-NEXT: ld a1, 8(a1)
+; RV64XANDESPERF-NEXT: ret
%ptrdiff = lshr exact i64 %diff, 4
%cast = and i64 %ptrdiff, 4294967295
%ptr = getelementptr inbounds i128, ptr %baseptr, i64 %cast
@@ -1993,6 +2645,13 @@ define signext i32 @srliw_2_sh2add(ptr %0, i32 signext %1) {
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: lw a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srliw_2_sh2add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a1, a1, 2
+; RV64XANDESPERF-NEXT: nds.lea.w.ze a0, a0, a1
+; RV64XANDESPERF-NEXT: lw a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i32 %1, 2
%4 = zext i32 %3 to i64
%5 = getelementptr inbounds i32, ptr %0, i64 %4
@@ -2012,9 +2671,16 @@ define i64 @srliw_3_sh3add(ptr %0, i32 signext %1) {
; RV64ZBA-LABEL: srliw_3_sh3add:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srliw a1, a1, 3
-; RV64ZBA-NEXT: sh3add a0, a1, a0
+; RV64ZBA-NEXT: sh3add.uw a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srliw_3_sh3add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a1, a1, 3
+; RV64XANDESPERF-NEXT: nds.lea.d.ze a0, a0, a1
+; RV64XANDESPERF-NEXT: ld a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i32 %1, 3
%4 = zext i32 %3 to i64
%5 = getelementptr inbounds i64, ptr %0, i64 %4
@@ -2037,6 +2703,13 @@ define signext i32 @srliw_1_sh2add(ptr %0, i32 signext %1) {
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: lw a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srliw_1_sh2add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a1, a1, 1
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; RV64XANDESPERF-NEXT: lw a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i32 %1, 1
%4 = zext i32 %3 to i64
%5 = getelementptr inbounds i32, ptr %0, i64 %4
@@ -2059,6 +2732,13 @@ define i64 @srliw_1_sh3add(ptr %0, i32 signext %1) {
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srliw_1_sh3add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a1, a1, 1
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: ld a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i32 %1, 1
%4 = zext i32 %3 to i64
%5 = getelementptr inbounds i64, ptr %0, i64 %4
@@ -2081,6 +2761,13 @@ define i64 @srliw_2_sh3add(ptr %0, i32 signext %1) {
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srliw_2_sh3add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a1, a1, 2
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: ld a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i32 %1, 2
%4 = zext i32 %3 to i64
%5 = getelementptr inbounds i64, ptr %0, i64 %4
@@ -2103,6 +2790,13 @@ define signext i16 @srliw_2_sh1add(ptr %0, i32 signext %1) {
; RV64ZBA-NEXT: sh1add a0, a1, a0
; RV64ZBA-NEXT: lh a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srliw_2_sh1add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a1, a1, 2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a1
+; RV64XANDESPERF-NEXT: lh a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i32 %1, 2
%4 = zext i32 %3 to i64
%5 = getelementptr inbounds i16, ptr %0, i64 %4
@@ -2126,6 +2820,13 @@ define signext i32 @srliw_3_sh2add(ptr %0, i32 signext %1) {
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: lw a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srliw_3_sh2add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a1, a1, 3
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; RV64XANDESPERF-NEXT: lw a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i32 %1, 3
%4 = zext i32 %3 to i64
%5 = getelementptr inbounds i32, ptr %0, i64 %4
@@ -2148,6 +2849,13 @@ define i64 @srliw_4_sh3add(ptr %0, i32 signext %1) {
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srliw_4_sh3add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a1, a1, 4
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: ld a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i32 %1, 4
%4 = zext i32 %3 to i64
%5 = getelementptr inbounds i64, ptr %0, i64 %4
@@ -2170,6 +2878,13 @@ define signext i32 @srli_1_sh2add(ptr %0, i64 %1) {
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: lw a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srli_1_sh2add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srli a1, a1, 1
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; RV64XANDESPERF-NEXT: lw a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i64 %1, 1
%4 = getelementptr inbounds i32, ptr %0, i64 %3
%5 = load i32, ptr %4, align 4
@@ -2191,6 +2906,13 @@ define i64 @srli_2_sh3add(ptr %0, i64 %1) {
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srli_2_sh3add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srli a1, a1, 2
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: ld a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i64 %1, 2
%4 = getelementptr inbounds i64, ptr %0, i64 %3
%5 = load i64, ptr %4, align 8
@@ -2212,6 +2934,13 @@ define signext i16 @srli_2_sh1add(ptr %0, i64 %1) {
; RV64ZBA-NEXT: sh1add a0, a1, a0
; RV64ZBA-NEXT: lh a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srli_2_sh1add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srli a1, a1, 2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a1
+; RV64XANDESPERF-NEXT: lh a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i64 %1, 2
%4 = getelementptr inbounds i16, ptr %0, i64 %3
%5 = load i16, ptr %4, align 2
@@ -2233,6 +2962,13 @@ define signext i32 @srli_3_sh2add(ptr %0, i64 %1) {
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: lw a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srli_3_sh2add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srli a1, a1, 3
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; RV64XANDESPERF-NEXT: lw a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i64 %1, 3
%4 = getelementptr inbounds i32, ptr %0, i64 %3
%5 = load i32, ptr %4, align 4
@@ -2254,6 +2990,13 @@ define i64 @srli_4_sh3add(ptr %0, i64 %1) {
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srli_4_sh3add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srli a1, a1, 4
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: ld a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = lshr i64 %1, 4
%4 = getelementptr inbounds i64, ptr %0, i64 %3
%5 = load i64, ptr %4, align 8
@@ -2275,6 +3018,13 @@ define signext i16 @shl_2_sh1adduw(ptr %0, i32 signext %1) {
; RV64ZBA-NEXT: sh1add.uw a0, a1, a0
; RV64ZBA-NEXT: lh a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: shl_2_sh1adduw:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 2
+; RV64XANDESPERF-NEXT: nds.lea.h.ze a0, a0, a1
+; RV64XANDESPERF-NEXT: lh a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = shl i32 %1, 2
%4 = zext i32 %3 to i64
%5 = getelementptr inbounds i16, ptr %0, i64 %4
@@ -2297,6 +3047,13 @@ define signext i32 @shl_16_sh2adduw(ptr %0, i32 signext %1) {
; RV64ZBA-NEXT: sh2add.uw a0, a1, a0
; RV64ZBA-NEXT: lw a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: shl_16_sh2adduw:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 16
+; RV64XANDESPERF-NEXT: nds.lea.w.ze a0, a0, a1
+; RV64XANDESPERF-NEXT: lw a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = shl i32 %1, 16
%4 = zext i32 %3 to i64
%5 = getelementptr inbounds i32, ptr %0, i64 %4
@@ -2319,6 +3076,13 @@ define i64 @shl_31_sh3adduw(ptr %0, i32 signext %1) {
; RV64ZBA-NEXT: sh3add.uw a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: shl_31_sh3adduw:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 31
+; RV64XANDESPERF-NEXT: nds.lea.d.ze a0, a0, a1
+; RV64XANDESPERF-NEXT: ld a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%3 = shl i32 %1, 31
%4 = zext i32 %3 to i64
%5 = getelementptr inbounds i64, ptr %0, i64 %4
@@ -2340,6 +3104,12 @@ define i64 @pack_i64(i64 %a, i64 %b) nounwind {
; RV64ZBA-NEXT: slli a1, a1, 32
; RV64ZBA-NEXT: add.uw a0, a0, a1
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: pack_i64:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 32
+; RV64XANDESPERF-NEXT: nds.lea.b.ze a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%shl = and i64 %a, 4294967295
%shl1 = shl i64 %b, 32
%or = or i64 %shl1, %shl
@@ -2360,6 +3130,12 @@ define i64 @pack_i64_2(i32 signext %a, i32 signext %b) nounwind {
; RV64ZBA-NEXT: slli a1, a1, 32
; RV64ZBA-NEXT: add.uw a0, a0, a1
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: pack_i64_2:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 32
+; RV64XANDESPERF-NEXT: nds.lea.b.ze a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%zexta = zext i32 %a to i64
%zextb = zext i32 %b to i64
%shl1 = shl i64 %zextb, 32
@@ -2379,6 +3155,11 @@ define i64 @pack_i64_disjoint(i64 %a, i64 %b) nounwind {
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: add.uw a0, a0, a1
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: pack_i64_disjoint:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.b.ze a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%shl = and i64 %a, 4294967295
%or = or disjoint i64 %b, %shl
ret i64 %or
@@ -2396,6 +3177,11 @@ define i64 @pack_i64_disjoint_2(i32 signext %a, i64 %b) nounwind {
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: add.uw a0, a0, a1
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: pack_i64_disjoint_2:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.b.ze a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%zexta = zext i32 %a to i64
%or = or disjoint i64 %b, %zexta
ret i64 %or
@@ -2416,6 +3202,13 @@ define i8 @array_index_sh1_sh0(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: add a0, a0, a2
; RV64ZBA-NEXT: lbu a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh1_sh0:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a1
+; RV64XANDESPERF-NEXT: add a0, a0, a2
+; RV64XANDESPERF-NEXT: lbu a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [2 x i8], ptr %p, i64 %idx1, i64 %idx2
%b = load i8, ptr %a, align 1
ret i8 %b
@@ -2437,6 +3230,13 @@ define i16 @array_index_sh1_sh1(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: sh1add a0, a2, a0
; RV64ZBA-NEXT: lh a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh1_sh1:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT: lh a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [2 x i16], ptr %p, i64 %idx1, i64 %idx2
%b = load i16, ptr %a, align 2
ret i16 %b
@@ -2458,6 +3258,13 @@ define i32 @array_index_sh1_sh2(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: sh2add a0, a2, a0
; RV64ZBA-NEXT: lw a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh1_sh2:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT: lw a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [2 x i32], ptr %p, i64 %idx1, i64 %idx2
%b = load i32, ptr %a, align 4
ret i32 %b
@@ -2479,6 +3286,14 @@ define i64 @array_index_sh1_sh3(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh1_sh3:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 4
+; RV64XANDESPERF-NEXT: add a0, a0, a1
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT: ld a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [2 x i64], ptr %p, i64 %idx1, i64 %idx2
%b = load i64, ptr %a, align 8
ret i64 %b
@@ -2499,6 +3314,13 @@ define i8 @array_index_sh2_sh0(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: add a0, a0, a2
; RV64ZBA-NEXT: lbu a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh2_sh0:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; RV64XANDESPERF-NEXT: add a0, a0, a2
+; RV64XANDESPERF-NEXT: lbu a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [4 x i8], ptr %p, i64 %idx1, i64 %idx2
%b = load i8, ptr %a, align 1
ret i8 %b
@@ -2520,6 +3342,13 @@ define i16 @array_index_sh2_sh1(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: sh1add a0, a2, a0
; RV64ZBA-NEXT: lh a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh2_sh1:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT: lh a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [4 x i16], ptr %p, i64 %idx1, i64 %idx2
%b = load i16, ptr %a, align 2
ret i16 %b
@@ -2541,6 +3370,14 @@ define i32 @array_index_sh2_sh2(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: lw a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh2_sh2:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 4
+; RV64XANDESPERF-NEXT: add a0, a0, a1
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT: lw a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [4 x i32], ptr %p, i64 %idx1, i64 %idx2
%b = load i32, ptr %a, align 4
ret i32 %b
@@ -2562,6 +3399,14 @@ define i64 @array_index_sh2_sh3(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh2_sh3:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 5
+; RV64XANDESPERF-NEXT: add a0, a0, a1
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT: ld a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [4 x i64], ptr %p, i64 %idx1, i64 %idx2
%b = load i64, ptr %a, align 8
ret i64 %b
@@ -2582,6 +3427,13 @@ define i8 @array_index_sh3_sh0(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: add a0, a0, a2
; RV64ZBA-NEXT: lbu a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh3_sh0:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: add a0, a0, a2
+; RV64XANDESPERF-NEXT: lbu a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [8 x i8], ptr %p, i64 %idx1, i64 %idx2
%b = load i8, ptr %a, align 1
ret i8 %b
@@ -2603,6 +3455,14 @@ define i16 @array_index_sh3_sh1(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: sh1add a0, a1, a0
; RV64ZBA-NEXT: lh a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh3_sh1:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 4
+; RV64XANDESPERF-NEXT: add a0, a0, a1
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT: lh a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [8 x i16], ptr %p, i64 %idx1, i64 %idx2
%b = load i16, ptr %a, align 2
ret i16 %b
@@ -2624,6 +3484,14 @@ define i32 @array_index_sh3_sh2(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: lw a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh3_sh2:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 5
+; RV64XANDESPERF-NEXT: add a0, a0, a1
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT: lw a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [8 x i32], ptr %p, i64 %idx1, i64 %idx2
%b = load i32, ptr %a, align 4
ret i32 %b
@@ -2645,6 +3513,14 @@ define i64 @array_index_sh3_sh3(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh3_sh3:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 6
+; RV64XANDESPERF-NEXT: add a0, a0, a1
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT: ld a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [8 x i64], ptr %p, i64 %idx1, i64 %idx2
%b = load i64, ptr %a, align 8
ret i64 %b
@@ -2670,6 +3546,15 @@ define i64 @array_index_lshr_sh3_sh3(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_lshr_sh3_sh3:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srli a1, a1, 58
+; RV64XANDESPERF-NEXT: slli a1, a1, 6
+; RV64XANDESPERF-NEXT: add a0, a0, a1
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT: ld a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%shr = lshr i64 %idx1, 58
%a = getelementptr inbounds [8 x i64], ptr %p, i64 %shr, i64 %idx2
%b = load i64, ptr %a, align 8
@@ -2706,6 +3591,14 @@ define i16 @array_index_sh4_sh1(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: sh1add a0, a2, a0
; RV64ZBA-NEXT: lh a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh4_sh1:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 5
+; RV64XANDESPERF-NEXT: add a0, a0, a1
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT: lh a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [16 x i16], ptr %p, i64 %idx1, i64 %idx2
%b = load i16, ptr %a, align 2
ret i16 %b
@@ -2728,6 +3621,14 @@ define i32 @array_index_sh4_sh2(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: sh2add a0, a2, a0
; RV64ZBA-NEXT: lw a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh4_sh2:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 6
+; RV64XANDESPERF-NEXT: add a0, a0, a1
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT: lw a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [16 x i32], ptr %p, i64 %idx1, i64 %idx2
%b = load i32, ptr %a, align 4
ret i32 %b
@@ -2750,6 +3651,14 @@ define i64 @array_index_sh4_sh3(ptr %p, i64 %idx1, i64 %idx2) {
; RV64ZBA-NEXT: sh3add a0, a2, a0
; RV64ZBA-NEXT: ld a0, 0(a0)
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: array_index_sh4_sh3:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a1, a1, 7
+; RV64XANDESPERF-NEXT: add a0, a0, a1
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT: ld a0, 0(a0)
+; RV64XANDESPERF-NEXT: ret
%a = getelementptr inbounds [16 x i64], ptr %p, i64 %idx1, i64 %idx2
%b = load i64, ptr %a, align 8
ret i64 %b
@@ -2771,6 +3680,14 @@ define ptr @test_gep_gep_dont_crash(ptr %p, i64 %a1, i64 %a2) {
; RV64ZBA-NEXT: add a1, a2, a1
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: test_gep_gep_dont_crash:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a2, a2, 6
+; RV64XANDESPERF-NEXT: slli a2, a2, 3
+; RV64XANDESPERF-NEXT: add a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
%lshr = lshr i64 %a2, 6
%and = and i64 %lshr, 67108863
%gep1 = getelementptr i64, ptr %p, i64 %and
@@ -2794,6 +3711,14 @@ define i64 @regression(i32 signext %x, i32 signext %y) {
; RV64ZBA-NEXT: slli.uw a0, a0, 3
; RV64ZBA-NEXT: sh1add a0, a0, a0
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: regression:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: subw a0, a0, a1
+; RV64XANDESPERF-NEXT: slli a0, a0, 32
+; RV64XANDESPERF-NEXT: srli a0, a0, 29
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0
+; RV64XANDESPERF-NEXT: ret
%sub = sub i32 %x, %y
%ext = zext i32 %sub to i64
%res = mul nuw nsw i64 %ext, 24
@@ -2832,6 +3757,12 @@ define i64 @mul_neg3(i64 %a) {
; RV64ZBA-NEXT: sh1add a0, a0, a0
; RV64ZBA-NEXT: neg a0, a0
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: mul_neg3:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0
+; RV64XANDESPERF-NEXT: neg a0, a0
+; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, -3
ret i64 %c
}
@@ -2859,6 +3790,12 @@ define i64 @mul_neg5(i64 %a) {
; RV64ZBA-NEXT: sh2add a0, a0, a0
; RV64ZBA-NEXT: neg a0, a0
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: mul_neg5:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0
+; RV64XANDESPERF-NEXT: neg a0, a0
+; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, -5
ret i64 %c
}
@@ -2925,6 +3862,14 @@ define i64 @bext_mul12(i32 %1, i32 %2) {
; RV64ZBAZBBZBS-NEXT: sh1add a0, a0, a0
; RV64ZBAZBBZBS-NEXT: slli a0, a0, 2
; RV64ZBAZBBZBS-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: bext_mul12:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: srlw a0, a0, a1
+; RV64XANDESPERF-NEXT: andi a0, a0, 1
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0
+; RV64XANDESPERF-NEXT: slli a0, a0, 2
+; RV64XANDESPERF-NEXT: ret
entry:
%3 = lshr i32 %1, %2
%4 = and i32 %3, 1
@@ -2964,6 +3909,14 @@ define i64 @bext_mul45(i32 %1, i32 %2) {
; RV64ZBAZBBZBS-NEXT: sh2add a0, a0, a0
; RV64ZBAZBBZBS-NEXT: sh3add a0, a0, a0
; RV64ZBAZBBZBS-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: bext_mul45:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: srlw a0, a0, a1
+; RV64XANDESPERF-NEXT: andi a0, a0, 1
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0
+; RV64XANDESPERF-NEXT: ret
entry:
%3 = lshr i32 %1, %2
%4 = and i32 %3, 1
@@ -3003,6 +3956,14 @@ define i64 @bext_mul132(i32 %1, i32 %2) {
; RV64ZBAZBBZBS-NEXT: slli a1, a0, 7
; RV64ZBAZBBZBS-NEXT: sh2add a0, a0, a1
; RV64ZBAZBBZBS-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: bext_mul132:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: srlw a0, a0, a1
+; RV64XANDESPERF-NEXT: andi a0, a0, 1
+; RV64XANDESPERF-NEXT: slli a1, a0, 7
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
entry:
%3 = lshr i32 %1, %2
%4 = and i32 %3, 1
@@ -3029,6 +3990,17 @@ define ptr @gep_lshr_i32(ptr %0, i64 %1) {
; RV64ZBA-NEXT: sh2add a1, a1, a1
; RV64ZBA-NEXT: add a0, a0, a1
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: gep_lshr_i32:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: slli a1, a1, 2
+; RV64XANDESPERF-NEXT: li a2, 1
+; RV64XANDESPERF-NEXT: slli a2, a2, 36
+; RV64XANDESPERF-NEXT: addi a2, a2, -16
+; RV64XANDESPERF-NEXT: and a1, a1, a2
+; RV64XANDESPERF-NEXT: nds.lea.w a1, a1, a1
+; RV64XANDESPERF-NEXT: add a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
entry:
%2 = lshr exact i64 %1, 2
%3 = and i64 %2, 4294967295
@@ -3051,6 +4023,15 @@ define i64 @srli_slliuw(i64 %1) {
; RV64ZBA-NEXT: srli a0, a0, 2
; RV64ZBA-NEXT: slli.uw a0, a0, 4
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srli_slliuw:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: slli a0, a0, 2
+; RV64XANDESPERF-NEXT: li a1, 1
+; RV64XANDESPERF-NEXT: slli a1, a1, 36
+; RV64XANDESPERF-NEXT: addi a1, a1, -16
+; RV64XANDESPERF-NEXT: and a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
entry:
%2 = lshr exact i64 %1, 2
%3 = and i64 %2, 4294967295
@@ -3073,6 +4054,15 @@ define i64 @srli_slliuw_canonical(i64 %0) {
; RV64ZBA-NEXT: srli a0, a0, 2
; RV64ZBA-NEXT: slli.uw a0, a0, 4
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srli_slliuw_canonical:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: slli a0, a0, 2
+; RV64XANDESPERF-NEXT: li a1, 1
+; RV64XANDESPERF-NEXT: slli a1, a1, 36
+; RV64XANDESPERF-NEXT: addi a1, a1, -16
+; RV64XANDESPERF-NEXT: and a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
entry:
%1 = shl i64 %0, 2
%2 = and i64 %1, 68719476720
@@ -3122,6 +4112,15 @@ define i64 @srli_slliuw_2(i64 %1) {
; RV64ZBA-NEXT: srli a0, a0, 18
; RV64ZBA-NEXT: slli.uw a0, a0, 3
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srli_slliuw_2:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: srli a0, a0, 15
+; RV64XANDESPERF-NEXT: li a1, 1
+; RV64XANDESPERF-NEXT: slli a1, a1, 35
+; RV64XANDESPERF-NEXT: addi a1, a1, -8
+; RV64XANDESPERF-NEXT: and a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
entry:
%2 = lshr i64 %1, 18
%3 = and i64 %2, 4294967295
@@ -3144,6 +4143,15 @@ define i64 @srli_slliuw_canonical_2(i64 %0) {
; RV64ZBA-NEXT: srli a0, a0, 18
; RV64ZBA-NEXT: slli.uw a0, a0, 3
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srli_slliuw_canonical_2:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: srli a0, a0, 15
+; RV64XANDESPERF-NEXT: li a1, 1
+; RV64XANDESPERF-NEXT: slli a1, a1, 35
+; RV64XANDESPERF-NEXT: addi a1, a1, -8
+; RV64XANDESPERF-NEXT: and a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
entry:
%1 = lshr i64 %0, 15
%2 = and i64 %1, 34359738360
@@ -3165,6 +4173,13 @@ define ptr @srai_srli_sh3add(ptr %0, i64 %1) nounwind {
; RV64ZBA-NEXT: srli a1, a1, 6
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: srai_srli_sh3add:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: srai a1, a1, 32
+; RV64XANDESPERF-NEXT: srli a1, a1, 6
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
entry:
%2 = ashr i64 %1, 32
%3 = lshr i64 %2, 6
@@ -3232,6 +4247,16 @@ define i64 @add_u32simm32_zextw(i64 %x) nounwind {
; RV64ZBA-NEXT: addi a0, a0, -2
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: add_u32simm32_zextw:
+; RV64XANDESPERF: # %bb.0: # %entry
+; RV64XANDESPERF-NEXT: li a1, 1
+; RV64XANDESPERF-NEXT: slli a1, a1, 32
+; RV64XANDESPERF-NEXT: addi a1, a1, -2
+; RV64XANDESPERF-NEXT: add a0, a0, a1
+; RV64XANDESPERF-NEXT: addi a1, a1, 1
+; RV64XANDESPERF-NEXT: and a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
entry:
%add = add i64 %x, 4294967294
%and = and i64 %add, 4294967295