diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp index 439a1bb6e1e69..a33ad8a194afd 100644 --- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp +++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp @@ -12,11 +12,14 @@ // extended bits aren't consumed or because the input was already sign extended // by an earlier instruction. // -// Then it removes the -w suffix from addw, slliw and mulw instructions -// whenever all users are dependent only on the lower word of the result of the -// instruction. We do this only for addw, slliw, and mulw because the -w forms -// are less compressible: c.add and c.slli have a larger register encoding than -// their w counterparts, and there's no compressible version of mulw. +// Then it removes the -w suffix from opw instructions whenever all users are +// dependent only on the lower word of the result of the instruction. +// The cases handled are: +// * addw because c.add has a larger register encoding than c.addw. +// * addiw because it helps reduce test differences between RV32 and RV64 +// w/o being a pessimization. +// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb) +// * slliw because c.slliw doesn't exist and c.slli does // //===---------------------------------------------------------------------===// @@ -661,6 +664,7 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF, default: continue; case RISCV::ADDW: Opc = RISCV::ADD; break; + case RISCV::ADDIW: Opc = RISCV::ADDI; break; case RISCV::MULW: Opc = RISCV::MUL; break; case RISCV::SLLIW: Opc = RISCV::SLLI; break; } diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll index a41664fde3858..274f1cef49aa9 100644 --- a/llvm/test/CodeGen/RISCV/add-before-shl.ll +++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll @@ -25,7 +25,7 @@ define signext i32 @add_small_const(i32 signext %a) nounwind { ; ; RV64I-LABEL: add_small_const: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 1 +; RV64I-NEXT: addi a0, a0, 1 ; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: jalr zero, 0(ra) @@ -39,7 +39,7 @@ define signext i32 @add_small_const(i32 signext %a) nounwind { ; ; RV64C-LABEL: add_small_const: ; RV64C: # %bb.0: -; RV64C-NEXT: c.addiw a0, 1 +; RV64C-NEXT: c.addi a0, 1 ; RV64C-NEXT: c.slli a0, 56 ; RV64C-NEXT: c.srai a0, 56 ; RV64C-NEXT: c.jr ra @@ -78,7 +78,7 @@ define signext i32 @add_large_const(i32 signext %a) nounwind { ; RV64C-LABEL: add_large_const: ; RV64C: # %bb.0: ; RV64C-NEXT: c.lui a1, 1 -; RV64C-NEXT: c.addiw a1, -1 +; RV64C-NEXT: c.addi a1, -1 ; RV64C-NEXT: c.add a0, a1 ; RV64C-NEXT: c.slli a0, 48 ; RV64C-NEXT: c.srai a0, 48 @@ -118,7 +118,7 @@ define signext i32 @add_huge_const(i32 signext %a) nounwind { ; RV64C-LABEL: add_huge_const: ; RV64C: # %bb.0: ; RV64C-NEXT: c.lui a1, 8 -; RV64C-NEXT: c.addiw a1, -1 +; RV64C-NEXT: c.addi a1, -1 ; RV64C-NEXT: c.add a0, a1 ; RV64C-NEXT: c.slli a0, 48 ; RV64C-NEXT: c.srai a0, 48 @@ -139,7 +139,7 @@ define signext i24 @add_non_machine_type(i24 signext %a) nounwind { ; ; RV64I-LABEL: add_non_machine_type: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 256 +; RV64I-NEXT: addi a0, a0, 256 ; RV64I-NEXT: slli a0, a0, 52 ; RV64I-NEXT: srai a0, a0, 40 ; RV64I-NEXT: jalr zero, 0(ra) @@ -153,7 +153,7 @@ define signext i24 @add_non_machine_type(i24 signext %a) nounwind { ; ; RV64C-LABEL: add_non_machine_type: ; RV64C: # %bb.0: -; RV64C-NEXT: addiw a0, a0, 256 +; RV64C-NEXT: addi a0, a0, 256 ; RV64C-NEXT: c.slli a0, 52 ; RV64C-NEXT: c.srai a0, 40 ; 
RV64C-NEXT: c.jr ra diff --git a/llvm/test/CodeGen/RISCV/add-imm.ll b/llvm/test/CodeGen/RISCV/add-imm.ll index 700fec0192d3e..52751f1c22421 100644 --- a/llvm/test/CodeGen/RISCV/add-imm.ll +++ b/llvm/test/CodeGen/RISCV/add-imm.ll @@ -29,7 +29,7 @@ define i32 @add_positive_low_bound_accept(i32 %a) nounwind { ; ; RV64I-LABEL: add_positive_low_bound_accept: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 2047 +; RV64I-NEXT: addi a0, a0, 2047 ; RV64I-NEXT: addiw a0, a0, 1 ; RV64I-NEXT: ret %1 = add i32 %a, 2048 @@ -45,7 +45,7 @@ define i32 @add_positive_high_bound_accept(i32 %a) nounwind { ; ; RV64I-LABEL: add_positive_high_bound_accept: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 2047 +; RV64I-NEXT: addi a0, a0, 2047 ; RV64I-NEXT: addiw a0, a0, 2047 ; RV64I-NEXT: ret %1 = add i32 %a, 4094 @@ -63,7 +63,7 @@ define i32 @add_positive_high_bound_reject(i32 %a) nounwind { ; RV64I-LABEL: add_positive_high_bound_reject: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a1, 1 -; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: addi a1, a1, -1 ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret %1 = add i32 %a, 4095 @@ -93,7 +93,7 @@ define i32 @add_negative_high_bound_accept(i32 %a) nounwind { ; ; RV64I-LABEL: add_negative_high_bound_accept: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, -2048 +; RV64I-NEXT: addi a0, a0, -2048 ; RV64I-NEXT: addiw a0, a0, -1 ; RV64I-NEXT: ret %1 = add i32 %a, -2049 @@ -109,7 +109,7 @@ define i32 @add_negative_low_bound_accept(i32 %a) nounwind { ; ; RV64I-LABEL: add_negative_low_bound_accept: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, -2048 +; RV64I-NEXT: addi a0, a0, -2048 ; RV64I-NEXT: addiw a0, a0, -2048 ; RV64I-NEXT: ret %1 = add i32 %a, -4096 @@ -127,7 +127,7 @@ define i32 @add_negative_low_bound_reject(i32 %a) nounwind { ; RV64I-LABEL: add_negative_low_bound_reject: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a1, 1048575 -; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: addi a1, a1, -1 ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret %1 = add i32 %a, -4097 @@ -143,7 +143,7 @@ define i32 @add32_accept(i32 %a) nounwind { ; ; RV64I-LABEL: add32_accept: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 2047 +; RV64I-NEXT: addi a0, a0, 2047 ; RV64I-NEXT: addiw a0, a0, 952 ; RV64I-NEXT: ret %1 = add i32 %a, 2999 @@ -159,7 +159,7 @@ define signext i32 @add32_sext_accept(i32 signext %a) nounwind { ; ; RV64I-LABEL: add32_sext_accept: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 2047 +; RV64I-NEXT: addi a0, a0, 2047 ; RV64I-NEXT: addiw a0, a0, 952 ; RV64I-NEXT: ret %1 = add i32 %a, 2999 @@ -178,7 +178,7 @@ define signext i32 @add32_sext_reject_on_rv64(i32 signext %a) nounwind { ; ; RV64I-LABEL: add32_sext_reject_on_rv64: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 2047 +; RV64I-NEXT: addi a0, a0, 2047 ; RV64I-NEXT: addiw a0, a0, 953 ; RV64I-NEXT: lui a1, %hi(gv0) ; RV64I-NEXT: sw a0, %lo(gv0)(a1) @@ -231,7 +231,7 @@ define void @add32_reject() nounwind { ; RV64I-NEXT: lui a2, %hi(gb) ; RV64I-NEXT: lw a3, %lo(gb)(a2) ; RV64I-NEXT: lui a4, 1 -; RV64I-NEXT: addiw a4, a4, -1096 +; RV64I-NEXT: addi a4, a4, -1096 ; RV64I-NEXT: add a1, a1, a4 ; RV64I-NEXT: add a3, a3, a4 ; RV64I-NEXT: sw a1, %lo(ga)(a0) diff --git a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll index d1bc480455dd3..48fa69e104565 100644 --- a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll +++ b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll @@ -84,7 +84,7 @@ define i32 @add_mul_combine_accept_b1(i32 %x) { ; RV64IMB-NEXT: li a1, 23 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: lui a1, 50 -; RV64IMB-NEXT: addiw a1, 
a1, 1119 +; RV64IMB-NEXT: addi a1, a1, 1119 ; RV64IMB-NEXT: addw a0, a0, a1 ; RV64IMB-NEXT: ret %tmp0 = add i32 %x, 8953 @@ -107,7 +107,7 @@ define signext i32 @add_mul_combine_accept_b2(i32 signext %x) { ; RV64IMB-NEXT: li a1, 23 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: lui a1, 50 -; RV64IMB-NEXT: addiw a1, a1, 1119 +; RV64IMB-NEXT: addi a1, a1, 1119 ; RV64IMB-NEXT: addw a0, a0, a1 ; RV64IMB-NEXT: ret %tmp0 = add i32 %x, 8953 @@ -153,7 +153,7 @@ define i32 @add_mul_combine_reject_a1(i32 %x) { ; ; RV64IMB-LABEL: add_mul_combine_reject_a1: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 1971 +; RV64IMB-NEXT: addi a0, a0, 1971 ; RV64IMB-NEXT: li a1, 29 ; RV64IMB-NEXT: mulw a0, a0, a1 ; RV64IMB-NEXT: ret @@ -172,7 +172,7 @@ define signext i32 @add_mul_combine_reject_a2(i32 signext %x) { ; ; RV64IMB-LABEL: add_mul_combine_reject_a2: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 1971 +; RV64IMB-NEXT: addi a0, a0, 1971 ; RV64IMB-NEXT: li a1, 29 ; RV64IMB-NEXT: mulw a0, a0, a1 ; RV64IMB-NEXT: ret @@ -217,7 +217,7 @@ define i32 @add_mul_combine_reject_c1(i32 %x) { ; ; RV64IMB-LABEL: add_mul_combine_reject_c1: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 1000 +; RV64IMB-NEXT: addi a0, a0, 1000 ; RV64IMB-NEXT: sh3add a1, a0, a0 ; RV64IMB-NEXT: sh3add a0, a1, a0 ; RV64IMB-NEXT: sext.w a0, a0 @@ -237,7 +237,7 @@ define signext i32 @add_mul_combine_reject_c2(i32 signext %x) { ; ; RV64IMB-LABEL: add_mul_combine_reject_c2: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 1000 +; RV64IMB-NEXT: addi a0, a0, 1000 ; RV64IMB-NEXT: sh3add a1, a0, a0 ; RV64IMB-NEXT: sh3add a0, a1, a0 ; RV64IMB-NEXT: sext.w a0, a0 @@ -349,7 +349,7 @@ define i32 @add_mul_combine_reject_e1(i32 %x) { ; ; RV64IMB-LABEL: add_mul_combine_reject_e1: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 1971 +; RV64IMB-NEXT: addi a0, a0, 1971 ; RV64IMB-NEXT: li a1, 29 ; RV64IMB-NEXT: mulw a0, a0, a1 ; RV64IMB-NEXT: ret @@ -368,7 +368,7 @@ define signext i32 @add_mul_combine_reject_e2(i32 signext %x) { ; ; RV64IMB-LABEL: add_mul_combine_reject_e2: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 1971 +; RV64IMB-NEXT: addi a0, a0, 1971 ; RV64IMB-NEXT: li a1, 29 ; RV64IMB-NEXT: mulw a0, a0, a1 ; RV64IMB-NEXT: ret @@ -414,7 +414,7 @@ define i32 @add_mul_combine_reject_f1(i32 %x) { ; ; RV64IMB-LABEL: add_mul_combine_reject_f1: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 1972 +; RV64IMB-NEXT: addi a0, a0, 1972 ; RV64IMB-NEXT: li a1, 29 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, 11 @@ -435,7 +435,7 @@ define signext i32 @add_mul_combine_reject_f2(i32 signext %x) { ; ; RV64IMB-LABEL: add_mul_combine_reject_f2: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 1972 +; RV64IMB-NEXT: addi a0, a0, 1972 ; RV64IMB-NEXT: li a1, 29 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, 11 @@ -483,7 +483,7 @@ define i32 @add_mul_combine_reject_g1(i32 %x) { ; ; RV64IMB-LABEL: add_mul_combine_reject_g1: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 100 +; RV64IMB-NEXT: addi a0, a0, 100 ; RV64IMB-NEXT: sh3add a1, a0, a0 ; RV64IMB-NEXT: sh3add a0, a1, a0 ; RV64IMB-NEXT: addiw a0, a0, 10 @@ -504,7 +504,7 @@ define signext i32 @add_mul_combine_reject_g2(i32 signext %x) { ; ; RV64IMB-LABEL: add_mul_combine_reject_g2: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 100 +; RV64IMB-NEXT: addi a0, a0, 100 ; RV64IMB-NEXT: sh3add a1, a0, a0 ; RV64IMB-NEXT: sh3add a0, a1, a0 ; RV64IMB-NEXT: addiw a0, a0, 10 @@ -581,9 +581,9 @@ define i32 @mul3000_add8990_a(i32 %x) { ; ; RV64IMB-LABEL: mul3000_add8990_a: ; 
RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 3 +; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1 -; RV64IMB-NEXT: addiw a1, a1, -1096 +; RV64IMB-NEXT: addi a1, a1, -1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, -10 ; RV64IMB-NEXT: ret @@ -604,9 +604,9 @@ define signext i32 @mul3000_add8990_b(i32 signext %x) { ; ; RV64IMB-LABEL: mul3000_add8990_b: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 3 +; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1 -; RV64IMB-NEXT: addiw a1, a1, -1096 +; RV64IMB-NEXT: addi a1, a1, -1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, -10 ; RV64IMB-NEXT: ret @@ -656,9 +656,9 @@ define i32 @mul3000_sub8990_a(i32 %x) { ; ; RV64IMB-LABEL: mul3000_sub8990_a: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, -3 +; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1 -; RV64IMB-NEXT: addiw a1, a1, -1096 +; RV64IMB-NEXT: addi a1, a1, -1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, 10 ; RV64IMB-NEXT: ret @@ -679,9 +679,9 @@ define signext i32 @mul3000_sub8990_b(i32 signext %x) { ; ; RV64IMB-LABEL: mul3000_sub8990_b: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, -3 +; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1 -; RV64IMB-NEXT: addiw a1, a1, -1096 +; RV64IMB-NEXT: addi a1, a1, -1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, 10 ; RV64IMB-NEXT: ret @@ -732,9 +732,9 @@ define i32 @mulneg3000_add8990_a(i32 %x) { ; ; RV64IMB-LABEL: mulneg3000_add8990_a: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, -3 +; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1048575 -; RV64IMB-NEXT: addiw a1, a1, 1096 +; RV64IMB-NEXT: addi a1, a1, 1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, -10 ; RV64IMB-NEXT: ret @@ -755,9 +755,9 @@ define signext i32 @mulneg3000_add8990_b(i32 signext %x) { ; ; RV64IMB-LABEL: mulneg3000_add8990_b: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, -3 +; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1048575 -; RV64IMB-NEXT: addiw a1, a1, 1096 +; RV64IMB-NEXT: addi a1, a1, 1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, -10 ; RV64IMB-NEXT: ret @@ -808,9 +808,9 @@ define i32 @mulneg3000_sub8990_a(i32 %x) { ; ; RV64IMB-LABEL: mulneg3000_sub8990_a: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 3 +; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1048575 -; RV64IMB-NEXT: addiw a1, a1, 1096 +; RV64IMB-NEXT: addi a1, a1, 1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, 10 ; RV64IMB-NEXT: ret @@ -831,9 +831,9 @@ define signext i32 @mulneg3000_sub8990_b(i32 signext %x) { ; ; RV64IMB-LABEL: mulneg3000_sub8990_b: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addiw a0, a0, 3 +; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1048575 -; RV64IMB-NEXT: addiw a1, a1, 1096 +; RV64IMB-NEXT: addi a1, a1, 1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, 10 ; RV64IMB-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/and.ll b/llvm/test/CodeGen/RISCV/and.ll index 5eff422013da6..79e3b954c50d8 100644 --- a/llvm/test/CodeGen/RISCV/and.ll +++ b/llvm/test/CodeGen/RISCV/and.ll @@ -195,7 +195,7 @@ define i64 @and64_0x7fffffff00000000(i64 %x) { ; RV64I-LABEL: and64_0x7fffffff00000000: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a1, 524288 -; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: addi a1, a1, -1 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll index 
f900b5161f751..eea4cb72938af 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll @@ -1104,7 +1104,7 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw ; RV64IA-NEXT: andi a3, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: sllw a5, a4, a0 ; RV64IA-NEXT: and a1, a1, a4 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -1206,7 +1206,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-WMO-NEXT: andi a3, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: addi a4, a4, -1 ; RV64IA-WMO-NEXT: sllw a5, a4, a0 ; RV64IA-WMO-NEXT: and a1, a1, a4 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -1230,7 +1230,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-TSO-NEXT: andi a3, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: addi a4, a4, -1 ; RV64IA-TSO-NEXT: sllw a5, a4, a0 ; RV64IA-TSO-NEXT: and a1, a1, a4 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -1332,7 +1332,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-WMO-NEXT: andi a3, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: addi a4, a4, -1 ; RV64IA-WMO-NEXT: sllw a5, a4, a0 ; RV64IA-WMO-NEXT: and a1, a1, a4 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -1356,7 +1356,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-TSO-NEXT: andi a3, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: addi a4, a4, -1 ; RV64IA-TSO-NEXT: sllw a5, a4, a0 ; RV64IA-TSO-NEXT: and a1, a1, a4 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -1458,7 +1458,7 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-WMO-NEXT: andi a3, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: addi a4, a4, -1 ; RV64IA-WMO-NEXT: sllw a5, a4, a0 ; RV64IA-WMO-NEXT: and a1, a1, a4 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -1482,7 +1482,7 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-TSO-NEXT: andi a3, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: addi a4, a4, -1 ; RV64IA-TSO-NEXT: sllw a5, a4, a0 ; RV64IA-TSO-NEXT: and a1, a1, a4 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -1584,7 +1584,7 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-WMO-NEXT: andi a3, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: addi a4, a4, -1 ; RV64IA-WMO-NEXT: sllw a5, a4, a0 ; RV64IA-WMO-NEXT: and a1, a1, a4 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -1608,7 +1608,7 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-TSO-NEXT: andi a3, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: addi a4, a4, -1 ; RV64IA-TSO-NEXT: sllw a5, a4, a0 ; RV64IA-TSO-NEXT: and a1, a1, a4 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -1710,7 +1710,7 @@ define void 
@cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-WMO-NEXT: andi a3, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: addi a4, a4, -1 ; RV64IA-WMO-NEXT: sllw a5, a4, a0 ; RV64IA-WMO-NEXT: and a1, a1, a4 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -1734,7 +1734,7 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-TSO-NEXT: andi a3, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: addi a4, a4, -1 ; RV64IA-TSO-NEXT: sllw a5, a4, a0 ; RV64IA-TSO-NEXT: and a1, a1, a4 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -1836,7 +1836,7 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-WMO-NEXT: andi a3, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: addi a4, a4, -1 ; RV64IA-WMO-NEXT: sllw a5, a4, a0 ; RV64IA-WMO-NEXT: and a1, a1, a4 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -1860,7 +1860,7 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-TSO-NEXT: andi a3, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: addi a4, a4, -1 ; RV64IA-TSO-NEXT: sllw a5, a4, a0 ; RV64IA-TSO-NEXT: and a1, a1, a4 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -1938,7 +1938,7 @@ define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-NEXT: andi a3, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: sllw a5, a4, a0 ; RV64IA-NEXT: and a1, a1, a4 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -2016,7 +2016,7 @@ define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-NEXT: andi a3, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: sllw a5, a4, a0 ; RV64IA-NEXT: and a1, a1, a4 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -2094,7 +2094,7 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-NEXT: andi a3, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: sllw a5, a4, a0 ; RV64IA-NEXT: and a1, a1, a4 ; RV64IA-NEXT: sllw a1, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index c4f224dcba1b2..e97a1ea5dfca0 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -6864,7 +6864,7 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -6951,7 +6951,7 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -6972,7 +6972,7 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, 
a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -7059,7 +7059,7 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -7080,7 +7080,7 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -7167,7 +7167,7 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -7188,7 +7188,7 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -7254,7 +7254,7 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -7316,7 +7316,7 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind { ; RV64IA-NEXT: andi a1, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 +; RV64IA-NEXT: addi a2, a2, -1 ; RV64IA-NEXT: sllw a2, a2, a0 ; RV64IA-NEXT: not a2, a2 ; RV64IA-NEXT: amoand.w a1, a2, (a1) @@ -7378,7 +7378,7 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { ; RV64IA-WMO-NEXT: andi a1, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a2, 16 -; RV64IA-WMO-NEXT: addiw a2, a2, -1 +; RV64IA-WMO-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NEXT: not a2, a2 ; RV64IA-WMO-NEXT: amoand.w.aq a1, a2, (a1) @@ -7390,7 +7390,7 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { ; RV64IA-TSO-NEXT: andi a1, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a2, 16 -; RV64IA-TSO-NEXT: addiw a2, a2, -1 +; RV64IA-TSO-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NEXT: not a2, a2 ; RV64IA-TSO-NEXT: amoand.w a1, a2, (a1) @@ -7452,7 +7452,7 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ; RV64IA-WMO-NEXT: andi a1, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a2, 16 -; RV64IA-WMO-NEXT: addiw a2, a2, -1 +; RV64IA-WMO-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NEXT: not a2, a2 ; RV64IA-WMO-NEXT: amoand.w.rl a1, a2, (a1) @@ -7464,7 +7464,7 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ; RV64IA-TSO-NEXT: 
andi a1, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a2, 16 -; RV64IA-TSO-NEXT: addiw a2, a2, -1 +; RV64IA-TSO-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NEXT: not a2, a2 ; RV64IA-TSO-NEXT: amoand.w a1, a2, (a1) @@ -7526,7 +7526,7 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { ; RV64IA-WMO-NEXT: andi a1, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a2, 16 -; RV64IA-WMO-NEXT: addiw a2, a2, -1 +; RV64IA-WMO-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NEXT: not a2, a2 ; RV64IA-WMO-NEXT: amoand.w.aqrl a1, a2, (a1) @@ -7538,7 +7538,7 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { ; RV64IA-TSO-NEXT: andi a1, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a2, 16 -; RV64IA-TSO-NEXT: addiw a2, a2, -1 +; RV64IA-TSO-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NEXT: not a2, a2 ; RV64IA-TSO-NEXT: amoand.w a1, a2, (a1) @@ -7600,7 +7600,7 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { ; RV64IA-WMO-NEXT: andi a1, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a2, 16 -; RV64IA-WMO-NEXT: addiw a2, a2, -1 +; RV64IA-WMO-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NEXT: not a2, a2 ; RV64IA-WMO-NEXT: amoand.w.aqrl a1, a2, (a1) @@ -7612,7 +7612,7 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { ; RV64IA-TSO-NEXT: andi a1, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a2, 16 -; RV64IA-TSO-NEXT: addiw a2, a2, -1 +; RV64IA-TSO-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NEXT: not a2, a2 ; RV64IA-TSO-NEXT: amoand.w a1, a2, (a1) @@ -7663,7 +7663,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind { ; RV64IA-NEXT: andi a1, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a2, 16 -; RV64IA-NEXT: addiw a2, a2, -1 +; RV64IA-NEXT: addi a2, a2, -1 ; RV64IA-NEXT: sllw a2, a2, a0 ; RV64IA-NEXT: amoor.w a1, a2, (a1) ; RV64IA-NEXT: srlw a0, a1, a0 @@ -7724,7 +7724,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { ; RV64IA-WMO-NEXT: andi a1, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a2, 16 -; RV64IA-WMO-NEXT: addiw a2, a2, -1 +; RV64IA-WMO-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NEXT: amoor.w.aq a1, a2, (a1) ; RV64IA-WMO-NEXT: srlw a0, a1, a0 @@ -7735,7 +7735,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { ; RV64IA-TSO-NEXT: andi a1, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a2, 16 -; RV64IA-TSO-NEXT: addiw a2, a2, -1 +; RV64IA-TSO-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NEXT: amoor.w a1, a2, (a1) ; RV64IA-TSO-NEXT: srlw a0, a1, a0 @@ -7796,7 +7796,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { ; RV64IA-WMO-NEXT: andi a1, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a2, 16 -; RV64IA-WMO-NEXT: addiw a2, a2, -1 +; RV64IA-WMO-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NEXT: amoor.w.rl a1, a2, (a1) ; RV64IA-WMO-NEXT: srlw a0, a1, a0 @@ -7807,7 +7807,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { ; RV64IA-TSO-NEXT: andi a1, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a2, 16 -; RV64IA-TSO-NEXT: addiw a2, a2, -1 +; RV64IA-TSO-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NEXT: amoor.w a1, a2, (a1) ; RV64IA-TSO-NEXT: srlw a0, a1, a0 @@ -7868,7 +7868,7 
@@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind { ; RV64IA-WMO-NEXT: andi a1, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a2, 16 -; RV64IA-WMO-NEXT: addiw a2, a2, -1 +; RV64IA-WMO-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NEXT: amoor.w.aqrl a1, a2, (a1) ; RV64IA-WMO-NEXT: srlw a0, a1, a0 @@ -7879,7 +7879,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind { ; RV64IA-TSO-NEXT: andi a1, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a2, 16 -; RV64IA-TSO-NEXT: addiw a2, a2, -1 +; RV64IA-TSO-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NEXT: amoor.w a1, a2, (a1) ; RV64IA-TSO-NEXT: srlw a0, a1, a0 @@ -7940,7 +7940,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind { ; RV64IA-WMO-NEXT: andi a1, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a2, 16 -; RV64IA-WMO-NEXT: addiw a2, a2, -1 +; RV64IA-WMO-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NEXT: amoor.w.aqrl a1, a2, (a1) ; RV64IA-WMO-NEXT: srlw a0, a1, a0 @@ -7951,7 +7951,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind { ; RV64IA-TSO-NEXT: andi a1, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a2, 16 -; RV64IA-TSO-NEXT: addiw a2, a2, -1 +; RV64IA-TSO-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NEXT: amoor.w a1, a2, (a1) ; RV64IA-TSO-NEXT: srlw a0, a1, a0 @@ -8007,7 +8007,7 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -8094,7 +8094,7 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -8115,7 +8115,7 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -8202,7 +8202,7 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -8223,7 +8223,7 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -8310,7 +8310,7 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; 
RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -8331,7 +8331,7 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -8397,7 +8397,7 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -8463,7 +8463,7 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -8550,7 +8550,7 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -8571,7 +8571,7 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -8658,7 +8658,7 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -8679,7 +8679,7 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -8766,7 +8766,7 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -8787,7 +8787,7 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -8853,7 +8853,7 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; 
RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -8913,7 +8913,7 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: not a4, a4 ; RV64IA-NEXT: and a1, a1, a3 @@ -8982,7 +8982,7 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: not a4, a4 ; RV64IA-WMO-NEXT: and a1, a1, a3 @@ -8997,7 +8997,7 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: not a4, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 @@ -9066,7 +9066,7 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: not a4, a4 ; RV64IA-WMO-NEXT: and a1, a1, a3 @@ -9081,7 +9081,7 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: not a4, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 @@ -9150,7 +9150,7 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: not a4, a4 ; RV64IA-WMO-NEXT: and a1, a1, a3 @@ -9165,7 +9165,7 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: not a4, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 @@ -9234,7 +9234,7 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: not a4, a4 ; RV64IA-WMO-NEXT: and a1, a1, a3 @@ -9249,7 +9249,7 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: not a4, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 @@ -9310,7 +9310,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 
; RV64IA-NEXT: sllw a1, a1, a0 @@ -9400,7 +9400,7 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -9422,7 +9422,7 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -9512,7 +9512,7 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -9534,7 +9534,7 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -9624,7 +9624,7 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -9646,7 +9646,7 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -9714,7 +9714,7 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -10492,7 +10492,7 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 @@ -10673,7 +10673,7 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: andi a3, a0, 24 ; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: addi a4, a4, -1 ; RV64IA-WMO-NEXT: sllw a4, a4, a0 ; RV64IA-WMO-NEXT: slli a1, a1, 48 ; RV64IA-WMO-NEXT: srai a1, a1, 48 @@ -10704,7 +10704,7 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: andi a3, a0, 24 ; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: addi a4, a4, -1 ; RV64IA-TSO-NEXT: sllw a4, a4, a0 ; 
RV64IA-TSO-NEXT: slli a1, a1, 48 ; RV64IA-TSO-NEXT: srai a1, a1, 48 @@ -10885,7 +10885,7 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: andi a3, a0, 24 ; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: addi a4, a4, -1 ; RV64IA-WMO-NEXT: sllw a4, a4, a0 ; RV64IA-WMO-NEXT: slli a1, a1, 48 ; RV64IA-WMO-NEXT: srai a1, a1, 48 @@ -10916,7 +10916,7 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: andi a3, a0, 24 ; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: addi a4, a4, -1 ; RV64IA-TSO-NEXT: sllw a4, a4, a0 ; RV64IA-TSO-NEXT: slli a1, a1, 48 ; RV64IA-TSO-NEXT: srai a1, a1, 48 @@ -11097,7 +11097,7 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: andi a3, a0, 24 ; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: addi a4, a4, -1 ; RV64IA-WMO-NEXT: sllw a4, a4, a0 ; RV64IA-WMO-NEXT: slli a1, a1, 48 ; RV64IA-WMO-NEXT: srai a1, a1, 48 @@ -11128,7 +11128,7 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: andi a3, a0, 24 ; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: addi a4, a4, -1 ; RV64IA-TSO-NEXT: sllw a4, a4, a0 ; RV64IA-TSO-NEXT: slli a1, a1, 48 ; RV64IA-TSO-NEXT: srai a1, a1, 48 @@ -11278,7 +11278,7 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 @@ -11428,7 +11428,7 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 @@ -11609,7 +11609,7 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: andi a3, a0, 24 ; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: addi a4, a4, -1 ; RV64IA-WMO-NEXT: sllw a4, a4, a0 ; RV64IA-WMO-NEXT: slli a1, a1, 48 ; RV64IA-WMO-NEXT: srai a1, a1, 48 @@ -11640,7 +11640,7 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: andi a3, a0, 24 ; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: addi a4, a4, -1 ; RV64IA-TSO-NEXT: sllw a4, a4, a0 ; RV64IA-TSO-NEXT: slli a1, a1, 48 ; RV64IA-TSO-NEXT: srai a1, a1, 48 @@ -11821,7 +11821,7 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: andi a3, a0, 24 ; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: addi a4, a4, -1 ; RV64IA-WMO-NEXT: sllw a4, a4, a0 ; RV64IA-WMO-NEXT: slli a1, a1, 48 ; RV64IA-WMO-NEXT: srai a1, a1, 48 @@ -11852,7 +11852,7 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: andi a3, a0, 24 ; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: addi a4, 
a4, -1 ; RV64IA-TSO-NEXT: sllw a4, a4, a0 ; RV64IA-TSO-NEXT: slli a1, a1, 48 ; RV64IA-TSO-NEXT: srai a1, a1, 48 @@ -12033,7 +12033,7 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: andi a3, a0, 24 ; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addiw a4, a4, -1 +; RV64IA-WMO-NEXT: addi a4, a4, -1 ; RV64IA-WMO-NEXT: sllw a4, a4, a0 ; RV64IA-WMO-NEXT: slli a1, a1, 48 ; RV64IA-WMO-NEXT: srai a1, a1, 48 @@ -12064,7 +12064,7 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: andi a3, a0, 24 ; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addiw a4, a4, -1 +; RV64IA-TSO-NEXT: addi a4, a4, -1 ; RV64IA-TSO-NEXT: sllw a4, a4, a0 ; RV64IA-TSO-NEXT: slli a1, a1, 48 ; RV64IA-TSO-NEXT: srai a1, a1, 48 @@ -12214,7 +12214,7 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 @@ -12361,7 +12361,7 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -12528,7 +12528,7 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -12553,7 +12553,7 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -12720,7 +12720,7 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -12745,7 +12745,7 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -12912,7 +12912,7 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -12937,7 +12937,7 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: 
addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -13079,7 +13079,7 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -13221,7 +13221,7 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -13388,7 +13388,7 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -13413,7 +13413,7 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -13580,7 +13580,7 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -13605,7 +13605,7 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -13772,7 +13772,7 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: addiw a3, a3, -1 +; RV64IA-WMO-NEXT: addi a3, a3, -1 ; RV64IA-WMO-NEXT: sllw a4, a3, a0 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 @@ -13797,7 +13797,7 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: addiw a3, a3, -1 +; RV64IA-TSO-NEXT: addi a3, a3, -1 ; RV64IA-TSO-NEXT: sllw a4, a3, a0 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 @@ -13939,7 +13939,7 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index c08f045ee8abc..bd945c865c359 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ 
b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -1207,7 +1207,7 @@ define signext i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -1281,7 +1281,7 @@ define signext i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -1355,7 +1355,7 @@ define signext i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -1423,7 +1423,7 @@ define signext i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: not a4, a4 ; RV64IA-NEXT: and a1, a1, a3 @@ -1492,7 +1492,7 @@ define signext i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -1748,7 +1748,7 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 @@ -1904,7 +1904,7 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 @@ -2057,7 +2057,7 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -2205,7 +2205,7 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: addi a3, a3, -1 ; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -3969,7 +3969,7 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext % ; RV64IA-NEXT: andi a3, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: sllw a5, a4, a0 ; RV64IA-NEXT: and a1, a1, a4 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -4054,7 +4054,7 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext 
%cmp, i16 ; RV64IA-NEXT: andi a3, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addiw a4, a4, -1 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: sllw a5, a4, a0 ; RV64IA-NEXT: and a1, a1, a4 ; RV64IA-NEXT: sllw a1, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index 4d75a74f06ac2..5f15a9c067102 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -140,7 +140,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV64IA-NEXT: srlw a5, a3, a0 ; RV64IA-NEXT: sext.w a6, a3 ; RV64IA-NEXT: andi a7, a5, 255 -; RV64IA-NEXT: addiw a5, a5, 1 +; RV64IA-NEXT: addi a5, a5, 1 ; RV64IA-NEXT: sltu a7, a7, a1 ; RV64IA-NEXT: negw a7, a7 ; RV64IA-NEXT: and a5, a7, a5 @@ -304,7 +304,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; RV64IA-NEXT: srlw a6, a4, a0 ; RV64IA-NEXT: sext.w a7, a4 ; RV64IA-NEXT: and t0, a6, a3 -; RV64IA-NEXT: addiw a6, a6, 1 +; RV64IA-NEXT: addi a6, a6, 1 ; RV64IA-NEXT: sltu t0, t0, a1 ; RV64IA-NEXT: negw t0, t0 ; RV64IA-NEXT: and a6, a6, a3 diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll index b68f74a1f7c3a..8a0c4240d161b 100644 --- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll @@ -404,7 +404,7 @@ define i32 @fcvt_wu_bf16_sat(bfloat %a) nounwind { ; CHECK64ZFBFMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5 ; CHECK64ZFBFMIN-NEXT: seqz a1, a1 -; CHECK64ZFBFMIN-NEXT: addiw a1, a1, -1 +; CHECK64ZFBFMIN-NEXT: addi a1, a1, -1 ; CHECK64ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK64ZFBFMIN-NEXT: slli a0, a0, 32 ; CHECK64ZFBFMIN-NEXT: srli a0, a0, 32 @@ -420,7 +420,7 @@ define i32 @fcvt_wu_bf16_sat(bfloat %a) nounwind { ; RV64ID-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64ID-NEXT: feq.s a1, fa5, fa5 ; RV64ID-NEXT: seqz a1, a1 -; RV64ID-NEXT: addiw a1, a1, -1 +; RV64ID-NEXT: addi a1, a1, -1 ; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: slli a0, a0, 32 ; RV64ID-NEXT: srli a0, a0, 32 @@ -1722,7 +1722,7 @@ define zeroext i32 @fcvt_wu_bf16_sat_zext(bfloat %a) nounwind { ; CHECK64ZFBFMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5 ; CHECK64ZFBFMIN-NEXT: seqz a1, a1 -; CHECK64ZFBFMIN-NEXT: addiw a1, a1, -1 +; CHECK64ZFBFMIN-NEXT: addi a1, a1, -1 ; CHECK64ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK64ZFBFMIN-NEXT: slli a0, a0, 32 ; CHECK64ZFBFMIN-NEXT: srli a0, a0, 32 @@ -1738,7 +1738,7 @@ define zeroext i32 @fcvt_wu_bf16_sat_zext(bfloat %a) nounwind { ; RV64ID-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64ID-NEXT: feq.s a1, fa5, fa5 ; RV64ID-NEXT: seqz a1, a1 -; RV64ID-NEXT: addiw a1, a1, -1 +; RV64ID-NEXT: addi a1, a1, -1 ; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: slli a0, a0, 32 ; RV64ID-NEXT: srli a0, a0, 32 diff --git a/llvm/test/CodeGen/RISCV/bfloat.ll b/llvm/test/CodeGen/RISCV/bfloat.ll index c95d61fd6baab..5013f76f9b0b3 100644 --- a/llvm/test/CodeGen/RISCV/bfloat.ll +++ b/llvm/test/CodeGen/RISCV/bfloat.ll @@ -367,7 +367,7 @@ define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind { ; RV64ID-LP64-NEXT: addi sp, sp, -16 ; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-LP64-NEXT: lui a2, 16 -; RV64ID-LP64-NEXT: addiw a2, a2, -1 +; RV64ID-LP64-NEXT: addi a2, a2, -1 ; RV64ID-LP64-NEXT: and a0, a0, a2 ; RV64ID-LP64-NEXT: and a1, a1, a2 ; RV64ID-LP64-NEXT: slli a1, a1, 16 @@ -409,7 +409,7 @@ define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind { ; 
RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 ; RV64ID-LP64D-NEXT: lui a1, 16 -; RV64ID-LP64D-NEXT: addiw a1, a1, -1 +; RV64ID-LP64D-NEXT: addi a1, a1, -1 ; RV64ID-LP64D-NEXT: and a0, a0, a1 ; RV64ID-LP64D-NEXT: fmv.x.w a2, fa1 ; RV64ID-LP64D-NEXT: and a1, a2, a1 @@ -605,7 +605,7 @@ define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind { ; RV64ID-LP64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; RV64ID-LP64-NEXT: mv s0, a0 ; RV64ID-LP64-NEXT: lui a0, 16 -; RV64ID-LP64-NEXT: addiw a0, a0, -1 +; RV64ID-LP64-NEXT: addi a0, a0, -1 ; RV64ID-LP64-NEXT: and a1, a1, a0 ; RV64ID-LP64-NEXT: and a0, a2, a0 ; RV64ID-LP64-NEXT: slli a0, a0, 16 @@ -652,7 +652,7 @@ define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind { ; RV64ID-LP64D-NEXT: mv s0, a0 ; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 ; RV64ID-LP64D-NEXT: lui a1, 16 -; RV64ID-LP64D-NEXT: addiw a1, a1, -1 +; RV64ID-LP64D-NEXT: addi a1, a1, -1 ; RV64ID-LP64D-NEXT: and a0, a0, a1 ; RV64ID-LP64D-NEXT: fmv.x.w a2, fa1 ; RV64ID-LP64D-NEXT: and a1, a2, a1 diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll index d281c94277354..f560a112dd92b 100644 --- a/llvm/test/CodeGen/RISCV/bittest.ll +++ b/llvm/test/CodeGen/RISCV/bittest.ll @@ -266,7 +266,7 @@ define i1 @bittest_constant_by_var_shr_i32(i32 signext %b) nounwind { ; RV64I-LABEL: bittest_constant_by_var_shr_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a1, 301408 -; RV64I-NEXT: addiw a1, a1, 722 +; RV64I-NEXT: addi a1, a1, 722 ; RV64I-NEXT: srlw a0, a1, a0 ; RV64I-NEXT: andi a0, a0, 1 ; RV64I-NEXT: ret @@ -296,7 +296,7 @@ define i1 @bittest_constant_by_var_shr_i32(i32 signext %b) nounwind { ; RV64XTHEADBS-LABEL: bittest_constant_by_var_shr_i32: ; RV64XTHEADBS: # %bb.0: ; RV64XTHEADBS-NEXT: lui a1, 301408 -; RV64XTHEADBS-NEXT: addiw a1, a1, 722 +; RV64XTHEADBS-NEXT: addi a1, a1, 722 ; RV64XTHEADBS-NEXT: srlw a0, a1, a0 ; RV64XTHEADBS-NEXT: andi a0, a0, 1 ; RV64XTHEADBS-NEXT: ret @@ -319,7 +319,7 @@ define i1 @bittest_constant_by_var_shl_i32(i32 signext %b) nounwind { ; RV64I-LABEL: bittest_constant_by_var_shl_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a1, 301408 -; RV64I-NEXT: addiw a1, a1, 722 +; RV64I-NEXT: addi a1, a1, 722 ; RV64I-NEXT: srlw a0, a1, a0 ; RV64I-NEXT: andi a0, a0, 1 ; RV64I-NEXT: ret @@ -349,7 +349,7 @@ define i1 @bittest_constant_by_var_shl_i32(i32 signext %b) nounwind { ; RV64XTHEADBS-LABEL: bittest_constant_by_var_shl_i32: ; RV64XTHEADBS: # %bb.0: ; RV64XTHEADBS-NEXT: lui a1, 301408 -; RV64XTHEADBS-NEXT: addiw a1, a1, 722 +; RV64XTHEADBS-NEXT: addi a1, a1, 722 ; RV64XTHEADBS-NEXT: srlw a0, a1, a0 ; RV64XTHEADBS-NEXT: andi a0, a0, 1 ; RV64XTHEADBS-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll index d64fcbe57a851..9c69fe0a6e486 100644 --- a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll @@ -497,7 +497,7 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind { ; RV64ZBB-NEXT: and a1, a1, a2 ; RV64ZBB-NEXT: srli a0, a0, 28 ; RV64ZBB-NEXT: lui a2, 986895 -; RV64ZBB-NEXT: addiw a2, a2, 240 +; RV64ZBB-NEXT: addi a2, a2, 240 ; RV64ZBB-NEXT: and a0, a0, a2 ; RV64ZBB-NEXT: sext.w a0, a0 ; RV64ZBB-NEXT: or a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll index fc53f70186b76..6587f0c8c5af7 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll @@ -396,7 +396,7 @@ define i32 
@caller_half_on_stack() nounwind { ; RV64IF-NEXT: addi sp, sp, -16 ; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IF-NEXT: lui a0, 1048565 -; RV64IF-NEXT: addiw t0, a0, -1792 +; RV64IF-NEXT: addi t0, a0, -1792 ; RV64IF-NEXT: li a0, 1 ; RV64IF-NEXT: li a1, 2 ; RV64IF-NEXT: li a2, 3 diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll index 699bb44645711..da67176e3f0ca 100644 --- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll @@ -58,7 +58,7 @@ define i8 @test_cttz_i8(i8 %a) nounwind { ; RV64NOZBB-NEXT: andi a1, a0, 255 ; RV64NOZBB-NEXT: beqz a1, .LBB0_2 ; RV64NOZBB-NEXT: # %bb.1: # %cond.false -; RV64NOZBB-NEXT: addiw a1, a0, -1 +; RV64NOZBB-NEXT: addi a1, a0, -1 ; RV64NOZBB-NEXT: not a0, a0 ; RV64NOZBB-NEXT: and a0, a0, a1 ; RV64NOZBB-NEXT: srli a1, a0, 1 @@ -308,7 +308,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind { ; RV64M-NEXT: negw a1, a0 ; RV64M-NEXT: and a0, a0, a1 ; RV64M-NEXT: lui a1, 30667 -; RV64M-NEXT: addiw a1, a1, 1329 +; RV64M-NEXT: addi a1, a1, 1329 ; RV64M-NEXT: mul a0, a0, a1 ; RV64M-NEXT: srliw a0, a0, 27 ; RV64M-NEXT: lui a1, %hi(.LCPI2_0) @@ -567,7 +567,7 @@ define i8 @test_cttz_i8_zero_undef(i8 %a) nounwind { ; ; RV64NOZBB-LABEL: test_cttz_i8_zero_undef: ; RV64NOZBB: # %bb.0: -; RV64NOZBB-NEXT: addiw a1, a0, -1 +; RV64NOZBB-NEXT: addi a1, a0, -1 ; RV64NOZBB-NEXT: not a0, a0 ; RV64NOZBB-NEXT: and a0, a0, a1 ; RV64NOZBB-NEXT: srli a1, a0, 1 @@ -753,7 +753,7 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind { ; RV64M-NEXT: negw a1, a0 ; RV64M-NEXT: and a0, a0, a1 ; RV64M-NEXT: lui a1, 30667 -; RV64M-NEXT: addiw a1, a1, 1329 +; RV64M-NEXT: addi a1, a1, 1329 ; RV64M-NEXT: mul a0, a0, a1 ; RV64M-NEXT: srliw a0, a0, 27 ; RV64M-NEXT: lui a1, %hi(.LCPI6_0) @@ -1315,10 +1315,10 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV64M-NEXT: srli a1, a0, 4 ; RV64M-NEXT: add a0, a0, a1 ; RV64M-NEXT: lui a1, 61681 -; RV64M-NEXT: addiw a1, a1, -241 +; RV64M-NEXT: addi a1, a1, -241 ; RV64M-NEXT: and a0, a0, a1 ; RV64M-NEXT: lui a1, 4112 -; RV64M-NEXT: addiw a1, a1, 257 +; RV64M-NEXT: addi a1, a1, 257 ; RV64M-NEXT: mul a0, a0, a1 ; RV64M-NEXT: srliw a0, a0, 24 ; RV64M-NEXT: ret @@ -1969,10 +1969,10 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; RV64M-NEXT: srli a1, a0, 4 ; RV64M-NEXT: add a0, a0, a1 ; RV64M-NEXT: lui a1, 61681 -; RV64M-NEXT: addiw a1, a1, -241 +; RV64M-NEXT: addi a1, a1, -241 ; RV64M-NEXT: and a0, a0, a1 ; RV64M-NEXT: lui a1, 4112 -; RV64M-NEXT: addiw a1, a1, 257 +; RV64M-NEXT: addi a1, a1, 257 ; RV64M-NEXT: mul a0, a0, a1 ; RV64M-NEXT: srliw a0, a0, 24 ; RV64M-NEXT: ret @@ -2558,10 +2558,10 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; RV64M-NEXT: srli a1, a0, 4 ; RV64M-NEXT: add a0, a0, a1 ; RV64M-NEXT: lui a1, 61681 -; RV64M-NEXT: addiw a1, a1, -241 +; RV64M-NEXT: addi a1, a1, -241 ; RV64M-NEXT: and a0, a0, a1 ; RV64M-NEXT: lui a1, 4112 -; RV64M-NEXT: addiw a1, a1, 257 +; RV64M-NEXT: addi a1, a1, 257 ; RV64M-NEXT: mul a0, a0, a1 ; RV64M-NEXT: srliw a0, a0, 24 ; RV64M-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll index be1a834e56c22..02072b3e4e5ca 100644 --- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll +++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll @@ -98,7 +98,7 @@ define signext i32 @ctz_dereferencing_pointer(i64* %b) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addiw a1, a1, -1 +; 
RV64I-NEXT: addi a1, a1, -1 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 63 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -177,7 +177,7 @@ define i64 @ctz_dereferencing_pointer_zext(i32* %b) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: addi a1, a1, -1 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 31 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -252,7 +252,7 @@ define signext i32 @ctz1(i32 signext %x) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: addi a1, a1, -1 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 31 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -325,7 +325,7 @@ define signext i32 @ctz1_flipped(i32 signext %x) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: addi a1, a1, -1 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 31 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -561,7 +561,7 @@ define signext i32 @ctz4(i64 %b) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: addi a1, a1, -1 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 63 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -804,7 +804,7 @@ define signext i32 @ctz5(i32 signext %x) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: addi a1, a1, -1 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 31 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -877,7 +877,7 @@ define signext i32 @ctz6(i32 signext %x) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: addi a1, a1, -1 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 31 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -956,7 +956,7 @@ define signext i32 @globalVar() nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: addi a1, a1, -1 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 31 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll index 4672b9a7d9abb..bf19bbd8b1314 100644 --- a/llvm/test/CodeGen/RISCV/div-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll @@ -24,7 +24,7 @@ define i32 @udiv_constant_no_add(i32 %a) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: lui a1, 838861 -; RV64-NEXT: addiw a1, a1, -819 +; RV64-NEXT: addi a1, a1, -819 ; RV64-NEXT: slli a1, a1, 32 ; RV64-NEXT: mulhu a0, a0, a1 ; RV64-NEXT: srli a0, a0, 34 @@ -50,7 +50,7 @@ define i32 @udiv_constant_add(i32 %a) nounwind { ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a1, a0, 32 ; RV64IM-NEXT: lui a2, 149797 -; RV64IM-NEXT: addiw a2, a2, -1755 +; RV64IM-NEXT: addi a2, a2, -1755 ; RV64IM-NEXT: slli a2, a2, 32 ; RV64IM-NEXT: mulhu a1, a1, a2 ; RV64IM-NEXT: srli a1, a1, 32 diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll index 2bde0349ccc71..c455b439f0b1e 100644 --- a/llvm/test/CodeGen/RISCV/div.ll +++ b/llvm/test/CodeGen/RISCV/div.ll @@ -69,7 +69,7 @@ define i32 @udiv_constant(i32 %a) nounwind { ; RV64IM: # %bb.0: ; 
RV64IM-NEXT: slli a0, a0, 32 ; RV64IM-NEXT: lui a1, 838861 -; RV64IM-NEXT: addiw a1, a1, -819 +; RV64IM-NEXT: addi a1, a1, -819 ; RV64IM-NEXT: slli a1, a1, 32 ; RV64IM-NEXT: mulhu a0, a0, a1 ; RV64IM-NEXT: srli a0, a0, 34 @@ -452,7 +452,7 @@ define i16 @udiv16(i16 %a, i16 %b) nounwind { ; RV64IM-LABEL: udiv16: ; RV64IM: # %bb.0: ; RV64IM-NEXT: lui a2, 16 -; RV64IM-NEXT: addiw a2, a2, -1 +; RV64IM-NEXT: addi a2, a2, -1 ; RV64IM-NEXT: and a1, a1, a2 ; RV64IM-NEXT: and a0, a0, a2 ; RV64IM-NEXT: divuw a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll index 1ecb2c24bb6f2..39ac963051b5b 100644 --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -394,7 +394,7 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind { ; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz ; RV64IFD-NEXT: feq.d a1, fa0, fa0 ; RV64IFD-NEXT: seqz a1, a1 -; RV64IFD-NEXT: addiw a1, a1, -1 +; RV64IFD-NEXT: addi a1, a1, -1 ; RV64IFD-NEXT: and a0, a0, a1 ; RV64IFD-NEXT: slli a0, a0, 32 ; RV64IFD-NEXT: srli a0, a0, 32 @@ -420,7 +420,7 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind { ; RV64IZFINXZDINX-NEXT: fcvt.wu.d a1, a0, rtz ; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0 ; RV64IZFINXZDINX-NEXT: seqz a0, a0 -; RV64IZFINXZDINX-NEXT: addiw a0, a0, -1 +; RV64IZFINXZDINX-NEXT: addi a0, a0, -1 ; RV64IZFINXZDINX-NEXT: and a0, a1, a0 ; RV64IZFINXZDINX-NEXT: slli a0, a0, 32 ; RV64IZFINXZDINX-NEXT: srli a0, a0, 32 @@ -1891,20 +1891,20 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; RV64I-NEXT: lui s1, 1048568 ; RV64I-NEXT: .LBB26_2: # %start ; RV64I-NEXT: lui a0, 4152 -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: slli a1, a0, 38 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtdf2@plt ; RV64I-NEXT: blez a0, .LBB26_4 ; RV64I-NEXT: # %bb.3: # %start ; RV64I-NEXT: lui s1, 8 -; RV64I-NEXT: addiw s1, s1, -1 +; RV64I-NEXT: addi s1, s1, -1 ; RV64I-NEXT: .LBB26_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt ; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srai a0, a0, 48 @@ -2074,7 +2074,7 @@ define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind { ; RV64I-NEXT: call __fixunsdfdi@plt ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 8312 -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: slli a1, a0, 37 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __gtdf2@plt @@ -2298,7 +2298,7 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt ; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: srai a0, a0, 56 @@ -2509,7 +2509,7 @@ define zeroext i32 @fcvt_wu_d_sat_zext(double %a) nounwind { ; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz ; RV64IFD-NEXT: feq.d a1, fa0, fa0 ; RV64IFD-NEXT: seqz a1, a1 -; RV64IFD-NEXT: addiw a1, a1, -1 +; RV64IFD-NEXT: addi a1, a1, -1 ; RV64IFD-NEXT: and a0, a0, a1 ; RV64IFD-NEXT: slli a0, a0, 32 ; RV64IFD-NEXT: srli a0, a0, 32 @@ -2535,7 +2535,7 @@ define zeroext i32 @fcvt_wu_d_sat_zext(double %a) nounwind { ; RV64IZFINXZDINX-NEXT: fcvt.wu.d a1, a0, rtz ; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0 ; RV64IZFINXZDINX-NEXT: seqz a0, a0 -; RV64IZFINXZDINX-NEXT: addiw a0, a0, -1 +; RV64IZFINXZDINX-NEXT: addi a0, a0, -1 ; RV64IZFINXZDINX-NEXT: and a0, a1, a0 ; 
RV64IZFINXZDINX-NEXT: slli a0, a0, 32 ; RV64IZFINXZDINX-NEXT: srli a0, a0, 32 @@ -2735,13 +2735,13 @@ define signext i32 @fcvt_w_d_sat_sext(double %a) nounwind { ; RV64I-NEXT: call __gtdf2@plt ; RV64I-NEXT: blez a0, .LBB34_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: addiw s1, s3, -1 +; RV64I-NEXT: addi s1, s3, -1 ; RV64I-NEXT: .LBB34_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt ; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll index 066b6fe9c5348..83a4f63add337 100644 --- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll @@ -69,7 +69,7 @@ define void @_Z3foov() { ; CHECK-NEXT: vl2r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: lui a0, 1048572 -; CHECK-NEXT: addiw a0, a0, 928 +; CHECK-NEXT: addi a0, a0, 928 ; CHECK-NEXT: vmsbc.vx v0, v8, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll index 7224f5b79b7a1..235979b122215 100644 --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -243,7 +243,7 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind { ; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz ; RV64IF-NEXT: feq.s a1, fa0, fa0 ; RV64IF-NEXT: seqz a1, a1 -; RV64IF-NEXT: addiw a1, a1, -1 +; RV64IF-NEXT: addi a1, a1, -1 ; RV64IF-NEXT: and a0, a0, a1 ; RV64IF-NEXT: slli a0, a0, 32 ; RV64IF-NEXT: srli a0, a0, 32 @@ -263,7 +263,7 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind { ; RV64IZFINX-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZFINX-NEXT: feq.s a0, a0, a0 ; RV64IZFINX-NEXT: seqz a0, a0 -; RV64IZFINX-NEXT: addiw a0, a0, -1 +; RV64IZFINX-NEXT: addi a0, a0, -1 ; RV64IZFINX-NEXT: and a0, a1, a0 ; RV64IZFINX-NEXT: slli a0, a0, 32 ; RV64IZFINX-NEXT: srli a0, a0, 32 @@ -1528,13 +1528,13 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; RV64I-NEXT: blez a0, .LBB24_4 ; RV64I-NEXT: # %bb.3: # %start ; RV64I-NEXT: lui s1, 8 -; RV64I-NEXT: addiw s1, s1, -1 +; RV64I-NEXT: addi s1, s1, -1 ; RV64I-NEXT: .LBB24_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt ; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srai a0, a0, 48 @@ -1874,7 +1874,7 @@ define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind { ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt ; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: srai a0, a0, 56 @@ -2057,7 +2057,7 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind { ; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz ; RV64IF-NEXT: feq.s a1, fa0, fa0 ; RV64IF-NEXT: seqz a1, a1 -; RV64IF-NEXT: addiw a1, a1, -1 +; RV64IF-NEXT: addi a1, a1, -1 ; RV64IF-NEXT: and a0, a0, a1 ; RV64IF-NEXT: slli a0, a0, 32 ; RV64IF-NEXT: srli a0, a0, 32 @@ -2077,7 +2077,7 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind { ; RV64IZFINX-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZFINX-NEXT: feq.s a0, a0, a0 ; 
RV64IZFINX-NEXT: seqz a0, a0 -; RV64IZFINX-NEXT: addiw a0, a0, -1 +; RV64IZFINX-NEXT: addi a0, a0, -1 ; RV64IZFINX-NEXT: and a0, a1, a0 ; RV64IZFINX-NEXT: slli a0, a0, 32 ; RV64IZFINX-NEXT: srli a0, a0, 32 @@ -2238,13 +2238,13 @@ define signext i32 @fcvt_w_s_sat_sext(float %a) nounwind { ; RV64I-NEXT: call __gtsf2@plt ; RV64I-NEXT: blez a0, .LBB32_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: addiw s1, s3, -1 +; RV64I-NEXT: addi s1, s3, -1 ; RV64I-NEXT: .LBB32_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt ; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll index 434c9f05bd16f..5a6e0baf752d0 100644 --- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll +++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll @@ -325,7 +325,7 @@ define dso_local void @inc_g_i32() nounwind { ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: lui a0, %hi(g_4_i32) ; RV64I-NEXT: lw a1, %lo(g_4_i32)(a0) -; RV64I-NEXT: addiw a1, a1, 1 +; RV64I-NEXT: addi a1, a1, 1 ; RV64I-NEXT: sw a1, %lo(g_4_i32)(a0) ; RV64I-NEXT: ret ; @@ -334,7 +334,7 @@ define dso_local void @inc_g_i32() nounwind { ; RV64I-MEDIUM-NEXT: .Lpcrel_hi8: ; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(g_4_i32) ; RV64I-MEDIUM-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi8)(a0) -; RV64I-MEDIUM-NEXT: addiw a1, a1, 1 +; RV64I-MEDIUM-NEXT: addi a1, a1, 1 ; RV64I-MEDIUM-NEXT: sw a1, %pcrel_lo(.Lpcrel_hi8)(a0) ; RV64I-MEDIUM-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index 4dadd65283001..2d3f40e15fe43 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -1703,7 +1703,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind { ; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz ; RV64IZFH-NEXT: feq.h a1, fa0, fa0 ; RV64IZFH-NEXT: seqz a1, a1 -; RV64IZFH-NEXT: addiw a1, a1, -1 +; RV64IZFH-NEXT: addi a1, a1, -1 ; RV64IZFH-NEXT: and a0, a0, a1 ; RV64IZFH-NEXT: slli a0, a0, 32 ; RV64IZFH-NEXT: srli a0, a0, 32 @@ -1723,7 +1723,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind { ; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz ; RV64IDZFH-NEXT: feq.h a1, fa0, fa0 ; RV64IDZFH-NEXT: seqz a1, a1 -; RV64IDZFH-NEXT: addiw a1, a1, -1 +; RV64IDZFH-NEXT: addi a1, a1, -1 ; RV64IDZFH-NEXT: and a0, a0, a1 ; RV64IDZFH-NEXT: slli a0, a0, 32 ; RV64IDZFH-NEXT: srli a0, a0, 32 @@ -1743,7 +1743,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addiw a0, a0, -1 +; RV64IZHINX-NEXT: addi a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: slli a0, a0, 32 ; RV64IZHINX-NEXT: srli a0, a0, 32 @@ -1763,7 +1763,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind { ; RV64IZDINXZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZDINXZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZDINXZHINX-NEXT: seqz a0, a0 -; RV64IZDINXZHINX-NEXT: addiw a0, a0, -1 +; RV64IZDINXZHINX-NEXT: addi a0, a0, -1 ; RV64IZDINXZHINX-NEXT: and a0, a1, a0 ; RV64IZDINXZHINX-NEXT: slli a0, a0, 32 ; RV64IZDINXZHINX-NEXT: srli a0, a0, 32 @@ -1863,7 +1863,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind { ; RV64ID-LP64-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64ID-LP64-NEXT: feq.s a1, fa5, fa5 ; RV64ID-LP64-NEXT: seqz a1, a1 -; RV64ID-LP64-NEXT: addiw a1, a1, -1 +; 
RV64ID-LP64-NEXT: addi a1, a1, -1 ; RV64ID-LP64-NEXT: and a0, a0, a1 ; RV64ID-LP64-NEXT: slli a0, a0, 32 ; RV64ID-LP64-NEXT: srli a0, a0, 32 @@ -1893,7 +1893,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind { ; RV64ID-NEXT: fcvt.wu.s a0, fa0, rtz ; RV64ID-NEXT: feq.s a1, fa0, fa0 ; RV64ID-NEXT: seqz a1, a1 -; RV64ID-NEXT: addiw a1, a1, -1 +; RV64ID-NEXT: addi a1, a1, -1 ; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: slli a0, a0, 32 ; RV64ID-NEXT: srli a0, a0, 32 @@ -1917,7 +1917,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind { ; CHECK64-IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; CHECK64-IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; CHECK64-IZFHMIN-NEXT: seqz a1, a1 -; CHECK64-IZFHMIN-NEXT: addiw a1, a1, -1 +; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1 ; CHECK64-IZFHMIN-NEXT: and a0, a0, a1 ; CHECK64-IZFHMIN-NEXT: slli a0, a0, 32 ; CHECK64-IZFHMIN-NEXT: srli a0, a0, 32 @@ -1939,7 +1939,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind { ; CHECK64-IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; CHECK64-IZHINXMIN-NEXT: feq.s a0, a0, a0 ; CHECK64-IZHINXMIN-NEXT: seqz a0, a0 -; CHECK64-IZHINXMIN-NEXT: addiw a0, a0, -1 +; CHECK64-IZHINXMIN-NEXT: addi a0, a0, -1 ; CHECK64-IZHINXMIN-NEXT: and a0, a1, a0 ; CHECK64-IZHINXMIN-NEXT: slli a0, a0, 32 ; CHECK64-IZHINXMIN-NEXT: srli a0, a0, 32 @@ -1961,7 +1961,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind { ; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; CHECK64-IZDINXZHINXMIN-NEXT: feq.s a0, a0, a0 ; CHECK64-IZDINXZHINXMIN-NEXT: seqz a0, a0 -; CHECK64-IZDINXZHINXMIN-NEXT: addiw a0, a0, -1 +; CHECK64-IZDINXZHINXMIN-NEXT: addi a0, a0, -1 ; CHECK64-IZDINXZHINXMIN-NEXT: and a0, a1, a0 ; CHECK64-IZDINXZHINXMIN-NEXT: slli a0, a0, 32 ; CHECK64-IZDINXZHINXMIN-NEXT: srli a0, a0, 32 @@ -6551,13 +6551,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64I-NEXT: blez a0, .LBB32_4 ; RV64I-NEXT: # %bb.3: # %start ; RV64I-NEXT: lui s1, 8 -; RV64I-NEXT: addiw s1, s1, -1 +; RV64I-NEXT: addi s1, s1, -1 ; RV64I-NEXT: .LBB32_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt ; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srai a0, a0, 48 @@ -7511,7 +7511,7 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind { ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt ; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: srai a0, a0, 56 @@ -8128,7 +8128,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind { ; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz ; RV64IZFH-NEXT: feq.h a1, fa0, fa0 ; RV64IZFH-NEXT: seqz a1, a1 -; RV64IZFH-NEXT: addiw a1, a1, -1 +; RV64IZFH-NEXT: addi a1, a1, -1 ; RV64IZFH-NEXT: and a0, a0, a1 ; RV64IZFH-NEXT: slli a0, a0, 32 ; RV64IZFH-NEXT: srli a0, a0, 32 @@ -8148,7 +8148,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind { ; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz ; RV64IDZFH-NEXT: feq.h a1, fa0, fa0 ; RV64IDZFH-NEXT: seqz a1, a1 -; RV64IDZFH-NEXT: addiw a1, a1, -1 +; RV64IDZFH-NEXT: addi a1, a1, -1 ; RV64IDZFH-NEXT: and a0, a0, a1 ; RV64IDZFH-NEXT: slli a0, a0, 32 ; RV64IDZFH-NEXT: srli a0, a0, 32 @@ -8168,7 +8168,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addiw a0, a0, -1 +; RV64IZHINX-NEXT: addi a0, a0, -1 ; RV64IZHINX-NEXT: 
and a0, a1, a0 ; RV64IZHINX-NEXT: slli a0, a0, 32 ; RV64IZHINX-NEXT: srli a0, a0, 32 @@ -8188,7 +8188,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind { ; RV64IZDINXZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZDINXZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZDINXZHINX-NEXT: seqz a0, a0 -; RV64IZDINXZHINX-NEXT: addiw a0, a0, -1 +; RV64IZDINXZHINX-NEXT: addi a0, a0, -1 ; RV64IZDINXZHINX-NEXT: and a0, a1, a0 ; RV64IZDINXZHINX-NEXT: slli a0, a0, 32 ; RV64IZDINXZHINX-NEXT: srli a0, a0, 32 @@ -8290,7 +8290,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind { ; RV64ID-LP64-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64ID-LP64-NEXT: feq.s a1, fa5, fa5 ; RV64ID-LP64-NEXT: seqz a1, a1 -; RV64ID-LP64-NEXT: addiw a1, a1, -1 +; RV64ID-LP64-NEXT: addi a1, a1, -1 ; RV64ID-LP64-NEXT: and a0, a0, a1 ; RV64ID-LP64-NEXT: slli a0, a0, 32 ; RV64ID-LP64-NEXT: srli a0, a0, 32 @@ -8320,7 +8320,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind { ; RV64ID-NEXT: fcvt.wu.s a0, fa0, rtz ; RV64ID-NEXT: feq.s a1, fa0, fa0 ; RV64ID-NEXT: seqz a1, a1 -; RV64ID-NEXT: addiw a1, a1, -1 +; RV64ID-NEXT: addi a1, a1, -1 ; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: slli a0, a0, 32 ; RV64ID-NEXT: srli a0, a0, 32 @@ -8344,7 +8344,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind { ; CHECK64-IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; CHECK64-IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; CHECK64-IZFHMIN-NEXT: seqz a1, a1 -; CHECK64-IZFHMIN-NEXT: addiw a1, a1, -1 +; CHECK64-IZFHMIN-NEXT: addi a1, a1, -1 ; CHECK64-IZFHMIN-NEXT: and a0, a0, a1 ; CHECK64-IZFHMIN-NEXT: slli a0, a0, 32 ; CHECK64-IZFHMIN-NEXT: srli a0, a0, 32 @@ -8366,7 +8366,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind { ; CHECK64-IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; CHECK64-IZHINXMIN-NEXT: feq.s a0, a0, a0 ; CHECK64-IZHINXMIN-NEXT: seqz a0, a0 -; CHECK64-IZHINXMIN-NEXT: addiw a0, a0, -1 +; CHECK64-IZHINXMIN-NEXT: addi a0, a0, -1 ; CHECK64-IZHINXMIN-NEXT: and a0, a1, a0 ; CHECK64-IZHINXMIN-NEXT: slli a0, a0, 32 ; CHECK64-IZHINXMIN-NEXT: srli a0, a0, 32 @@ -8388,7 +8388,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind { ; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; CHECK64-IZDINXZHINXMIN-NEXT: feq.s a0, a0, a0 ; CHECK64-IZDINXZHINXMIN-NEXT: seqz a0, a0 -; CHECK64-IZDINXZHINXMIN-NEXT: addiw a0, a0, -1 +; CHECK64-IZDINXZHINXMIN-NEXT: addi a0, a0, -1 ; CHECK64-IZDINXZHINXMIN-NEXT: and a0, a1, a0 ; CHECK64-IZDINXZHINXMIN-NEXT: slli a0, a0, 32 ; CHECK64-IZDINXZHINXMIN-NEXT: srli a0, a0, 32 @@ -8518,13 +8518,13 @@ define signext i32 @fcvt_w_h_sat_sext(half %a) nounwind { ; RV64I-NEXT: call __gtsf2@plt ; RV64I-NEXT: blez a0, .LBB40_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: addiw s1, s3, -1 +; RV64I-NEXT: addi s1, s3, -1 ; RV64I-NEXT: .LBB40_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt ; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll index 70308857b2631..55c30046366d7 100644 --- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll +++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll @@ -259,21 +259,13 @@ define ptr @offset_sh3add() { } define dso_local void @read_modify_write() local_unnamed_addr nounwind { -; RV32-LABEL: read_modify_write: -; RV32: # %bb.0: # %entry -; RV32-NEXT: lui a0, %hi(s+160) -; RV32-NEXT: 
lw a1, %lo(s+160)(a0) -; RV32-NEXT: addi a1, a1, 10 -; RV32-NEXT: sw a1, %lo(s+160)(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: read_modify_write: -; RV64: # %bb.0: # %entry -; RV64-NEXT: lui a0, %hi(s+160) -; RV64-NEXT: lw a1, %lo(s+160)(a0) -; RV64-NEXT: addiw a1, a1, 10 -; RV64-NEXT: sw a1, %lo(s+160)(a0) -; RV64-NEXT: ret +; CHECK-LABEL: read_modify_write: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a0, %hi(s+160) +; CHECK-NEXT: lw a1, %lo(s+160)(a0) +; CHECK-NEXT: addi a1, a1, 10 +; CHECK-NEXT: sw a1, %lo(s+160)(a0) +; CHECK-NEXT: ret entry: %x = load i32, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 1), align 4 %y = add i32 %x, 10 @@ -375,21 +367,13 @@ define void @store_sh3add() { } define dso_local void @rmw_addi_addi() nounwind { -; RV32-LABEL: rmw_addi_addi: -; RV32: # %bb.0: # %entry -; RV32-NEXT: lui a0, %hi(bar+3211) -; RV32-NEXT: lbu a1, %lo(bar+3211)(a0) -; RV32-NEXT: addi a1, a1, 10 -; RV32-NEXT: sb a1, %lo(bar+3211)(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: rmw_addi_addi: -; RV64: # %bb.0: # %entry -; RV64-NEXT: lui a0, %hi(bar+3211) -; RV64-NEXT: lbu a1, %lo(bar+3211)(a0) -; RV64-NEXT: addiw a1, a1, 10 -; RV64-NEXT: sb a1, %lo(bar+3211)(a0) -; RV64-NEXT: ret +; CHECK-LABEL: rmw_addi_addi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a0, %hi(bar+3211) +; CHECK-NEXT: lbu a1, %lo(bar+3211)(a0) +; CHECK-NEXT: addi a1, a1, 10 +; CHECK-NEXT: sb a1, %lo(bar+3211)(a0) +; CHECK-NEXT: ret entry: %0 = load i8, ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 3211) %1 = add i8 %0, 10 diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll index 738318e4bd677..e191933b42338 100644 --- a/llvm/test/CodeGen/RISCV/imm.ll +++ b/llvm/test/CodeGen/RISCV/imm.ll @@ -588,35 +588,35 @@ define i64 @imm64_6() nounwind { ; RV64I-LABEL: imm64_6: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a0, 9321 -; RV64I-NEXT: addiw a0, a0, -1329 +; RV64I-NEXT: addi a0, a0, -1329 ; RV64I-NEXT: slli a0, a0, 35 ; RV64I-NEXT: ret ; ; RV64IZBA-LABEL: imm64_6: ; RV64IZBA: # %bb.0: ; RV64IZBA-NEXT: lui a0, 9321 -; RV64IZBA-NEXT: addiw a0, a0, -1329 +; RV64IZBA-NEXT: addi a0, a0, -1329 ; RV64IZBA-NEXT: slli a0, a0, 35 ; RV64IZBA-NEXT: ret ; ; RV64IZBB-LABEL: imm64_6: ; RV64IZBB: # %bb.0: ; RV64IZBB-NEXT: lui a0, 9321 -; RV64IZBB-NEXT: addiw a0, a0, -1329 +; RV64IZBB-NEXT: addi a0, a0, -1329 ; RV64IZBB-NEXT: slli a0, a0, 35 ; RV64IZBB-NEXT: ret ; ; RV64IZBS-LABEL: imm64_6: ; RV64IZBS: # %bb.0: ; RV64IZBS-NEXT: lui a0, 9321 -; RV64IZBS-NEXT: addiw a0, a0, -1329 +; RV64IZBS-NEXT: addi a0, a0, -1329 ; RV64IZBS-NEXT: slli a0, a0, 35 ; RV64IZBS-NEXT: ret ; ; RV64IXTHEADBB-LABEL: imm64_6: ; RV64IXTHEADBB: # %bb.0: ; RV64IXTHEADBB-NEXT: lui a0, 9321 -; RV64IXTHEADBB-NEXT: addiw a0, a0, -1329 +; RV64IXTHEADBB-NEXT: addi a0, a0, -1329 ; RV64IXTHEADBB-NEXT: slli a0, a0, 35 ; RV64IXTHEADBB-NEXT: ret ret i64 1311768464867721216 ; 0x1234_5678_0000_0000 @@ -709,7 +709,7 @@ define i64 @imm64_8() nounwind { ; RV64IZBA-LABEL: imm64_8: ; RV64IZBA: # %bb.0: ; RV64IZBA-NEXT: lui a0, 596523 -; RV64IZBA-NEXT: addiw a0, a0, 965 +; RV64IZBA-NEXT: addi a0, a0, 965 ; RV64IZBA-NEXT: slli.uw a0, a0, 13 ; RV64IZBA-NEXT: addi a0, a0, -1347 ; RV64IZBA-NEXT: slli a0, a0, 12 @@ -2298,7 +2298,7 @@ define i64 @imm_12900936431479() { ; RV64IZBA-LABEL: imm_12900936431479: ; RV64IZBA: # %bb.0: ; RV64IZBA-NEXT: lui a0, 768956 -; RV64IZBA-NEXT: addiw a0, a0, -1093 +; RV64IZBA-NEXT: addi a0, a0, -1093 ; RV64IZBA-NEXT: slli.uw a0, a0, 12 ; RV64IZBA-NEXT: addi a0, a0, 1911 ; RV64IZBA-NEXT: ret @@ -2353,7 +2353,7 @@ define i64 
@imm_12900918536874() { ; RV64IZBA-LABEL: imm_12900918536874: ; RV64IZBA: # %bb.0: ; RV64IZBA-NEXT: lui a0, 768955 -; RV64IZBA-NEXT: addiw a0, a0, -1365 +; RV64IZBA-NEXT: addi a0, a0, -1365 ; RV64IZBA-NEXT: slli.uw a0, a0, 12 ; RV64IZBA-NEXT: addi a0, a0, -1366 ; RV64IZBA-NEXT: ret @@ -2408,7 +2408,7 @@ define i64 @imm_12900925247761() { ; RV64IZBA-LABEL: imm_12900925247761: ; RV64IZBA: # %bb.0: ; RV64IZBA-NEXT: lui a0, 768955 -; RV64IZBA-NEXT: addiw a0, a0, 273 +; RV64IZBA-NEXT: addi a0, a0, 273 ; RV64IZBA-NEXT: slli.uw a0, a0, 12 ; RV64IZBA-NEXT: addi a0, a0, 273 ; RV64IZBA-NEXT: ret @@ -3030,7 +3030,7 @@ define i64 @imm64_same_lo_hi_negative() nounwind { ; RV64IZBA-LABEL: imm64_same_lo_hi_negative: ; RV64IZBA: # %bb.0: ; RV64IZBA-NEXT: lui a0, 526344 -; RV64IZBA-NEXT: addiw a0, a0, 128 +; RV64IZBA-NEXT: addi a0, a0, 128 ; RV64IZBA-NEXT: slli a1, a0, 32 ; RV64IZBA-NEXT: add.uw a0, a0, a1 ; RV64IZBA-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll index bcca8064e97a2..9e7f2e9525d3b 100644 --- a/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll +++ b/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll @@ -264,7 +264,7 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { ; ; RV64-LABEL: add_ultcmp_i16_i8: ; RV64: # %bb.0: -; RV64-NEXT: addiw a0, a0, -128 +; RV64-NEXT: addi a0, a0, -128 ; RV64-NEXT: slli a0, a0, 48 ; RV64-NEXT: srli a0, a0, 56 ; RV64-NEXT: sltiu a0, a0, 255 @@ -431,7 +431,7 @@ define i1 @add_ulecmp_i16_i8(i16 %x) nounwind { ; ; RV64-LABEL: add_ulecmp_i16_i8: ; RV64: # %bb.0: -; RV64-NEXT: addiw a0, a0, -128 +; RV64-NEXT: addi a0, a0, -128 ; RV64-NEXT: slli a0, a0, 48 ; RV64-NEXT: srli a0, a0, 56 ; RV64-NEXT: sltiu a0, a0, 255 @@ -457,7 +457,7 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { ; ; RV64I-LABEL: add_ugecmp_i16_i8: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 128 +; RV64I-NEXT: addi a0, a0, 128 ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: sltiu a0, a0, 256 @@ -474,7 +474,7 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { ; ; RV64ZBB-LABEL: add_ugecmp_i16_i8: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: addiw a0, a0, 128 +; RV64ZBB-NEXT: addi a0, a0, 128 ; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: sltiu a0, a0, 256 ; RV64ZBB-NEXT: xori a0, a0, 1 @@ -645,7 +645,7 @@ define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind { ; ; RV64I-LABEL: add_ugtcmp_i16_i8: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 128 +; RV64I-NEXT: addi a0, a0, 128 ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: sltiu a0, a0, 256 @@ -662,7 +662,7 @@ define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind { ; ; RV64ZBB-LABEL: add_ugtcmp_i16_i8: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: addiw a0, a0, 128 +; RV64ZBB-NEXT: addi a0, a0, 128 ; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: sltiu a0, a0, 256 ; RV64ZBB-NEXT: xori a0, a0, 1 @@ -751,7 +751,7 @@ define i1 @add_ugecmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind { ; RV64ZBB-LABEL: add_ugecmp_bad_i16_i8_cmp: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: zext.h a1, a1 -; RV64ZBB-NEXT: addiw a0, a0, 128 +; RV64ZBB-NEXT: addi a0, a0, 128 ; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: sltu a0, a0, a1 ; RV64ZBB-NEXT: xori a0, a0, 1 @@ -774,7 +774,7 @@ define i1 @add_ugecmp_bad_i8_i16(i16 %x) nounwind { ; ; RV64I-LABEL: add_ugecmp_bad_i8_i16: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 128 +; RV64I-NEXT: addi a0, a0, 128 ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: sltiu a0, a0, 128 @@ -791,7 
+791,7 @@ define i1 @add_ugecmp_bad_i8_i16(i16 %x) nounwind { ; ; RV64ZBB-LABEL: add_ugecmp_bad_i8_i16: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: addiw a0, a0, 128 +; RV64ZBB-NEXT: addi a0, a0, 128 ; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: sltiu a0, a0, 128 ; RV64ZBB-NEXT: xori a0, a0, 1 @@ -814,7 +814,7 @@ define i1 @add_ugecmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind { ; ; RV64I-LABEL: add_ugecmp_bad_i16_i8_c0notpoweroftwo: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 192 +; RV64I-NEXT: addi a0, a0, 192 ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: sltiu a0, a0, 256 @@ -831,7 +831,7 @@ define i1 @add_ugecmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind { ; ; RV64ZBB-LABEL: add_ugecmp_bad_i16_i8_c0notpoweroftwo: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: addiw a0, a0, 192 +; RV64ZBB-NEXT: addi a0, a0, 192 ; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: sltiu a0, a0, 256 ; RV64ZBB-NEXT: xori a0, a0, 1 @@ -854,7 +854,7 @@ define i1 @add_ugecmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind { ; ; RV64I-LABEL: add_ugecmp_bad_i16_i8_c1notpoweroftwo: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 128 +; RV64I-NEXT: addi a0, a0, 128 ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: sltiu a0, a0, 768 @@ -871,7 +871,7 @@ define i1 @add_ugecmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind { ; ; RV64ZBB-LABEL: add_ugecmp_bad_i16_i8_c1notpoweroftwo: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: addiw a0, a0, 128 +; RV64ZBB-NEXT: addi a0, a0, 128 ; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: sltiu a0, a0, 768 ; RV64ZBB-NEXT: xori a0, a0, 1 @@ -894,7 +894,7 @@ define i1 @add_ugecmp_bad_i16_i8_magic(i16 %x) nounwind { ; ; RV64I-LABEL: add_ugecmp_bad_i16_i8_magic: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 64 +; RV64I-NEXT: addi a0, a0, 64 ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: sltiu a0, a0, 256 @@ -911,7 +911,7 @@ define i1 @add_ugecmp_bad_i16_i8_magic(i16 %x) nounwind { ; ; RV64ZBB-LABEL: add_ugecmp_bad_i16_i8_magic: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: addiw a0, a0, 64 +; RV64ZBB-NEXT: addi a0, a0, 64 ; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: sltiu a0, a0, 256 ; RV64ZBB-NEXT: xori a0, a0, 1 @@ -934,7 +934,7 @@ define i1 @add_ugecmp_bad_i16_i4(i16 %x) nounwind { ; ; RV64I-LABEL: add_ugecmp_bad_i16_i4: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 8 +; RV64I-NEXT: addi a0, a0, 8 ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: sltiu a0, a0, 16 @@ -951,7 +951,7 @@ define i1 @add_ugecmp_bad_i16_i4(i16 %x) nounwind { ; ; RV64ZBB-LABEL: add_ugecmp_bad_i16_i4: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: addiw a0, a0, 8 +; RV64ZBB-NEXT: addi a0, a0, 8 ; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: sltiu a0, a0, 16 ; RV64ZBB-NEXT: xori a0, a0, 1 @@ -974,7 +974,7 @@ define i1 @add_ugecmp_bad_i24_i8(i24 %x) nounwind { ; ; RV64-LABEL: add_ugecmp_bad_i24_i8: ; RV64: # %bb.0: -; RV64-NEXT: addiw a0, a0, 128 +; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: slli a0, a0, 40 ; RV64-NEXT: srli a0, a0, 40 ; RV64-NEXT: sltiu a0, a0, 256 diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-throw.ll b/llvm/test/CodeGen/RISCV/machine-outliner-throw.ll index 03419932d030a..21254b6302038 100644 --- a/llvm/test/CodeGen/RISCV/machine-outliner-throw.ll +++ b/llvm/test/CodeGen/RISCV/machine-outliner-throw.ll @@ -13,7 +13,7 @@ define i32 @func1(i32 %x) #0 { ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: mul a0, a0, a0 -; CHECK-NEXT: addiw s0, a0, 1 +; CHECK-NEXT: addi s0, a0, 1 ; CHECK-NEXT: li a0, 4 ; 
CHECK-NEXT: call __cxa_allocate_exception@plt ; CHECK-NEXT: sw s0, 0(a0) @@ -40,7 +40,7 @@ define i32 @func2(i32 %x) #0 { ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: mul a0, a0, a0 -; CHECK-NEXT: addiw s0, a0, 1 +; CHECK-NEXT: addi s0, a0, 1 ; CHECK-NEXT: li a0, 4 ; CHECK-NEXT: call __cxa_allocate_exception@plt ; CHECK-NEXT: sw s0, 0(a0) diff --git a/llvm/test/CodeGen/RISCV/memcpy.ll b/llvm/test/CodeGen/RISCV/memcpy.ll index 932bd2e13d62a..d831a2a002a3e 100644 --- a/llvm/test/CodeGen/RISCV/memcpy.ll +++ b/llvm/test/CodeGen/RISCV/memcpy.ll @@ -167,7 +167,7 @@ define void @t2(ptr nocapture %C) nounwind { ; RV64-FAST-NEXT: ld a2, %lo(.L.str2)(a1) ; RV64-FAST-NEXT: sd a2, 0(a0) ; RV64-FAST-NEXT: lui a2, 1156 -; RV64-FAST-NEXT: addiw a2, a2, 332 +; RV64-FAST-NEXT: addi a2, a2, 332 ; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str2) ; RV64-FAST-NEXT: ld a3, 24(a1) ; RV64-FAST-NEXT: ld a4, 16(a1) @@ -332,10 +332,10 @@ define void @t5(ptr nocapture %C) nounwind { ; RV64-FAST-LABEL: t5: ; RV64-FAST: # %bb.0: # %entry ; RV64-FAST-NEXT: lui a1, 1349 -; RV64-FAST-NEXT: addiw a1, a1, 857 +; RV64-FAST-NEXT: addi a1, a1, 857 ; RV64-FAST-NEXT: sw a1, 3(a0) ; RV64-FAST-NEXT: lui a1, 365861 -; RV64-FAST-NEXT: addiw a1, a1, -1980 +; RV64-FAST-NEXT: addi a1, a1, -1980 ; RV64-FAST-NEXT: sw a1, 0(a0) ; RV64-FAST-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll index 42f998e68bb6e..7c3294fa81dcf 100644 --- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll @@ -551,7 +551,7 @@ define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, ptr %p) { ; ; RV64-LABEL: uaddo_i8_increment_noncanonical_1: ; RV64: # %bb.0: -; RV64-NEXT: addiw a2, a0, 1 +; RV64-NEXT: addi a2, a0, 1 ; RV64-NEXT: andi a0, a2, 255 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: sb a2, 0(a1) @@ -594,7 +594,7 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) { ; ; RV64-LABEL: uaddo_i16_increment_noncanonical_3: ; RV64: # %bb.0: -; RV64-NEXT: addiw a2, a0, 1 +; RV64-NEXT: addi a2, a0, 1 ; RV64-NEXT: slli a0, a2, 48 ; RV64-NEXT: srli a0, a0, 48 ; RV64-NEXT: seqz a0, a0 @@ -672,7 +672,7 @@ define i1 @uaddo_i32_decrement_alt(i32 signext %x, ptr %p) { ; RV64-LABEL: uaddo_i32_decrement_alt: ; RV64: # %bb.0: ; RV64-NEXT: snez a2, a0 -; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: sw a0, 0(a1) ; RV64-NEXT: mv a0, a2 ; RV64-NEXT: ret @@ -915,7 +915,7 @@ define i1 @usubo_ult_constant_op1_i16(i16 %x, ptr %p) { ; RV64: # %bb.0: ; RV64-NEXT: slli a2, a0, 48 ; RV64-NEXT: srli a2, a2, 48 -; RV64-NEXT: addiw a3, a0, -44 +; RV64-NEXT: addi a3, a0, -44 ; RV64-NEXT: sltiu a0, a2, 44 ; RV64-NEXT: sh a3, 0(a1) ; RV64-NEXT: ret @@ -939,7 +939,7 @@ define i1 @usubo_ugt_constant_op1_i8(i8 %x, ptr %p) { ; RV64: # %bb.0: ; RV64-NEXT: andi a2, a0, 255 ; RV64-NEXT: sltiu a2, a2, 45 -; RV64-NEXT: addiw a0, a0, -45 +; RV64-NEXT: addi a0, a0, -45 ; RV64-NEXT: sb a0, 0(a1) ; RV64-NEXT: mv a0, a2 ; RV64-NEXT: ret @@ -962,7 +962,7 @@ define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) { ; RV64-LABEL: usubo_eq_constant1_op1_i32: ; RV64: # %bb.0: ; RV64-NEXT: sext.w a2, a0 -; RV64-NEXT: addiw a3, a0, -1 +; RV64-NEXT: addi a3, a0, -1 ; RV64-NEXT: seqz a0, a2 ; RV64-NEXT: sw a3, 0(a1) ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rem.ll b/llvm/test/CodeGen/RISCV/rem.ll index 3d5fd3fd43317..feece1f54ffc6 100644 --- a/llvm/test/CodeGen/RISCV/rem.ll +++ b/llvm/test/CodeGen/RISCV/rem.ll @@ -578,7 
+578,7 @@ define i16 @urem16(i16 %a, i16 %b) nounwind { ; RV64IM-LABEL: urem16: ; RV64IM: # %bb.0: ; RV64IM-NEXT: lui a2, 16 -; RV64IM-NEXT: addiw a2, a2, -1 +; RV64IM-NEXT: addi a2, a2, -1 ; RV64IM-NEXT: and a1, a1, a2 ; RV64IM-NEXT: and a0, a0, a2 ; RV64IM-NEXT: remuw a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll index 61477589f8455..c343ef5b451de 100644 --- a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll +++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll @@ -12,7 +12,7 @@ define void @test1(ptr nocapture noundef %a, i32 noundef signext %n) { ; CHECK-NEXT: .LBB0_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: addiw a2, a2, 4 +; CHECK-NEXT: addi a2, a2, 4 ; CHECK-NEXT: sw a2, 0(a0) ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: addi a0, a0, 4 @@ -62,9 +62,9 @@ define void @test2(ptr nocapture noundef %a, i32 noundef signext %n) { ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lw a5, -4(a4) ; CHECK-NEXT: lw a6, 0(a4) -; CHECK-NEXT: addiw a5, a5, 4 +; CHECK-NEXT: addi a5, a5, 4 ; CHECK-NEXT: sw a5, -4(a4) -; CHECK-NEXT: addiw a6, a6, 4 +; CHECK-NEXT: addi a6, a6, 4 ; CHECK-NEXT: sw a6, 0(a4) ; CHECK-NEXT: addi a3, a3, 2 ; CHECK-NEXT: addi a4, a4, 8 @@ -75,7 +75,7 @@ define void @test2(ptr nocapture noundef %a, i32 noundef signext %n) { ; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: lw a1, 0(a0) -; CHECK-NEXT: addiw a1, a1, 4 +; CHECK-NEXT: addi a1, a1, 4 ; CHECK-NEXT: sw a1, 0(a0) ; CHECK-NEXT: .LBB1_7: # %for.cond.cleanup ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll b/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll index f957d25f5c490..6a441e2b9f67f 100644 --- a/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll @@ -9,10 +9,10 @@ define i32 @foo(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: mul a0, a0, a0 -; CHECK-NEXT: addiw a0, a0, 1 +; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: mul a0, a0, a0 ; CHECK-NEXT: add a0, a0, a2 -; CHECK-NEXT: addiw a0, a0, 1 +; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: sllw a0, a0, a1 ; CHECK-NEXT: ret %b = mul i32 %x, %x diff --git a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll index 1d495b11f6140..dad20b2d19464 100644 --- a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll @@ -1659,14 +1659,14 @@ define signext i32 @sext_addiw_zext(i32 zeroext %a) nounwind { define zeroext i32 @zext_addiw_aext(i32 %a) nounwind { ; RV64I-LABEL: zext_addiw_aext: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 7 +; RV64I-NEXT: addi a0, a0, 7 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: zext_addiw_aext: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: addiw a0, a0, 7 +; RV64ZBA-NEXT: addi a0, a0, 7 ; RV64ZBA-NEXT: zext.w a0, a0 ; RV64ZBA-NEXT: ret %1 = add i32 %a, 7 @@ -1676,14 +1676,14 @@ define zeroext i32 @zext_addiw_aext(i32 %a) nounwind { define zeroext i32 @zext_addiw_sext(i32 signext %a) nounwind { ; RV64I-LABEL: zext_addiw_sext: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 8 +; RV64I-NEXT: addi a0, a0, 8 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: zext_addiw_sext: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: addiw a0, a0, 8 +; RV64ZBA-NEXT: addi a0, a0, 8 ; 
RV64ZBA-NEXT: zext.w a0, a0 ; RV64ZBA-NEXT: ret %1 = add i32 %a, 8 @@ -1693,14 +1693,14 @@ define zeroext i32 @zext_addiw_sext(i32 signext %a) nounwind { define zeroext i32 @zext_addiw_zext(i32 zeroext %a) nounwind { ; RV64I-LABEL: zext_addiw_zext: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 9 +; RV64I-NEXT: addi a0, a0, 9 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: zext_addiw_zext: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: addiw a0, a0, 9 +; RV64ZBA-NEXT: addi a0, a0, 9 ; RV64ZBA-NEXT: zext.w a0, a0 ; RV64ZBA-NEXT: ret %1 = add i32 %a, 9 diff --git a/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll b/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll index 4a9d8b08a4b2f..f38aa71fb158d 100644 --- a/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll @@ -113,7 +113,7 @@ define signext i32 @test9(ptr %0, i64 %1) { ; RV64I-LABEL: test9: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a2, 1 -; RV64I-NEXT: addiw a2, a2, 1 +; RV64I-NEXT: addi a2, a2, 1 ; RV64I-NEXT: addw a1, a1, a2 ; RV64I-NEXT: slli a1, a1, 2 ; RV64I-NEXT: add a0, a0, a1 @@ -133,7 +133,7 @@ define signext i32 @test10(ptr %0, i64 %1) { ; RV64I-LABEL: test10: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a2, 30141 -; RV64I-NEXT: addiw a2, a2, -747 +; RV64I-NEXT: addi a2, a2, -747 ; RV64I-NEXT: subw a2, a2, a1 ; RV64I-NEXT: slli a2, a2, 2 ; RV64I-NEXT: add a0, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll index eea04ae03f8d6..ab1691543c78a 100644 --- a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll @@ -8,10 +8,10 @@ define signext i32 @addw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: not a2, a0 ; CHECK-NEXT: add a2, a2, a1 -; CHECK-NEXT: addiw a3, a0, 1 +; CHECK-NEXT: addi a3, a0, 1 ; CHECK-NEXT: mul a3, a2, a3 ; CHECK-NEXT: subw a1, a1, a0 -; CHECK-NEXT: addiw a1, a1, -2 +; CHECK-NEXT: addi a1, a1, -2 ; CHECK-NEXT: slli a1, a1, 32 ; CHECK-NEXT: slli a2, a2, 32 ; CHECK-NEXT: mulhu a1, a2, a1 @@ -56,7 +56,7 @@ define signext i32 @subw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin ; CHECK-NEXT: add a3, a2, a1 ; CHECK-NEXT: mul a2, a3, a2 ; CHECK-NEXT: subw a1, a1, a0 -; CHECK-NEXT: addiw a1, a1, -2 +; CHECK-NEXT: addi a1, a1, -2 ; CHECK-NEXT: slli a1, a1, 32 ; CHECK-NEXT: slli a3, a3, 32 ; CHECK-NEXT: mulhu a1, a3, a1 diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll index 90a10d02636bd..c8cd710fe9ae4 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll @@ -173,7 +173,7 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind { ; ; RV64XTHEADBB-LABEL: log2_ceil_i32: ; RV64XTHEADBB: # %bb.0: -; RV64XTHEADBB-NEXT: addiw a0, a0, -1 +; RV64XTHEADBB-NEXT: addi a0, a0, -1 ; RV64XTHEADBB-NEXT: not a0, a0 ; RV64XTHEADBB-NEXT: slli a0, a0, 32 ; RV64XTHEADBB-NEXT: th.ff0 a0, a0 @@ -774,7 +774,7 @@ define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: srli a2, a0, 8 ; RV64I-NEXT: lui a3, 16 -; RV64I-NEXT: addiw a3, a3, -256 +; RV64I-NEXT: addi a3, a3, -256 ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: srliw a4, a0, 24 ; RV64I-NEXT: or a2, a2, a4 diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll index 07a4c093f06e6..a7af8ab348e99 100644 --- 
a/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll @@ -30,7 +30,7 @@ define signext i32 @orcb32_knownbits(i32 signext %a) nounwind { ; RV64ZBB-NEXT: lui a1, 1044480 ; RV64ZBB-NEXT: and a0, a0, a1 ; RV64ZBB-NEXT: lui a1, 2048 -; RV64ZBB-NEXT: addiw a1, a1, 1 +; RV64ZBB-NEXT: addi a1, a1, 1 ; RV64ZBB-NEXT: or a0, a0, a1 ; RV64ZBB-NEXT: orc.b a0, a0 ; RV64ZBB-NEXT: sext.w a0, a0 diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll index 18f1574b53526..f038af255a411 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -169,7 +169,7 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind { ; ; RV64ZBB-LABEL: log2_ceil_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: addiw a0, a0, -1 +; RV64ZBB-NEXT: addi a0, a0, -1 ; RV64ZBB-NEXT: clzw a0, a0 ; RV64ZBB-NEXT: li a1, 32 ; RV64ZBB-NEXT: sub a0, a1, a0 @@ -768,11 +768,11 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind { define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind { ; RV64I-LABEL: ctpop_v2i32_ult_two: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a2, a0, -1 +; RV64I-NEXT: addi a2, a0, -1 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: seqz a0, a0 -; RV64I-NEXT: addiw a2, a1, -1 +; RV64I-NEXT: addi a2, a1, -1 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: sext.w a1, a1 ; RV64I-NEXT: seqz a1, a1 @@ -793,11 +793,11 @@ define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind { define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind { ; RV64I-LABEL: ctpop_v2i32_ugt_one: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a2, a0, -1 +; RV64I-NEXT: addi a2, a0, -1 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: addiw a2, a1, -1 +; RV64I-NEXT: addi a2, a1, -1 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: sext.w a1, a1 ; RV64I-NEXT: snez a1, a1 @@ -822,13 +822,13 @@ define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: sext.w a2, a1 ; RV64I-NEXT: sext.w a3, a0 -; RV64I-NEXT: addiw a4, a0, -1 +; RV64I-NEXT: addi a4, a0, -1 ; RV64I-NEXT: and a0, a0, a4 ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: snez a3, a3 ; RV64I-NEXT: and a0, a3, a0 -; RV64I-NEXT: addiw a3, a1, -1 +; RV64I-NEXT: addi a3, a1, -1 ; RV64I-NEXT: and a1, a1, a3 ; RV64I-NEXT: sext.w a1, a1 ; RV64I-NEXT: seqz a1, a1 @@ -855,13 +855,13 @@ define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: sext.w a2, a1 ; RV64I-NEXT: sext.w a3, a0 -; RV64I-NEXT: addiw a4, a0, -1 +; RV64I-NEXT: addi a4, a0, -1 ; RV64I-NEXT: and a0, a0, a4 ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: snez a0, a0 ; RV64I-NEXT: seqz a3, a3 ; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: addiw a3, a1, -1 +; RV64I-NEXT: addi a3, a1, -1 ; RV64I-NEXT: and a1, a1, a3 ; RV64I-NEXT: sext.w a1, a1 ; RV64I-NEXT: snez a1, a1 @@ -1518,7 +1518,7 @@ define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: srli a2, a0, 8 ; RV64I-NEXT: lui a3, 16 -; RV64I-NEXT: addiw a3, a3, -256 +; RV64I-NEXT: addi a3, a3, -256 ; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: srliw a4, a0, 24 ; RV64I-NEXT: or a2, a2, a4 diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbkb.ll index fa96c576017ba..dd6248233975a 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll @@ -303,7 +303,7 @@ define i64 @pack_i64_imm() { ; RV64ZBKB-LABEL: pack_i64_imm: ; RV64ZBKB: # %bb.0: ; RV64ZBKB-NEXT: 
lui a0, 65793 -; RV64ZBKB-NEXT: addiw a0, a0, 16 +; RV64ZBKB-NEXT: addi a0, a0, 16 ; RV64ZBKB-NEXT: pack a0, a0, a0 ; RV64ZBKB-NEXT: ret ret i64 1157442765409226768 ; 0x0101010101010101 diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll index aa3d7b3fa8a7c..3514fa66f5886 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll @@ -229,63 +229,34 @@ define @bitreverse_nxv64i8( %va) { declare @llvm.bitreverse.nxv64i8() define @bitreverse_nxv1i16( %va) { -; RV32-LABEL: bitreverse_nxv1i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitreverse_nxv1i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitreverse_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: bitreverse_nxv1i16: ; CHECK-ZVBB: # %bb.0: @@ -298,63 +269,34 @@ define @bitreverse_nxv1i16( %va) { declare @llvm.bitreverse.nxv1i16() define @bitreverse_nxv2i16( %va) { -; RV32-LABEL: bitreverse_nxv2i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; 
RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitreverse_nxv2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitreverse_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: bitreverse_nxv2i16: ; CHECK-ZVBB: # %bb.0: @@ -367,63 +309,34 @@ define @bitreverse_nxv2i16( %va) { declare @llvm.bitreverse.nxv2i16() define @bitreverse_nxv4i16( %va) { -; RV32-LABEL: bitreverse_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; 
RV32-NEXT: ret -; -; RV64-LABEL: bitreverse_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitreverse_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: bitreverse_nxv4i16: ; CHECK-ZVBB: # %bb.0: @@ -436,63 +349,34 @@ define @bitreverse_nxv4i16( %va) { declare @llvm.bitreverse.nxv4i16() define @bitreverse_nxv8i16( %va) { -; RV32-LABEL: bitreverse_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitreverse_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; 
RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitreverse_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: bitreverse_nxv8i16: ; CHECK-ZVBB: # %bb.0: @@ -505,63 +389,34 @@ define @bitreverse_nxv8i16( %va) { declare @llvm.bitreverse.nxv8i16() define @bitreverse_nxv16i16( %va) { -; RV32-LABEL: bitreverse_nxv16i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitreverse_nxv16i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitreverse_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 2 
+; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: bitreverse_nxv16i16: ; CHECK-ZVBB: # %bb.0: @@ -574,63 +429,34 @@ define @bitreverse_nxv16i16( %va) { declare @llvm.bitreverse.nxv16i16() define @bitreverse_nxv32i16( %va) { -; RV32-LABEL: bitreverse_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitreverse_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitreverse_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: bitreverse_nxv32i16: ; CHECK-ZVBB: # %bb.0: @@ -643,79 +469,42 @@ define @bitreverse_nxv32i16( %va) { declare @llvm.bitreverse.nxv32i16() define @bitreverse_nxv1i32( %va) { -; RV32-LABEL: bitreverse_nxv1i32: 
-; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitreverse_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitreverse_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: bitreverse_nxv1i32: ; CHECK-ZVBB: # %bb.0: @@ -728,79 +517,42 @@ define @bitreverse_nxv1i32( 
%va) { declare @llvm.bitreverse.nxv1i32() define @bitreverse_nxv2i32( %va) { -; RV32-LABEL: bitreverse_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitreverse_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitreverse_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret ; ; 
CHECK-ZVBB-LABEL: bitreverse_nxv2i32: ; CHECK-ZVBB: # %bb.0: @@ -813,79 +565,42 @@ define @bitreverse_nxv2i32( %va) { declare @llvm.bitreverse.nxv2i32() define @bitreverse_nxv4i32( %va) { -; RV32-LABEL: bitreverse_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitreverse_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsrl.vi v12, v8, 24 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsll.vi v12, v12, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitreverse_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsrl.vi v12, v8, 24 +; CHECK-NEXT: vor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsll.vi v12, v12, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 
+; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: bitreverse_nxv4i32: ; CHECK-ZVBB: # %bb.0: @@ -898,79 +613,42 @@ define @bitreverse_nxv4i32( %va) { declare @llvm.bitreverse.nxv4i32() define @bitreverse_nxv8i32( %va) { -; RV32-LABEL: bitreverse_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitreverse_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsrl.vi v16, v8, 24 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsll.vi v16, v16, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitreverse_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsrl.vi v16, v8, 24 +; CHECK-NEXT: vor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsll.vi v16, v16, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; 
CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: bitreverse_nxv8i32: ; CHECK-ZVBB: # %bb.0: @@ -983,79 +661,42 @@ define @bitreverse_nxv8i32( %va) { declare @llvm.bitreverse.nxv8i32() define @bitreverse_nxv16i32( %va) { -; RV32-LABEL: bitreverse_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vand.vx v24, v8, a0 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitreverse_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsrl.vi v24, v8, 24 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vand.vx v24, v8, a0 -; RV64-NEXT: vsll.vi v24, v24, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitreverse_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v24, v8, 24 +; CHECK-NEXT: vor.vv v16, v16, v24 +; CHECK-NEXT: vand.vx v24, v8, a0 +; CHECK-NEXT: vsll.vi v24, v24, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v16, v8 +; 
CHECK-NEXT: vsrl.vi v16, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: bitreverse_nxv16i32: ; CHECK-ZVBB: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index ca2d7639f528e..7698f860589aa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -487,63 +487,34 @@ define @vp_bitreverse_nxv64i8_unmasked( %va declare @llvm.vp.bitreverse.nxv1i16(, , i32) define @vp_bitreverse_nxv1i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv1i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv1i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; 
CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16: ; CHECK-ZVBB: # %bb.0: @@ -555,63 +526,34 @@ define @vp_bitreverse_nxv1i16( %va, @vp_bitreverse_nxv1i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv1i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv1i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -627,63 +569,34 @@ define @vp_bitreverse_nxv1i16_unmasked( %va declare @llvm.vp.bitreverse.nxv2i16(, , i32) define @vp_bitreverse_nxv2i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv2i16: -; RV32: # 
%bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16: ; CHECK-ZVBB: # %bb.0: @@ -695,63 +608,34 @@ define @vp_bitreverse_nxv2i16( %va, @vp_bitreverse_nxv2i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv2i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: 
vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv2i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -767,63 +651,34 @@ define @vp_bitreverse_nxv2i16_unmasked( %va declare @llvm.vp.bitreverse.nxv4i16(, , i32) define @vp_bitreverse_nxv4i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, 
a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16: ; CHECK-ZVBB: # %bb.0: @@ -835,63 +690,34 @@ define @vp_bitreverse_nxv4i16( %va, @vp_bitreverse_nxv4i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 
-; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -907,63 +733,34 @@ define @vp_bitreverse_nxv4i16_unmasked( %va declare @llvm.vp.bitreverse.nxv8i16(, , i32) define @vp_bitreverse_nxv8i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, 
ma +; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16: ; CHECK-ZVBB: # %bb.0: @@ -975,63 +772,34 @@ define @vp_bitreverse_nxv8i16( %va, @vp_bitreverse_nxv8i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: 
vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1047,63 +815,34 @@ define @vp_bitreverse_nxv8i16_unmasked( %va declare @llvm.vp.bitreverse.nxv16i16(, , i32) define @vp_bitreverse_nxv16i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv16i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv16i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v12, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v12, v8, v0.t +; CHECK-NEXT: ret ; ; 
CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16: ; CHECK-ZVBB: # %bb.0: @@ -1115,63 +854,34 @@ define @vp_bitreverse_nxv16i16( %va, @vp_bitreverse_nxv16i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1187,63 +897,34 @@ define @vp_bitreverse_nxv16i16_unmasked( declare @llvm.vp.bitreverse.nxv32i16(, , i32) define @vp_bitreverse_nxv32i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; 
RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16: ; CHECK-ZVBB: # %bb.0: @@ -1255,63 +936,34 @@ define @vp_bitreverse_nxv32i16( %va, @vp_bitreverse_nxv32i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv32i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; 
RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv32i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1327,79 +979,42 @@ define @vp_bitreverse_nxv32i16_unmasked( declare @llvm.vp.bitreverse.nxv1i32(, , i32) define @vp_bitreverse_nxv1i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsll.vi v10, v10, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv1i32: -; 
RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV64-NEXT: vor.vv v9, v9, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsll.vi v10, v10, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t +; CHECK-NEXT: vor.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32: ; CHECK-ZVBB: # %bb.0: @@ -1411,79 +1026,42 @@ define @vp_bitreverse_nxv1i32( %va, @vp_bitreverse_nxv1i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv1i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; 
RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv1i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1499,79 +1077,42 @@ define @vp_bitreverse_nxv1i32_unmasked( %va declare @llvm.vp.bitreverse.nxv2i32(, , i32) define @vp_bitreverse_nxv2i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsll.vi v10, v10, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: lui a0, 61681 -; 
RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV64-NEXT: vor.vv v9, v9, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsll.vi v10, v10, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t +; CHECK-NEXT: vor.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32: ; CHECK-ZVBB: # %bb.0: @@ -1583,79 +1124,42 @@ define @vp_bitreverse_nxv2i32( %va, @vp_bitreverse_nxv2i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: 
vp_bitreverse_nxv2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32_unmasked: ; 
CHECK-ZVBB: # %bb.0: @@ -1671,79 +1175,42 @@ define @vp_bitreverse_nxv2i32_unmasked( %va declare @llvm.vp.bitreverse.nxv4i32(, , i32) define @vp_bitreverse_nxv4i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsll.vi v12, v12, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 24, v0.t -; RV64-NEXT: vor.vv v10, v10, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsll.vi v12, v12, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t +; CHECK-NEXT: vor.vv v10, v10, v12, v0.t +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi 
v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32: ; CHECK-ZVBB: # %bb.0: @@ -1755,79 +1222,42 @@ define @vp_bitreverse_nxv4i32( %va, @vp_bitreverse_nxv4i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsrl.vi v12, v8, 24 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsll.vi v12, v12, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsrl.vi v12, v8, 24 +; CHECK-NEXT: vor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsll.vi v12, v12, 8 +; CHECK-NEXT: vsll.vi v8, 
v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1843,79 +1273,42 @@ define @vp_bitreverse_nxv4i32_unmasked( %va declare @llvm.vp.bitreverse.nxv8i32(, , i32) define @vp_bitreverse_nxv8i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV32-NEXT: vor.vv v12, v12, v16, v0.t -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV64-NEXT: vor.vv v12, v12, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsll.vi v16, v16, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vand.vx 
v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v12, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t +; CHECK-NEXT: vor.vv v12, v12, v16, v0.t +; CHECK-NEXT: vand.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v12, v8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32: ; CHECK-ZVBB: # %bb.0: @@ -1927,167 +1320,93 @@ define @vp_bitreverse_nxv8i32( %va, @vp_bitreverse_nxv8i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsrl.vi v16, v8, 24 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsll.vi v16, v16, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; 
RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsrl.vi v16, v8, 24 +; CHECK-NEXT: vor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsll.vi v16, v16, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32_unmasked: -; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-ZVBB-NEXT: vbrev.v v8, v8 -; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv8i32( %va, %m, i32 %evl) - ret %v -} - -declare @llvm.vp.bitreverse.nxv16i32(, , i32) - -define @vp_bitreverse_nxv16i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a0, v0.t -; RV32-NEXT: vsll.vi v24, v24, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: 
vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t -; RV64-NEXT: vand.vx v24, v8, a0, v0.t -; RV64-NEXT: vsll.vi v24, v24, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: ret +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.bitreverse.nxv8i32( %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.bitreverse.nxv16i32(, , i32) + +define @vp_bitreverse_nxv16i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_bitreverse_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vsrl.vi v24, v8, 24, v0.t +; CHECK-NEXT: vor.vv v16, v16, v24, v0.t +; CHECK-NEXT: vand.vx v24, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v24, v24, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32: ; CHECK-ZVBB: # %bb.0: @@ -2099,79 +1418,42 @@ define @vp_bitreverse_nxv16i32( %va, @vp_bitreverse_nxv16i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vand.vx v24, v8, a0 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 
-; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsrl.vi v24, v8, 24 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vand.vx v24, v8, a0 -; RV64-NEXT: vsll.vi v24, v24, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v24, v8, 24 +; CHECK-NEXT: vor.vv v16, v16, v24 +; CHECK-NEXT: vand.vx v24, v8, a0 +; CHECK-NEXT: vsll.vi v24, v24, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -3774,179 +3056,92 @@ define @vp_bitreverse_nxv8i64_unmasked( %va declare @llvm.vp.bitreverse.nxv64i16(, , i32) define @vp_bitreverse_nxv64i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv64i16: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a1, 
vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV32-NEXT: vmv1r.v v24, v0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: srli a2, a1, 1 -; RV32-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a2 -; RV32-NEXT: slli a1, a1, 2 -; RV32-NEXT: sub a2, a0, a1 -; RV32-NEXT: sltu a3, a0, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v16, 8, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v16, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV32-NEXT: lui a2, 1 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vand.vx v8, v8, a2, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vsll.vi v16, v16, 4, v0.t -; RV32-NEXT: vor.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 2, v0.t -; RV32-NEXT: lui a3, 3 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vand.vx v8, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a3, v0.t -; RV32-NEXT: vsll.vi v16, v16, 2, v0.t -; RV32-NEXT: vor.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: lui a4, 5 -; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vand.vx v16, v16, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a5, sp, 16 -; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; RV32-NEXT: bltu a0, a1, .LBB46_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB46_2: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vand.vx v8, v8, a2, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vand.vx v16, v16, a3, v0.t -; RV32-NEXT: vand.vx v8, v8, a3, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vand.vx v16, v16, a4, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv64i16: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size 
Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: srli a2, a1, 1 -; RV64-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a2 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: sub a2, a0, a1 -; RV64-NEXT: sltu a3, a0, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v8, v16, 8, v0.t -; RV64-NEXT: vsll.vi v16, v16, 8, v0.t -; RV64-NEXT: vor.vv v16, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV64-NEXT: lui a2, 1 -; RV64-NEXT: addiw a2, a2, -241 -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vsll.vi v16, v16, 4, v0.t -; RV64-NEXT: vor.vv v16, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t -; RV64-NEXT: lui a3, 3 -; RV64-NEXT: addiw a3, a3, 819 -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vsll.vi v16, v16, 2, v0.t -; RV64-NEXT: vor.vv v16, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: lui a4, 5 -; RV64-NEXT: addiw a4, a4, 1365 -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vand.vx v16, v16, a4, v0.t -; RV64-NEXT: vsll.vi v16, v16, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: addi a5, sp, 16 -; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; RV64-NEXT: bltu a0, a1, .LBB46_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB46_2: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a4, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv64i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; CHECK-NEXT: 
vsrl.vi v8, v16, 8, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t +; CHECK-NEXT: vor.vv v16, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v8, v16, 4, v0.t +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: addi a2, a2, -241 +; CHECK-NEXT: vand.vx v8, v8, a2, v0.t +; CHECK-NEXT: vand.vx v16, v16, a2, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t +; CHECK-NEXT: vor.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v8, v16, 2, v0.t +; CHECK-NEXT: lui a3, 3 +; CHECK-NEXT: addi a3, a3, 819 +; CHECK-NEXT: vand.vx v8, v8, a3, v0.t +; CHECK-NEXT: vand.vx v16, v16, a3, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t +; CHECK-NEXT: vor.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v8, v16, 1, v0.t +; CHECK-NEXT: lui a4, 5 +; CHECK-NEXT: addi a4, a4, 1365 +; CHECK-NEXT: vand.vx v8, v8, a4, v0.t +; CHECK-NEXT: vand.vx v16, v16, a4, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a0, a1, .LBB46_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB46_2: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vand.vx v16, v16, a2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a2, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t +; CHECK-NEXT: vand.vx v16, v16, a3, v0.t +; CHECK-NEXT: vand.vx v8, v8, a3, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: vand.vx v16, v16, a4, v0.t +; CHECK-NEXT: vand.vx v8, v8, a4, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16: ; CHECK-ZVBB: # %bb.0: @@ -3975,121 +3170,63 @@ define @vp_bitreverse_nxv64i16( %va, @vp_bitreverse_nxv64i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv64i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 -; RV32-NEXT: sub a2, a0, a1 -; RV32-NEXT: sltu a3, a0, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 8 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 4 -; RV32-NEXT: lui a2, 1 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vsll.vi v16, v16, 4 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: vsrl.vi v24, v16, 2 -; RV32-NEXT: lui a3, 3 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vsll.vi v16, v16, 2 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: lui a4, 5 -; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vand.vx v24, v24, a4 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vadd.vv v16, v16, v16 -; 
RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: bltu a0, a1, .LBB47_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB47_2: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vand.vx v8, v8, a2 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 2 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: vand.vx v24, v24, a4 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv64i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: sub a2, a0, a1 -; RV64-NEXT: sltu a3, a0, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v24, v16, 8 -; RV64-NEXT: vsll.vi v16, v16, 8 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 4 -; RV64-NEXT: lui a2, 1 -; RV64-NEXT: addiw a2, a2, -241 -; RV64-NEXT: vand.vx v24, v24, a2 -; RV64-NEXT: vand.vx v16, v16, a2 -; RV64-NEXT: vsll.vi v16, v16, 4 -; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vsrl.vi v24, v16, 2 -; RV64-NEXT: lui a3, 3 -; RV64-NEXT: addiw a3, a3, 819 -; RV64-NEXT: vand.vx v24, v24, a3 -; RV64-NEXT: vand.vx v16, v16, a3 -; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: lui a4, 5 -; RV64-NEXT: addiw a4, a4, 1365 -; RV64-NEXT: vand.vx v24, v24, a4 -; RV64-NEXT: vand.vx v16, v16, a4 -; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: bltu a0, a1, .LBB47_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB47_2: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v24, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 4 -; RV64-NEXT: vand.vx v24, v24, a2 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v24, v8 -; RV64-NEXT: vsrl.vi v24, v8, 2 -; RV64-NEXT: vand.vx v24, v24, a3 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v24, v8 -; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: vand.vx v24, v24, a4 -; RV64-NEXT: vand.vx v8, v8, a4 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v24, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv64i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v24, v16, 8 +; CHECK-NEXT: vsll.vi v16, v16, 8 +; CHECK-NEXT: vor.vv v16, v16, v24 +; CHECK-NEXT: vsrl.vi v24, v16, 4 +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: addi a2, a2, -241 +; CHECK-NEXT: vand.vx v24, v24, a2 +; CHECK-NEXT: vand.vx v16, v16, a2 +; CHECK-NEXT: vsll.vi v16, v16, 4 +; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: vsrl.vi v24, v16, 2 +; CHECK-NEXT: lui a3, 3 +; CHECK-NEXT: addi a3, a3, 819 +; CHECK-NEXT: vand.vx v24, v24, a3 +; CHECK-NEXT: vand.vx v16, v16, a3 +; CHECK-NEXT: vsll.vi v16, v16, 2 +; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: 
vsrl.vi v24, v16, 1 +; CHECK-NEXT: lui a4, 5 +; CHECK-NEXT: addi a4, a4, 1365 +; CHECK-NEXT: vand.vx v24, v24, a4 +; CHECK-NEXT: vand.vx v16, v16, a4 +; CHECK-NEXT: vadd.vv v16, v16, v16 +; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: bltu a0, a1, .LBB47_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB47_2: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v24, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v24 +; CHECK-NEXT: vsrl.vi v24, v8, 4 +; CHECK-NEXT: vand.vx v24, v24, a2 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v24, v8 +; CHECK-NEXT: vsrl.vi v24, v8, 2 +; CHECK-NEXT: vand.vx v24, v24, a3 +; CHECK-NEXT: vand.vx v8, v8, a3 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v24, v8 +; CHECK-NEXT: vsrl.vi v24, v8, 1 +; CHECK-NEXT: vand.vx v24, v24, a4 +; CHECK-NEXT: vand.vx v8, v8, a4 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v24, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -4117,65 +3254,35 @@ define @vp_bitreverse_nxv64i16_unmasked( ; Test promotion. declare @llvm.vp.bitreverse.nxv1i9(, , i32) define @vp_bitreverse_nxv1i9( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_nxv1i9: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 7, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv1i9: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 7, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_nxv1i9: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; 
CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 7, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i9: ; CHECK-ZVBB: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll index 884bb206a31eb..c55399b502471 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll @@ -119,37 +119,21 @@ define @bswap_nxv32i16( %va) { declare @llvm.bswap.nxv32i16() define @bswap_nxv1i32( %va) { -; RV32-LABEL: bswap_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: bswap_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: bswap_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: bswap_nxv1i32: ; CHECK-ZVKB: # %bb.0: @@ -162,37 +146,21 @@ define @bswap_nxv1i32( %va) { declare @llvm.bswap.nxv1i32() define @bswap_nxv2i32( %va) { -; RV32-LABEL: bswap_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: bswap_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: 
vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: bswap_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: bswap_nxv2i32: ; CHECK-ZVKB: # %bb.0: @@ -205,37 +173,21 @@ define @bswap_nxv2i32( %va) { declare @llvm.bswap.nxv2i32() define @bswap_nxv4i32( %va) { -; RV32-LABEL: bswap_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: bswap_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsrl.vi v12, v8, 24 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsll.vi v12, v12, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: bswap_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsrl.vi v12, v8, 24 +; CHECK-NEXT: vor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsll.vi v12, v12, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: bswap_nxv4i32: ; CHECK-ZVKB: # %bb.0: @@ -248,37 +200,21 @@ define @bswap_nxv4i32( %va) { declare @llvm.bswap.nxv4i32() define @bswap_nxv8i32( %va) { -; RV32-LABEL: bswap_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: bswap_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsrl.vi v16, v8, 24 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsll.vi v16, v16, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: ret +; CHECK-LABEL: bswap_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, 
ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsrl.vi v16, v8, 24 +; CHECK-NEXT: vor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsll.vi v16, v16, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: bswap_nxv8i32: ; CHECK-ZVKB: # %bb.0: @@ -291,37 +227,21 @@ define @bswap_nxv8i32( %va) { declare @llvm.bswap.nxv8i32() define @bswap_nxv16i32( %va) { -; RV32-LABEL: bswap_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vand.vx v24, v8, a0 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: bswap_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsrl.vi v24, v8, 24 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vand.vx v24, v8, a0 -; RV64-NEXT: vsll.vi v24, v24, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: bswap_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v24, v8, 24 +; CHECK-NEXT: vor.vv v16, v16, v24 +; CHECK-NEXT: vand.vx v24, v8, a0 +; CHECK-NEXT: vsll.vi v24, v24, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: bswap_nxv16i32: ; CHECK-ZVKB: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll index 3c4ff5e7d1cef..6aac13a0bcbb8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -251,37 +251,21 @@ define @vp_bswap_nxv32i16_unmasked( %va, declare @llvm.vp.bswap.nxv1i32(, , i32) define @vp_bswap_nxv1i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsll.vi v10, v10, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV64-NEXT: vor.vv v9, v9, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsll.vi v10, v10, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: 
vp_bswap_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t +; CHECK-NEXT: vor.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: vp_bswap_nxv1i32: ; CHECK-ZVKB: # %bb.0: @@ -293,37 +277,21 @@ define @vp_bswap_nxv1i32( %va, @vp_bswap_nxv1i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_nxv1i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_nxv1i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: vp_bswap_nxv1i32_unmasked: ; CHECK-ZVKB: # %bb.0: @@ -339,37 +307,21 @@ define @vp_bswap_nxv1i32_unmasked( %va, i32 declare @llvm.vp.bswap.nxv2i32(, , i32) define @vp_bswap_nxv2i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsll.vi v10, v10, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV64-NEXT: vor.vv v9, v9, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsll.vi v10, v10, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; 
CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t +; CHECK-NEXT: vor.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: vp_bswap_nxv2i32: ; CHECK-ZVKB: # %bb.0: @@ -381,37 +333,21 @@ define @vp_bswap_nxv2i32( %va, @vp_bswap_nxv2i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_nxv2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_nxv2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: vp_bswap_nxv2i32_unmasked: ; CHECK-ZVKB: # %bb.0: @@ -427,37 +363,21 @@ define @vp_bswap_nxv2i32_unmasked( %va, i32 declare @llvm.vp.bswap.nxv4i32(, , i32) define @vp_bswap_nxv4i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsll.vi v12, v12, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 24, v0.t -; RV64-NEXT: vor.vv v10, v10, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsll.vi v12, v12, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: 
addi a0, a0, -256 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t +; CHECK-NEXT: vor.vv v10, v10, v12, v0.t +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: vp_bswap_nxv4i32: ; CHECK-ZVKB: # %bb.0: @@ -469,37 +389,21 @@ define @vp_bswap_nxv4i32( %va, @vp_bswap_nxv4i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_nxv4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_nxv4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsrl.vi v12, v8, 24 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsll.vi v12, v12, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsrl.vi v12, v8, 24 +; CHECK-NEXT: vor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsll.vi v12, v12, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: vp_bswap_nxv4i32_unmasked: ; CHECK-ZVKB: # %bb.0: @@ -515,37 +419,21 @@ define @vp_bswap_nxv4i32_unmasked( %va, i32 declare @llvm.vp.bswap.nxv8i32(, , i32) define @vp_bswap_nxv8i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV32-NEXT: vor.vv v12, v12, v16, v0.t -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV64-NEXT: vor.vv v12, v12, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsll.vi v16, v16, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v12, v12, a0, 
v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t +; CHECK-NEXT: vor.vv v12, v12, v16, v0.t +; CHECK-NEXT: vand.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: vp_bswap_nxv8i32: ; CHECK-ZVKB: # %bb.0: @@ -557,37 +445,21 @@ define @vp_bswap_nxv8i32( %va, @vp_bswap_nxv8i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_nxv8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_nxv8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsrl.vi v16, v8, 24 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsll.vi v16, v16, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsrl.vi v16, v8, 24 +; CHECK-NEXT: vor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsll.vi v16, v16, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: vp_bswap_nxv8i32_unmasked: ; CHECK-ZVKB: # %bb.0: @@ -603,37 +475,21 @@ define @vp_bswap_nxv8i32_unmasked( %va, i32 declare @llvm.vp.bswap.nxv16i32(, , i32) define @vp_bswap_nxv16i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a0, v0.t -; RV32-NEXT: vsll.vi v24, v24, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t -; RV64-NEXT: vand.vx v24, v8, a0, v0.t -; RV64-NEXT: vsll.vi v24, v24, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vsrl.vi v24, v8, 24, v0.t +; 
CHECK-NEXT: vor.vv v16, v16, v24, v0.t +; CHECK-NEXT: vand.vx v24, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v24, v24, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: vp_bswap_nxv16i32: ; CHECK-ZVKB: # %bb.0: @@ -645,37 +501,21 @@ define @vp_bswap_nxv16i32( %va, @vp_bswap_nxv16i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_nxv16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vand.vx v24, v8, a0 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_nxv16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsrl.vi v24, v8, 24 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vand.vx v24, v8, a0 -; RV64-NEXT: vsll.vi v24, v24, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsrl.vi v24, v8, 24 +; CHECK-NEXT: vor.vv v16, v16, v24 +; CHECK-NEXT: vand.vx v24, v8, a0 +; CHECK-NEXT: vsll.vi v24, v24, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: ret ; ; CHECK-ZVKB-LABEL: vp_bswap_nxv16i32_unmasked: ; CHECK-ZVKB: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll index a128adc8de619..46bedcd4e9666 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll @@ -173,21 +173,13 @@ define @vselect_add_const_nxv2i64( %a0) { } define <2 x i16> @vselect_add_const_signbit_v2i16(<2 x i16> %a0) { -; RV32-LABEL: vselect_add_const_signbit_v2i16: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 8 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vssubu.vx v8, v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: vselect_add_const_signbit_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 8 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vssubu.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vselect_add_const_signbit_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret %cmp = icmp ugt <2 x i16> %a0, %v1 = add <2 x i16> %a0, %v2 = select <2 x i1> %cmp, <2 x i16> %v1, <2 x i16> zeroinitializer @@ -195,21 +187,13 @@ define <2 x i16> @vselect_add_const_signbit_v2i16(<2 x i16> %a0) { } define @vselect_add_const_signbit_nxv2i16( %a0) { -; RV32-LABEL: vselect_add_const_signbit_nxv2i16: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 8 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV32-NEXT: vssubu.vx v8, v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: 
vselect_add_const_signbit_nxv2i16: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 8 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV64-NEXT: vssubu.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vselect_add_const_signbit_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret %cm1 = insertelement poison, i16 32766, i32 0 %splatcm1 = shufflevector %cm1, poison, zeroinitializer %nc = insertelement poison, i16 -32767, i32 0 @@ -318,3 +302,6 @@ declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>) declare @llvm.umin.nxv2i64(, ) declare @llvm.umax.nxv2i64(, ) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll index c1a8c657b4ff8..d78d67d5e3598 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ -394,71 +394,38 @@ define @ctlz_nxv64i8( %va) { declare @llvm.ctlz.nxv64i8(, i1) define @ctlz_nxv1i16( %va) { -; RV32I-LABEL: ctlz_nxv1i16: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_nxv1i16: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_nxv1i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 +; 
CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_nxv1i16: ; CHECK-F: # %bb.0: @@ -493,71 +460,38 @@ define @ctlz_nxv1i16( %va) { declare @llvm.ctlz.nxv1i16(, i1) define @ctlz_nxv2i16( %va) { -; RV32I-LABEL: ctlz_nxv2i16: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_nxv2i16: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_nxv2i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 
+; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_nxv2i16: ; CHECK-F: # %bb.0: @@ -592,71 +526,38 @@ define @ctlz_nxv2i16( %va) { declare @llvm.ctlz.nxv2i16(, i1) define @ctlz_nxv4i16( %va) { -; RV32I-LABEL: ctlz_nxv4i16: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_nxv4i16: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_nxv4i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; 
CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_nxv4i16: ; CHECK-F: # %bb.0: @@ -691,71 +592,38 @@ define @ctlz_nxv4i16( %va) { declare @llvm.ctlz.nxv4i16(, i1) define @ctlz_nxv8i16( %va) { -; RV32I-LABEL: ctlz_nxv8i16: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; RV32I-NEXT: vsrl.vi v10, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v10, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v10, v10, a0 -; RV32I-NEXT: vsub.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v10, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v10, v8 -; RV32I-NEXT: vsrl.vi v10, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_nxv8i16: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; RV64I-NEXT: vsrl.vi v10, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v10, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v10, v10, a0 -; RV64I-NEXT: vsub.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v10, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v10, v8 -; RV64I-NEXT: vsrl.vi v10, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_nxv8i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; 
CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_nxv8i16: ; CHECK-F: # %bb.0: @@ -790,71 +658,38 @@ define @ctlz_nxv8i16( %va) { declare @llvm.ctlz.nxv8i16(, i1) define @ctlz_nxv16i16( %va) { -; RV32I-LABEL: ctlz_nxv16i16: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; RV32I-NEXT: vsrl.vi v12, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v12, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v12, v12, a0 -; RV32I-NEXT: vsub.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v12, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v12, v8 -; RV32I-NEXT: vsrl.vi v12, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_nxv16i16: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; RV64I-NEXT: vsrl.vi v12, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v12, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v12, v12, a0 -; RV64I-NEXT: vsub.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v12, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v12, v8 -; RV64I-NEXT: vsrl.vi v12, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_nxv16i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx 
v12, v12, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_nxv16i16: ; CHECK-F: # %bb.0: @@ -889,71 +724,38 @@ define @ctlz_nxv16i16( %va) { declare @llvm.ctlz.nxv16i16(, i1) define @ctlz_nxv32i16( %va) { -; RV32-LABEL: ctlz_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: ctlz_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: ctlz_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, 
v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctlz_nxv32i16: ; CHECK-ZVBB: # %bb.0: @@ -966,77 +768,41 @@ define @ctlz_nxv32i16( %va) { declare @llvm.ctlz.nxv32i16(, i1) define @ctlz_nxv1i32( %va) { -; RV32I-LABEL: ctlz_nxv1i32: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 16 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_nxv1i32: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 16 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_nxv1i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 16 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; 
CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_nxv1i32: ; CHECK-F: # %bb.0: @@ -1074,77 +840,41 @@ define @ctlz_nxv1i32( %va) { declare @llvm.ctlz.nxv1i32(, i1) define @ctlz_nxv2i32( %va) { -; RV32I-LABEL: ctlz_nxv2i32: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 16 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_nxv2i32: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 16 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_nxv2i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 16 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: 
vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_nxv2i32: ; CHECK-F: # %bb.0: @@ -1182,77 +912,41 @@ define @ctlz_nxv2i32( %va) { declare @llvm.ctlz.nxv2i32(, i1) define @ctlz_nxv4i32( %va) { -; RV32I-LABEL: ctlz_nxv4i32: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32I-NEXT: vsrl.vi v10, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 16 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v10, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v10, v10, a0 -; RV32I-NEXT: vsub.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v10, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v10, v8 -; RV32I-NEXT: vsrl.vi v10, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_nxv4i32: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64I-NEXT: vsrl.vi v10, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 16 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v10, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v10, v10, a0 -; RV64I-NEXT: vsub.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v10, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v10, v8 -; RV64I-NEXT: vsrl.vi v10, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_nxv4i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; 
CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 16 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_nxv4i32: ; CHECK-F: # %bb.0: @@ -1290,77 +984,41 @@ define @ctlz_nxv4i32( %va) { declare @llvm.ctlz.nxv4i32(, i1) define @ctlz_nxv8i32( %va) { -; RV32I-LABEL: ctlz_nxv8i32: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV32I-NEXT: vsrl.vi v12, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 16 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v12, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v12, v12, a0 -; RV32I-NEXT: vsub.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v12, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v12, v8 -; RV32I-NEXT: vsrl.vi v12, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_nxv8i32: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV64I-NEXT: vsrl.vi v12, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 16 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v12, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v12, v12, a0 -; RV64I-NEXT: vsub.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v12, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v12, v8 -; RV64I-NEXT: vsrl.vi v12, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 
-; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_nxv8i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 16 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_nxv8i32: ; CHECK-F: # %bb.0: @@ -1398,77 +1056,41 @@ define @ctlz_nxv8i32( %va) { declare @llvm.ctlz.nxv8i32(, i1) define @ctlz_nxv16i32( %va) { -; RV32I-LABEL: ctlz_nxv16i32: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV32I-NEXT: vsrl.vi v16, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v16 -; RV32I-NEXT: vsrl.vi v16, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v16 -; RV32I-NEXT: vsrl.vi v16, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v16 -; RV32I-NEXT: vsrl.vi v16, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v16 -; RV32I-NEXT: vsrl.vi v16, v8, 16 -; RV32I-NEXT: vor.vv v8, v8, v16 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v16, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v16, v16, a0 -; RV32I-NEXT: vsub.vv v8, v8, v16 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v16, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v16, v8 -; RV32I-NEXT: vsrl.vi v16, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v16 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_nxv16i32: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV64I-NEXT: vsrl.vi v16, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v16 -; RV64I-NEXT: vsrl.vi v16, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v16 -; RV64I-NEXT: vsrl.vi v16, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v16 -; RV64I-NEXT: vsrl.vi v16, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v16 -; RV64I-NEXT: vsrl.vi v16, v8, 16 -; RV64I-NEXT: vor.vv v8, v8, v16 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v16, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v16, v16, a0 -; RV64I-NEXT: vsub.vv v8, v8, v16 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v16, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: 
vadd.vv v8, v16, v8 -; RV64I-NEXT: vsrl.vi v16, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v16 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_nxv16i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 16 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v16, v16, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v16, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v16, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_nxv16i32: ; CHECK-F: # %bb.0: @@ -2425,71 +2047,38 @@ define @ctlz_zero_undef_nxv64i8( %va) { } define @ctlz_zero_undef_nxv1i16( %va) { -; RV32I-LABEL: ctlz_zero_undef_nxv1i16: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_zero_undef_nxv1i16: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: 
vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv1i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv1i16: ; CHECK-F: # %bb.0: @@ -2519,71 +2108,38 @@ define @ctlz_zero_undef_nxv1i16( %va) { } define @ctlz_zero_undef_nxv2i16( %va) { -; RV32I-LABEL: ctlz_zero_undef_nxv2i16: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_zero_undef_nxv2i16: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, 
v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv2i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv2i16: ; CHECK-F: # %bb.0: @@ -2613,71 +2169,38 @@ define @ctlz_zero_undef_nxv2i16( %va) { } define @ctlz_zero_undef_nxv4i16( %va) { -; RV32I-LABEL: ctlz_zero_undef_nxv4i16: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_zero_undef_nxv4i16: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; 
RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv4i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv4i16: ; CHECK-F: # %bb.0: @@ -2707,71 +2230,38 @@ define @ctlz_zero_undef_nxv4i16( %va) { } define @ctlz_zero_undef_nxv8i16( %va) { -; RV32I-LABEL: ctlz_zero_undef_nxv8i16: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; RV32I-NEXT: vsrl.vi v10, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v10, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v10, v10, a0 -; RV32I-NEXT: vsub.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v10, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v10, v8 -; RV32I-NEXT: vsrl.vi v10, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_zero_undef_nxv8i16: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; RV64I-NEXT: vsrl.vi v10, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v10, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v10, v10, a0 -; RV64I-NEXT: vsub.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v10, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v10, v8 -; RV64I-NEXT: vsrl.vi v10, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v10 -; RV64I-NEXT: lui 
a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv8i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv8i16: ; CHECK-F: # %bb.0: @@ -2801,71 +2291,38 @@ define @ctlz_zero_undef_nxv8i16( %va) { } define @ctlz_zero_undef_nxv16i16( %va) { -; RV32I-LABEL: ctlz_zero_undef_nxv16i16: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; RV32I-NEXT: vsrl.vi v12, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v12, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v12, v12, a0 -; RV32I-NEXT: vsub.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v12, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v12, v8 -; RV32I-NEXT: vsrl.vi v12, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_zero_undef_nxv16i16: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; RV64I-NEXT: vsrl.vi v12, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v12, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v12, v12, a0 -; RV64I-NEXT: vsub.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v12, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v12, v8 -; RV64I-NEXT: vsrl.vi v12, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, 
a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv16i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv16i16: ; CHECK-F: # %bb.0: @@ -2895,153 +2352,84 @@ define @ctlz_zero_undef_nxv16i16( %va) { } define @ctlz_zero_undef_nxv32i16( %va) { -; RV32-LABEL: ctlz_zero_undef_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: ctlz_zero_undef_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: 
vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret -; -; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv32i16: -; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-LABEL: ctlz_zero_undef_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret %a = call @llvm.ctlz.nxv32i16( %va, i1 true) ret %a } define @ctlz_zero_undef_nxv1i32( %va) { -; RV32I-LABEL: ctlz_zero_undef_nxv1i32: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 16 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_zero_undef_nxv1i32: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 16 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, 
v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv1i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 16 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv1i32: ; CHECK-F: # %bb.0: @@ -3074,77 +2462,41 @@ define @ctlz_zero_undef_nxv1i32( %va) { } define @ctlz_zero_undef_nxv2i32( %va) { -; RV32I-LABEL: ctlz_zero_undef_nxv2i32: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 16 -; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_zero_undef_nxv2i32: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 16 -; RV64I-NEXT: vor.vv v8, v8, v9 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; 
RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv2i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 16 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv2i32: ; CHECK-F: # %bb.0: @@ -3177,77 +2529,41 @@ define @ctlz_zero_undef_nxv2i32( %va) { } define @ctlz_zero_undef_nxv4i32( %va) { -; RV32I-LABEL: ctlz_zero_undef_nxv4i32: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32I-NEXT: vsrl.vi v10, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 16 -; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v10, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v10, v10, a0 -; RV32I-NEXT: vsub.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v10, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v10, v8 -; RV32I-NEXT: vsrl.vi v10, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_zero_undef_nxv4i32: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64I-NEXT: vsrl.vi v10, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: 
vsrl.vi v10, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 16 -; RV64I-NEXT: vor.vv v8, v8, v10 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v10, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v10, v10, a0 -; RV64I-NEXT: vsub.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v10, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v10, v8 -; RV64I-NEXT: vsrl.vi v10, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv4i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 16 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv4i32: ; CHECK-F: # %bb.0: @@ -3280,77 +2596,41 @@ define @ctlz_zero_undef_nxv4i32( %va) { } define @ctlz_zero_undef_nxv8i32( %va) { -; RV32I-LABEL: ctlz_zero_undef_nxv8i32: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV32I-NEXT: vsrl.vi v12, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 16 -; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v12, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v12, v12, a0 -; RV32I-NEXT: vsub.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v12, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v12, v8 -; RV32I-NEXT: vsrl.vi v12, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; 
RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_zero_undef_nxv8i32: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV64I-NEXT: vsrl.vi v12, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 16 -; RV64I-NEXT: vor.vv v8, v8, v12 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v12, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v12, v12, a0 -; RV64I-NEXT: vsub.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v12, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v12, v8 -; RV64I-NEXT: vsrl.vi v12, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv8i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 16 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv8i32: ; CHECK-F: # %bb.0: @@ -3383,77 +2663,41 @@ define @ctlz_zero_undef_nxv8i32( %va) { } define @ctlz_zero_undef_nxv16i32( %va) { -; RV32I-LABEL: ctlz_zero_undef_nxv16i32: -; RV32I: # %bb.0: -; RV32I-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV32I-NEXT: vsrl.vi v16, v8, 1 -; RV32I-NEXT: vor.vv v8, v8, v16 -; RV32I-NEXT: vsrl.vi v16, v8, 2 -; RV32I-NEXT: vor.vv v8, v8, v16 -; RV32I-NEXT: vsrl.vi v16, v8, 4 -; RV32I-NEXT: vor.vv v8, v8, v16 -; RV32I-NEXT: vsrl.vi v16, v8, 8 -; RV32I-NEXT: vor.vv v8, v8, v16 -; RV32I-NEXT: vsrl.vi v16, v8, 16 -; RV32I-NEXT: vor.vv v8, v8, v16 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vsrl.vi v16, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v16, v16, a0 -; RV32I-NEXT: vsub.vv v8, v8, v16 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: 
vand.vx v16, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v16, v8 -; RV32I-NEXT: vsrl.vi v16, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v16 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: ctlz_zero_undef_nxv16i32: -; RV64I: # %bb.0: -; RV64I-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV64I-NEXT: vsrl.vi v16, v8, 1 -; RV64I-NEXT: vor.vv v8, v8, v16 -; RV64I-NEXT: vsrl.vi v16, v8, 2 -; RV64I-NEXT: vor.vv v8, v8, v16 -; RV64I-NEXT: vsrl.vi v16, v8, 4 -; RV64I-NEXT: vor.vv v8, v8, v16 -; RV64I-NEXT: vsrl.vi v16, v8, 8 -; RV64I-NEXT: vor.vv v8, v8, v16 -; RV64I-NEXT: vsrl.vi v16, v8, 16 -; RV64I-NEXT: vor.vv v8, v8, v16 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vsrl.vi v16, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v16, v16, a0 -; RV64I-NEXT: vsub.vv v8, v8, v16 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v16, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v16, v8 -; RV64I-NEXT: vsrl.vi v16, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v16 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv16i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 2 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 4 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 8 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 16 +; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v16, v16, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v16, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v16, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: ctlz_zero_undef_nxv16i32: ; CHECK-F: # %bb.0: @@ -4025,3 +3269,6 @@ define @ctlz_zero_undef_nxv8i64( %va) { %a = call @llvm.ctlz.nxv8i64( %va, i1 true) ret %a } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index bb0ff1c2bf09b..f479937ba2fc7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -675,71 +675,38 @@ define @vp_ctlz_nxv16i16_unmasked( %va, i declare @llvm.vp.ctlz.nxv32i16(, i1 immarg, , i32) define @vp_ctlz_nxv32i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: 
vadd.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i16: ; CHECK-ZVBB: # %bb.0: @@ -751,71 +718,38 @@ define @vp_ctlz_nxv32i16( %va, @vp_ctlz_nxv32i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_nxv32i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_nxv32i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; 
CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -2071,71 +2005,38 @@ define @vp_ctlz_zero_undef_nxv16i16_unmasked( @vp_ctlz_zero_undef_nxv32i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; 
CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i16: ; CHECK-ZVBB: # %bb.0: @@ -2147,71 +2048,38 @@ define @vp_ctlz_zero_undef_nxv32i16( %va, } define @vp_ctlz_zero_undef_nxv32i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -2862,3 +2730,6 @@ define 
@vp_ctlz_zero_undef_nxv1i9( %va, @llvm.vp.ctlz.nxv1i9( %va, i1 true, %m, i32 %evl) ret %v } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll index ef0a293ad5fb9..1a2b2640ca4fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll @@ -201,53 +201,29 @@ define @ctpop_nxv64i8( %va) { declare @llvm.ctpop.nxv64i8() define @ctpop_nxv1i16( %va) { -; RV32-LABEL: ctpop_nxv1i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: ctpop_nxv1i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: ctpop_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctpop_nxv1i16: ; CHECK-ZVBB: # %bb.0: @@ -260,53 +236,29 @@ define @ctpop_nxv1i16( %va) { declare @llvm.ctpop.nxv1i16() define @ctpop_nxv2i16( %va) { -; RV32-LABEL: ctpop_nxv2i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, 
v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: ctpop_nxv2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: ctpop_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctpop_nxv2i16: ; CHECK-ZVBB: # %bb.0: @@ -319,53 +271,29 @@ define @ctpop_nxv2i16( %va) { declare @llvm.ctpop.nxv2i16() define @ctpop_nxv4i16( %va) { -; RV32-LABEL: ctpop_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: ctpop_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: ctpop_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; 
CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctpop_nxv4i16: ; CHECK-ZVBB: # %bb.0: @@ -378,53 +306,29 @@ define @ctpop_nxv4i16( %va) { declare @llvm.ctpop.nxv4i16() define @ctpop_nxv8i16( %va) { -; RV32-LABEL: ctpop_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: ctpop_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: ctpop_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctpop_nxv8i16: ; CHECK-ZVBB: # %bb.0: @@ -437,53 +341,29 @@ define @ctpop_nxv8i16( %va) { declare @llvm.ctpop.nxv8i16() define @ctpop_nxv16i16( %va) { -; RV32-LABEL: ctpop_nxv16i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: ctpop_nxv16i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 
5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: ctpop_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctpop_nxv16i16: ; CHECK-ZVBB: # %bb.0: @@ -496,53 +376,29 @@ define @ctpop_nxv16i16( %va) { declare @llvm.ctpop.nxv16i16() define @ctpop_nxv32i16( %va) { -; RV32-LABEL: ctpop_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: ctpop_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: ctpop_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, 
v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctpop_nxv32i16: ; CHECK-ZVBB: # %bb.0: @@ -555,55 +411,30 @@ define @ctpop_nxv32i16( %va) { declare @llvm.ctpop.nxv32i16() define @ctpop_nxv1i32( %va) { -; RV32-LABEL: ctpop_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: ctpop_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: ctpop_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctpop_nxv1i32: ; CHECK-ZVBB: # %bb.0: @@ -616,55 +447,30 @@ define @ctpop_nxv1i32( %va) { declare @llvm.ctpop.nxv1i32() define @ctpop_nxv2i32( %va) { -; RV32-LABEL: ctpop_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: ctpop_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 
-; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: ctpop_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctpop_nxv2i32: ; CHECK-ZVBB: # %bb.0: @@ -677,55 +483,30 @@ define @ctpop_nxv2i32( %va) { declare @llvm.ctpop.nxv2i32() define @ctpop_nxv4i32( %va) { -; RV32-LABEL: ctpop_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: ctpop_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: ctpop_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; 
CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctpop_nxv4i32: ; CHECK-ZVBB: # %bb.0: @@ -738,55 +519,30 @@ define @ctpop_nxv4i32( %va) { declare @llvm.ctpop.nxv4i32() define @ctpop_nxv8i32( %va) { -; RV32-LABEL: ctpop_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: ctpop_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: ctpop_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctpop_nxv8i32: ; CHECK-ZVBB: # %bb.0: @@ -799,55 +555,30 @@ define @ctpop_nxv8i32( %va) { declare @llvm.ctpop.nxv8i32() define @ctpop_nxv16i32( %va) { -; RV32-LABEL: ctpop_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, 
v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: ctpop_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: ctpop_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctpop_nxv16i32: ; CHECK-ZVBB: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll index 1c003a33c54bf..9767ba4bbc3b7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -417,53 +417,29 @@ define @vp_ctpop_nxv64i8_unmasked( %va, i32 declare @llvm.vp.ctpop.nxv1i16(, , i32) define @vp_ctpop_nxv1i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv1i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv1i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: 
vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16: ; CHECK-ZVBB: # %bb.0: @@ -475,53 +451,29 @@ define @vp_ctpop_nxv1i16( %va, @vp_ctpop_nxv1i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv1i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv1i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -537,53 +489,29 @@ define @vp_ctpop_nxv1i16_unmasked( %va, i32 declare @llvm.vp.ctpop.nxv2i16(, , i32) define @vp_ctpop_nxv2i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv2i16: -; RV32: # 
%bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16: ; CHECK-ZVBB: # %bb.0: @@ -595,53 +523,29 @@ define @vp_ctpop_nxv2i16( %va, @vp_ctpop_nxv2i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv2i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv2i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; 
RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -657,53 +561,29 @@ define @vp_ctpop_nxv2i16_unmasked( %va, i32 declare @llvm.vp.ctpop.nxv4i16(, , i32) define @vp_ctpop_nxv4i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; 
CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16: ; CHECK-ZVBB: # %bb.0: @@ -715,53 +595,29 @@ define @vp_ctpop_nxv4i16( %va, @vp_ctpop_nxv4i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -777,53 +633,29 @@ define @vp_ctpop_nxv4i16_unmasked( %va, i32 declare @llvm.vp.ctpop.nxv8i16(, , i32) define @vp_ctpop_nxv8i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, 
m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16: ; CHECK-ZVBB: # %bb.0: @@ -835,53 +667,29 @@ define @vp_ctpop_nxv8i16( %va, @vp_ctpop_nxv8i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; 
CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -897,53 +705,29 @@ define @vp_ctpop_nxv8i16_unmasked( %va, i32 declare @llvm.vp.ctpop.nxv16i16(, , i32) define @vp_ctpop_nxv16i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv16i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv16i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16: ; CHECK-ZVBB: # %bb.0: @@ -955,53 +739,29 @@ define @vp_ctpop_nxv16i16( %va, @vp_ctpop_nxv16i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; 
RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1017,53 +777,29 @@ define @vp_ctpop_nxv16i16_unmasked( %va, declare @llvm.vp.ctpop.nxv32i16(, , i32) define @vp_ctpop_nxv32i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: 
vp_ctpop_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16: ; CHECK-ZVBB: # %bb.0: @@ -1075,53 +811,29 @@ define @vp_ctpop_nxv32i16( %va, @vp_ctpop_nxv32i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv32i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv32i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1137,55 +849,30 @@ define @vp_ctpop_nxv32i16_unmasked( %va, declare @llvm.vp.ctpop.nxv1i32(, , i32) define @vp_ctpop_nxv1i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; 
RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32: ; CHECK-ZVBB: # %bb.0: @@ -1197,55 +884,30 @@ define @vp_ctpop_nxv1i32( %va, @vp_ctpop_nxv1i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv1i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv1i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 
209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1261,55 +923,30 @@ define @vp_ctpop_nxv1i32_unmasked( %va, i32 declare @llvm.vp.ctpop.nxv2i32(, , i32) define @vp_ctpop_nxv2i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 
2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32: ; CHECK-ZVBB: # %bb.0: @@ -1321,55 +958,30 @@ define @vp_ctpop_nxv2i32( %va, @vp_ctpop_nxv2i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1385,55 +997,30 @@ define @vp_ctpop_nxv2i32_unmasked( %va, i32 declare @llvm.vp.ctpop.nxv4i32(, , i32) define @vp_ctpop_nxv4i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 
-; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32: ; CHECK-ZVBB: # %bb.0: @@ -1445,55 +1032,30 @@ define @vp_ctpop_nxv4i32( %va, @vp_ctpop_nxv4i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: 
vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1509,55 +1071,30 @@ define @vp_ctpop_nxv4i32_unmasked( %va, i32 declare @llvm.vp.ctpop.nxv8i32(, , i32) define @vp_ctpop_nxv8i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: 
lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32: ; CHECK-ZVBB: # %bb.0: @@ -1569,55 +1106,30 @@ define @vp_ctpop_nxv8i32( %va, @vp_ctpop_nxv8i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1633,55 +1145,30 @@ define @vp_ctpop_nxv8i32_unmasked( %va, i32 declare @llvm.vp.ctpop.nxv16i32(, , i32) define @vp_ctpop_nxv16i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, 
v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32: ; CHECK-ZVBB: # %bb.0: @@ -1693,55 +1180,30 @@ define @vp_ctpop_nxv16i32( %va, @vp_ctpop_nxv16i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; 
RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -3133,59 +2595,32 @@ define @vp_ctpop_nxv16i64_unmasked( %va, declare @llvm.vp.ctpop.nxv1i9(, , i32) define @vp_ctpop_nxv1i9( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_nxv1i9: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 511 -; RV32-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_nxv1i9: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 511 -; RV64-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_nxv1i9: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 511 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; 
CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i9: ; CHECK-ZVBB: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll index 3fcbfa8b142a6..fb47352409046 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll @@ -403,61 +403,33 @@ define @cttz_nxv64i8( %va) { declare @llvm.cttz.nxv64i8(, i1) define @cttz_nxv1i16( %va) { -; RV32I-LABEL: cttz_nxv1i16: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV32I-NEXT: vsub.vx v9, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_nxv1i16: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_nxv1i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_nxv1i16: ; CHECK-F: # %bb.0: @@ -498,61 +470,33 @@ define @cttz_nxv1i16( %va) { declare @llvm.cttz.nxv1i16(, i1) define @cttz_nxv2i16( 
%va) { -; RV32I-LABEL: cttz_nxv2i16: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV32I-NEXT: vsub.vx v9, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_nxv2i16: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_nxv2i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_nxv2i16: ; CHECK-F: # %bb.0: @@ -593,61 +537,33 @@ define @cttz_nxv2i16( %va) { declare @llvm.cttz.nxv2i16(, i1) define @cttz_nxv4i16( %va) { -; RV32I-LABEL: cttz_nxv4i16: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV32I-NEXT: vsub.vx v9, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 
1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_nxv4i16: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_nxv4i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_nxv4i16: ; CHECK-F: # %bb.0: @@ -688,61 +604,33 @@ define @cttz_nxv4i16( %va) { declare @llvm.cttz.nxv4i16(, i1) define @cttz_nxv8i16( %va) { -; RV32I-LABEL: cttz_nxv8i16: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV32I-NEXT: vsub.vx v10, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v10, v10, a0 -; RV32I-NEXT: vsub.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v10, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v10, v8 -; RV32I-NEXT: vsrl.vi v10, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_nxv8i16: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV64I-NEXT: vsub.vx v10, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v10, v10, a0 -; RV64I-NEXT: vsub.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v10, v8, 
a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v10, v8 -; RV64I-NEXT: vsrl.vi v10, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_nxv8i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v10, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_nxv8i16: ; CHECK-F: # %bb.0: @@ -783,61 +671,33 @@ define @cttz_nxv8i16( %va) { declare @llvm.cttz.nxv8i16(, i1) define @cttz_nxv16i16( %va) { -; RV32I-LABEL: cttz_nxv16i16: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV32I-NEXT: vsub.vx v12, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v12, v12, a0 -; RV32I-NEXT: vsub.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v12, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v12, v8 -; RV32I-NEXT: vsrl.vi v12, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_nxv16i16: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV64I-NEXT: vsub.vx v12, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v12, v12, a0 -; RV64I-NEXT: vsub.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v12, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v12, v8 -; RV64I-NEXT: vsrl.vi v12, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_nxv16i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v12, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv 
v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_nxv16i16: ; CHECK-F: # %bb.0: @@ -878,130 +738,73 @@ define @cttz_nxv16i16( %va) { declare @llvm.cttz.nxv16i16(, i1) define @cttz_nxv32i16( %va) { -; RV32-LABEL: cttz_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: li a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a0 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: cttz_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a0 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret -; -; CHECK-ZVBB-LABEL: cttz_nxv32i16: -; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-ZVBB-NEXT: vctz.v v8, v8 -; CHECK-ZVBB-NEXT: ret - %a = call @llvm.cttz.nxv32i16( %va, i1 false) - ret %a +; CHECK-LABEL: cttz_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vsub.vx v16, v8, a0 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; 
CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: cttz_nxv32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret + %a = call @llvm.cttz.nxv32i16( %va, i1 false) + ret %a } declare @llvm.cttz.nxv32i16(, i1) define @cttz_nxv1i32( %va) { -; RV32I-LABEL: cttz_nxv1i32: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV32I-NEXT: vsub.vx v9, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_nxv1i32: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_nxv1i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_nxv1i32: ; CHECK-F: # %bb.0: @@ -1045,63 +848,34 @@ define @cttz_nxv1i32( %va) { declare @llvm.cttz.nxv1i32(, i1) define @cttz_nxv2i32( %va) { -; RV32I-LABEL: cttz_nxv2i32: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 
1 -; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32I-NEXT: vsub.vx v9, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_nxv2i32: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_nxv2i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_nxv2i32: ; CHECK-F: # %bb.0: @@ -1145,63 +919,34 @@ define @cttz_nxv2i32( %va) { declare @llvm.cttz.nxv2i32(, i1) define @cttz_nxv4i32( %va) { -; RV32I-LABEL: cttz_nxv4i32: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32I-NEXT: vsub.vx v10, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v10, v10, a0 -; RV32I-NEXT: vsub.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v10, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v10, 
v8 -; RV32I-NEXT: vsrl.vi v10, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_nxv4i32: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64I-NEXT: vsub.vx v10, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v10, v10, a0 -; RV64I-NEXT: vsub.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v10, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v10, v8 -; RV64I-NEXT: vsrl.vi v10, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_nxv4i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v10, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_nxv4i32: ; CHECK-F: # %bb.0: @@ -1245,63 +990,34 @@ define @cttz_nxv4i32( %va) { declare @llvm.cttz.nxv4i32(, i1) define @cttz_nxv8i32( %va) { -; RV32I-LABEL: cttz_nxv8i32: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32I-NEXT: vsub.vx v12, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v12, v12, a0 -; RV32I-NEXT: vsub.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v12, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v12, v8 -; RV32I-NEXT: vsrl.vi v12, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_nxv8i32: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV64I-NEXT: vsub.vx v12, v8, a0 -; 
RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v12, v12, a0 -; RV64I-NEXT: vsub.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v12, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v12, v8 -; RV64I-NEXT: vsrl.vi v12, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_nxv8i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v12, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_nxv8i32: ; CHECK-F: # %bb.0: @@ -1345,63 +1061,34 @@ define @cttz_nxv8i32( %va) { declare @llvm.cttz.nxv8i32(, i1) define @cttz_nxv16i32( %va) { -; RV32I-LABEL: cttz_nxv16i32: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32I-NEXT: vsub.vx v16, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v16 -; RV32I-NEXT: vsrl.vi v16, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v16, v16, a0 -; RV32I-NEXT: vsub.vv v8, v8, v16 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v16, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v16, v8 -; RV32I-NEXT: vsrl.vi v16, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v16 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_nxv16i32: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64I-NEXT: vsub.vx v16, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v16 -; RV64I-NEXT: vsrl.vi v16, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v16, v16, a0 -; RV64I-NEXT: vsub.vv v8, v8, v16 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v16, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v16, v8 -; RV64I-NEXT: vsrl.vi v16, v8, 4 -; RV64I-NEXT: 
vadd.vv v8, v8, v16 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_nxv16i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v16, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v16, v16, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v16, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v16, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_nxv16i32: ; CHECK-F: # %bb.0: @@ -2465,61 +2152,33 @@ define @cttz_zero_undef_nxv64i8( %va) { } define @cttz_zero_undef_nxv1i16( %va) { -; RV32I-LABEL: cttz_zero_undef_nxv1i16: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV32I-NEXT: vsub.vx v9, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_zero_undef_nxv1i16: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 
+; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_zero_undef_nxv1i16: ; CHECK-F: # %bb.0: @@ -2527,87 +2186,59 @@ define @cttz_zero_undef_nxv1i16( %va) { ; CHECK-F-NEXT: vrsub.vi v9, v8, 0 ; CHECK-F-NEXT: vand.vv v8, v8, v9 ; CHECK-F-NEXT: vfwcvt.f.xu.v v9, v8 -; CHECK-F-NEXT: vnsrl.wi v8, v9, 23 -; CHECK-F-NEXT: li a0, 127 -; CHECK-F-NEXT: vsub.vx v8, v8, a0 -; CHECK-F-NEXT: ret -; -; CHECK-D-LABEL: cttz_zero_undef_nxv1i16: -; CHECK-D: # %bb.0: -; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-D-NEXT: vrsub.vi v9, v8, 0 -; CHECK-D-NEXT: vand.vv v8, v8, v9 -; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8 -; CHECK-D-NEXT: vnsrl.wi v8, v9, 23 -; CHECK-D-NEXT: li a0, 127 -; CHECK-D-NEXT: vsub.vx v8, v8, a0 -; CHECK-D-NEXT: ret -; -; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv1i16: -; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-ZVBB-NEXT: vctz.v v8, v8 -; CHECK-ZVBB-NEXT: ret - %a = call @llvm.cttz.nxv1i16( %va, i1 true) - ret %a -} - -define @cttz_zero_undef_nxv2i16( %va) { -; RV32I-LABEL: cttz_zero_undef_nxv2i16: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV32I-NEXT: vsub.vx v9, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_zero_undef_nxv2i16: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-F-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: ret +; 
+; CHECK-D-LABEL: cttz_zero_undef_nxv1i16: +; CHECK-D: # %bb.0: +; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-D-NEXT: vrsub.vi v9, v8, 0 +; CHECK-D-NEXT: vand.vv v8, v8, v9 +; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8 +; CHECK-D-NEXT: vnsrl.wi v8, v9, 23 +; CHECK-D-NEXT: li a0, 127 +; CHECK-D-NEXT: vsub.vx v8, v8, a0 +; CHECK-D-NEXT: ret +; +; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv1i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret + %a = call @llvm.cttz.nxv1i16( %va, i1 true) + ret %a +} + +define @cttz_zero_undef_nxv2i16( %va) { +; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_zero_undef_nxv2i16: ; CHECK-F: # %bb.0: @@ -2641,61 +2272,33 @@ define @cttz_zero_undef_nxv2i16( %va) { } define @cttz_zero_undef_nxv4i16( %va) { -; RV32I-LABEL: cttz_zero_undef_nxv4i16: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV32I-NEXT: vsub.vx v9, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_zero_undef_nxv4i16: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: 
ret +; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_zero_undef_nxv4i16: ; CHECK-F: # %bb.0: @@ -2729,61 +2332,33 @@ define @cttz_zero_undef_nxv4i16( %va) { } define @cttz_zero_undef_nxv8i16( %va) { -; RV32I-LABEL: cttz_zero_undef_nxv8i16: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV32I-NEXT: vsub.vx v10, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v10, v10, a0 -; RV32I-NEXT: vsub.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v10, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v10, v8 -; RV32I-NEXT: vsrl.vi v10, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_zero_undef_nxv8i16: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV64I-NEXT: vsub.vx v10, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v10, v10, a0 -; RV64I-NEXT: vsub.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v10, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v10, v8 -; RV64I-NEXT: vsrl.vi v10, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v10, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0 +; 
CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_zero_undef_nxv8i16: ; CHECK-F: # %bb.0: @@ -2817,61 +2392,33 @@ define @cttz_zero_undef_nxv8i16( %va) { } define @cttz_zero_undef_nxv16i16( %va) { -; RV32I-LABEL: cttz_zero_undef_nxv16i16: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV32I-NEXT: vsub.vx v12, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 1 -; RV32I-NEXT: lui a0, 5 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v12, v12, a0 -; RV32I-NEXT: vsub.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v12, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v12, v8 -; RV32I-NEXT: vsrl.vi v12, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: li a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 8 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_zero_undef_nxv16i16: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV64I-NEXT: vsub.vx v12, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 1 -; RV64I-NEXT: lui a0, 5 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v12, v12, a0 -; RV64I-NEXT: vsub.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v12, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v12, v8 -; RV64I-NEXT: vsrl.vi v12, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: li a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 8 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i16: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v12, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 3 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 1 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: li a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 8 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_zero_undef_nxv16i16: ; CHECK-F: # %bb.0: @@ -2905,129 +2452,72 @@ define @cttz_zero_undef_nxv16i16( %va) { } define 
@cttz_zero_undef_nxv32i16( %va) { -; RV32-LABEL: cttz_zero_undef_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: li a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a0 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: cttz_zero_undef_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a0 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: cttz_zero_undef_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vsub.vx v16, v8, a0 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv32i16: ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-ZVBB-NEXT: vctz.v v8, v8 -; CHECK-ZVBB-NEXT: ret - %a = call @llvm.cttz.nxv32i16( %va, i1 true) - ret %a -} - -define @cttz_zero_undef_nxv1i32( %va) { -; RV32I-LABEL: cttz_zero_undef_nxv1i32: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV32I-NEXT: vsub.vx v9, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 61681 -; 
RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_zero_undef_nxv1i32: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret + %a = call @llvm.cttz.nxv32i16( %va, i1 true) + ret %a +} + +define @cttz_zero_undef_nxv1i32( %va) { +; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_zero_undef_nxv1i32: ; CHECK-F: # %bb.0: @@ -3064,63 +2554,34 @@ define @cttz_zero_undef_nxv1i32( %va) { } define @cttz_zero_undef_nxv2i32( %va) { -; RV32I-LABEL: cttz_zero_undef_nxv2i32: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32I-NEXT: vsub.vx v9, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vsrl.vi v9, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v9, v9, a0 -; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v9, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: vsrl.vi v9, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v9 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_zero_undef_nxv2i32: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: 
vsetvli a1, zero, e32, m1, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v9 -; RV64I-NEXT: vsrl.vi v9, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v9, v9, a0 -; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v9, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: vsrl.vi v9, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v9 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_zero_undef_nxv2i32: ; CHECK-F: # %bb.0: @@ -3157,63 +2618,34 @@ define @cttz_zero_undef_nxv2i32( %va) { } define @cttz_zero_undef_nxv4i32( %va) { -; RV32I-LABEL: cttz_zero_undef_nxv4i32: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32I-NEXT: vsub.vx v10, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v10 -; RV32I-NEXT: vsrl.vi v10, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v10, v10, a0 -; RV32I-NEXT: vsub.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v10, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v10, v8 -; RV32I-NEXT: vsrl.vi v10, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v10 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_zero_undef_nxv4i32: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64I-NEXT: vsub.vx v10, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v10 -; RV64I-NEXT: vsrl.vi v10, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v10, v10, a0 -; RV64I-NEXT: vsub.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v10, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 
-; RV64I-NEXT: vadd.vv v8, v10, v8 -; RV64I-NEXT: vsrl.vi v10, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v10 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v10, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_zero_undef_nxv4i32: ; CHECK-F: # %bb.0: @@ -3250,63 +2682,34 @@ define @cttz_zero_undef_nxv4i32( %va) { } define @cttz_zero_undef_nxv8i32( %va) { -; RV32I-LABEL: cttz_zero_undef_nxv8i32: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32I-NEXT: vsub.vx v12, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v12 -; RV32I-NEXT: vsrl.vi v12, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v12, v12, a0 -; RV32I-NEXT: vsub.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v12, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v12, v8 -; RV32I-NEXT: vsrl.vi v12, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v12 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_zero_undef_nxv8i32: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV64I-NEXT: vsub.vx v12, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v12 -; RV64I-NEXT: vsrl.vi v12, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v12, v12, a0 -; RV64I-NEXT: vsub.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v12, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v12, v8 -; RV64I-NEXT: vsrl.vi v12, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v12 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li 
a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v12, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v12, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v12, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v12 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_zero_undef_nxv8i32: ; CHECK-F: # %bb.0: @@ -3343,63 +2746,34 @@ define @cttz_zero_undef_nxv8i32( %va) { } define @cttz_zero_undef_nxv16i32( %va) { -; RV32I-LABEL: cttz_zero_undef_nxv16i32: -; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32I-NEXT: vsub.vx v16, v8, a0 -; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: vand.vv v8, v8, v16 -; RV32I-NEXT: vsrl.vi v16, v8, 1 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: vand.vx v16, v16, a0 -; RV32I-NEXT: vsub.vv v8, v8, v16 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: vand.vx v16, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: vadd.vv v8, v16, v8 -; RV32I-NEXT: vsrl.vi v16, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v16 -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: vand.vx v8, v8, a0 -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: vmul.vx v8, v8, a0 -; RV32I-NEXT: vsrl.vi v8, v8, 24 -; RV32I-NEXT: ret -; -; RV64I-LABEL: cttz_zero_undef_nxv16i32: -; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64I-NEXT: vsub.vx v16, v8, a0 -; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: vand.vv v8, v8, v16 -; RV64I-NEXT: vsrl.vi v16, v8, 1 -; RV64I-NEXT: lui a0, 349525 -; RV64I-NEXT: addiw a0, a0, 1365 -; RV64I-NEXT: vand.vx v16, v16, a0 -; RV64I-NEXT: vsub.vv v8, v8, v16 -; RV64I-NEXT: lui a0, 209715 -; RV64I-NEXT: addiw a0, a0, 819 -; RV64I-NEXT: vand.vx v16, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vadd.vv v8, v16, v8 -; RV64I-NEXT: vsrl.vi v16, v8, 4 -; RV64I-NEXT: vadd.vv v8, v8, v16 -; RV64I-NEXT: lui a0, 61681 -; RV64I-NEXT: addiw a0, a0, -241 -; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: lui a0, 4112 -; RV64I-NEXT: addiw a0, a0, 257 -; RV64I-NEXT: vmul.vx v8, v8, a0 -; RV64I-NEXT: vsrl.vi v8, v8, 24 -; RV64I-NEXT: ret +; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i32: +; CHECK-ZVE64X: # %bb.0: +; CHECK-ZVE64X-NEXT: li a0, 1 +; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-ZVE64X-NEXT: vsub.vx v16, v8, a0 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1 +; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 +; CHECK-ZVE64X-NEXT: vand.vx v16, v16, a0 +; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: lui a0, 209715 +; CHECK-ZVE64X-NEXT: 
addi a0, a0, 819 +; CHECK-ZVE64X-NEXT: vand.vx v16, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v16, v8 +; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 4 +; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v16 +; CHECK-ZVE64X-NEXT: lui a0, 61681 +; CHECK-ZVE64X-NEXT: addi a0, a0, -241 +; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: lui a0, 4112 +; CHECK-ZVE64X-NEXT: addi a0, a0, 257 +; CHECK-ZVE64X-NEXT: vmul.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 24 +; CHECK-ZVE64X-NEXT: ret ; ; CHECK-F-LABEL: cttz_zero_undef_nxv16i32: ; CHECK-F: # %bb.0: @@ -3915,3 +3289,6 @@ define @cttz_zero_undef_nxv8i64( %va) { %a = call @llvm.cttz.nxv8i64( %va, i1 true) ret %a } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll index 1794b0cce7f86..1dda8aa458d90 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -473,61 +473,33 @@ define @vp_cttz_nxv64i8_unmasked( %va, i32 declare @llvm.vp.cttz.nxv1i16(, i1 immarg, , i32) define @vp_cttz_nxv1i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv1i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv1i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: 
addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16: ; CHECK-ZVBB: # %bb.0: @@ -539,61 +511,33 @@ define @vp_cttz_nxv1i16( %va, @vp_cttz_nxv1i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv1i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv1i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -609,61 +553,33 @@ define @vp_cttz_nxv1i16_unmasked( %va, i32 declare @llvm.vp.cttz.nxv2i16(, i1 immarg, , i32) define @vp_cttz_nxv2i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv2i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, 
v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv2i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16: ; CHECK-ZVBB: # %bb.0: @@ -675,61 +591,33 @@ define @vp_cttz_nxv2i16( %va, @vp_cttz_nxv2i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv2i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; 
RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv2i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -745,61 +633,33 @@ define @vp_cttz_nxv2i16_unmasked( %va, i32 declare @llvm.vp.cttz.nxv4i16(, i1 immarg, , i32) define @vp_cttz_nxv4i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: 
vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16: ; CHECK-ZVBB: # %bb.0: @@ -811,61 +671,33 @@ define @vp_cttz_nxv4i16( %va, @vp_cttz_nxv4i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; 
CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -881,61 +713,33 @@ define @vp_cttz_nxv4i16_unmasked( %va, i32 declare @llvm.vp.cttz.nxv8i16(, i1 immarg, , i32) define @vp_cttz_nxv8i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16: ; CHECK-ZVBB: # %bb.0: @@ -947,61 +751,33 @@ define @vp_cttz_nxv8i16( %va, @vp_cttz_nxv8i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv8i16_unmasked: -; RV32: # %bb.0: -; 
RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsub.vx v10, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1017,61 +793,33 @@ define @vp_cttz_nxv8i16_unmasked( %va, i32 declare @llvm.vp.cttz.nxv16i16(, i1 immarg, , i32) define @vp_cttz_nxv16i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv16i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; 
RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv16i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16: ; CHECK-ZVBB: # %bb.0: @@ -1083,61 +831,33 @@ define @vp_cttz_nxv16i16( %va, @vp_cttz_nxv16i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; 
RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsub.vx v12, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1153,61 +873,33 @@ define @vp_cttz_nxv16i16_unmasked( %va, i declare @llvm.vp.cttz.nxv32i16(, i1 immarg, , i32) define @vp_cttz_nxv32i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; 
CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16: ; CHECK-ZVBB: # %bb.0: @@ -1219,61 +911,33 @@ define @vp_cttz_nxv32i16( %va, @vp_cttz_nxv32i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv32i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv32i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsub.vx v16, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1289,63 +953,34 @@ define @vp_cttz_nxv32i16_unmasked( %va, i declare @llvm.vp.cttz.nxv1i32(, i1 immarg, , i32) define @vp_cttz_nxv1i32( %va, %m, i32 zeroext %evl) 
{ -; RV32-LABEL: vp_cttz_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32: ; CHECK-ZVBB: # %bb.0: @@ -1357,63 +992,34 @@ define @vp_cttz_nxv1i32( %va, @vp_cttz_nxv1i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv1i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; 
RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv1i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1429,63 +1035,34 @@ define @vp_cttz_nxv1i32_unmasked( %va, i32 declare @llvm.vp.cttz.nxv2i32(, i1 immarg, , i32) define @vp_cttz_nxv2i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: 
vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32: ; CHECK-ZVBB: # %bb.0: @@ -1497,63 +1074,34 @@ define @vp_cttz_nxv2i32( %va, @vp_cttz_nxv2i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; 
RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1569,63 +1117,34 @@ define @vp_cttz_nxv2i32_unmasked( %va, i32 declare @llvm.vp.cttz.nxv4i32(, i1 immarg, , i32) define @vp_cttz_nxv4i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi 
a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv4i32: ; CHECK-ZVBB: # %bb.0: @@ -1637,63 +1156,34 @@ define @vp_cttz_nxv4i32( %va, @vp_cttz_nxv4i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsub.vx v10, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: 
vp_cttz_nxv4i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1709,63 +1199,34 @@ define @vp_cttz_nxv4i32_unmasked( %va, i32 declare @llvm.vp.cttz.nxv8i32(, i1 immarg, , i32) define @vp_cttz_nxv8i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32: ; CHECK-ZVBB: # %bb.0: @@ -1777,63 +1238,34 @@ define @vp_cttz_nxv8i32( %va, @vp_cttz_nxv8i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v12 -; 
RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsub.vx v12, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -1849,63 +1281,34 @@ define @vp_cttz_nxv8i32_unmasked( %va, i32 declare @llvm.vp.cttz.nxv16i32(, i1 immarg, , i32) define @vp_cttz_nxv16i32( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: 
addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32: ; CHECK-ZVBB: # %bb.0: @@ -1917,63 +1320,34 @@ define @vp_cttz_nxv16i32( %va, @vp_cttz_nxv16i32_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 
209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsub.vx v16, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -4138,61 +3512,33 @@ define @vp_cttz_zero_undef_nxv16i16_unmasked( @vp_cttz_zero_undef_nxv32i16( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; 
CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16: ; CHECK-ZVBB: # %bb.0: @@ -4204,61 +3550,33 @@ define @vp_cttz_zero_undef_nxv32i16( %va, } define @vp_cttz_zero_undef_nxv32i16_unmasked( %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsub.vx v16, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: vsrl.vi v16, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; 
CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: ; CHECK-ZVBB: # %bb.0: @@ -4918,65 +4236,35 @@ define @vp_cttz_zero_undef_nxv16i64_unmasked( @llvm.vp.cttz.nxv1i9(, i1 immarg, , i32) define @vp_cttz_nxv1i9( %va, %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_nxv1i9: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 512 -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vor.vx v8, v8, a1, v0.t -; RV32-NEXT: li a0, 1 -; RV32-NEXT: vsub.vx v9, v8, a0, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_nxv1i9: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 512 -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vor.vx v8, v8, a1, v0.t -; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsub.vx v9, v8, a0, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_nxv1i9: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 512 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vor.vx v8, v8, a1, v0.t +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: vsub.vx v9, v8, a0, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv1i9: ; CHECK-ZVBB: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll index f5e5b9e9083b8..34dcce3fe058b 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll @@ -769,7 +769,7 @@ define i32 @extractelt_sdiv_nxv4i32_splat( %x) { ; RV64NOM-LABEL: extractelt_sdiv_nxv4i32_splat: ; RV64NOM: # %bb.0: ; RV64NOM-NEXT: lui a0, 349525 -; RV64NOM-NEXT: addiw a0, a0, 1366 +; RV64NOM-NEXT: addi a0, a0, 1366 ; RV64NOM-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; RV64NOM-NEXT: vmulh.vx v8, v8, a0 ; RV64NOM-NEXT: vsrl.vi v10, v8, 31 @@ -799,7 +799,7 @@ define i32 @extractelt_udiv_nxv4i32_splat( %x) { ; RV64NOM-LABEL: extractelt_udiv_nxv4i32_splat: ; RV64NOM: # %bb.0: ; RV64NOM-NEXT: lui a0, 349525 -; RV64NOM-NEXT: addiw a0, a0, 1366 +; RV64NOM-NEXT: addi a0, a0, 1366 ; RV64NOM-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; RV64NOM-NEXT: vmulh.vx v8, v8, a0 ; RV64NOM-NEXT: vsrl.vi v10, v8, 31 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index cf8dfea197afa..22f92fe48e22e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -231,125 +231,67 @@ define <16 x i8> @vp_bitreverse_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) declare <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16>, <2 x i1>, i32) define <2 x i16> @vp_bitreverse_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v2i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; 
CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret %v = call <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl) ret <2 x i16> %v } define <2 x i16> @vp_bitreverse_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v2i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v2i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, 
a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl) @@ -359,125 +301,67 @@ define <2 x i16> @vp_bitreverse_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) declare <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16>, <4 x i1>, i32) define <4 x i16> @vp_bitreverse_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v4i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; 
CHECK-NEXT: ret %v = call <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl) ret <4 x i16> %v } define <4 x i16> @vp_bitreverse_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl) @@ -487,125 +371,67 @@ define <4 x i16> @vp_bitreverse_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) declare <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16>, <8 x i1>, i32) define <8 x i16> @vp_bitreverse_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, 
v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret %v = call <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl) ret <8 x i16> %v } define <8 x i16> @vp_bitreverse_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 
2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl) @@ -615,125 +441,67 @@ define <8 x i16> @vp_bitreverse_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) declare <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16>, <16 x i1>, i32) define <16 x i16> @vp_bitreverse_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v16i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: ret -; -; 
RV64-LABEL: vp_bitreverse_v16i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t +; CHECK-NEXT: ret %v = call <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl) ret <16 x i16> %v } define <16 x i16> @vp_bitreverse_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: 
vsrl.vi v10, v8, 2 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 2 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl) @@ -743,157 +511,83 @@ define <16 x i16> @vp_bitreverse_v16i16_unmasked(<16 x i16> %va, i32 zeroext %ev declare <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32>, <2 x i1>, i32) define <2 x i32> @vp_bitreverse_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsll.vi v10, v10, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV64-NEXT: vor.vv v9, v9, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsll.vi v10, v10, 8, v0.t -; 
RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t +; CHECK-NEXT: vor.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret %v = call <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl) ret <2 x i32> %v } define <2 x i32> @vp_bitreverse_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; 
-; RV64-LABEL: vp_bitreverse_v2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl) @@ -903,317 +597,169 @@ define <2 x i32> @vp_bitreverse_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) declare <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32>, <4 x i1>, i32) define <4 x i32> @vp_bitreverse_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsll.vi v10, v10, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; 
RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV64-NEXT: vor.vv v9, v9, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsll.vi v10, v10, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t +; CHECK-NEXT: vor.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl) ret <4 x i32> %v } define <4 x i32> @vp_bitreverse_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, 
-256 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl) - ret <4 x i32> %v -} - -declare <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32>, <8 x i1>, i32) - -define <8 x i32> @vp_bitreverse_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsll.vi v12, v12, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; 
RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 24, v0.t -; RV64-NEXT: vor.vv v10, v10, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsll.vi v12, v12, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t -; RV64-NEXT: ret - %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl) - ret <8 x i32> %v -} - -define <8 x i32> @vp_bitreverse_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsrl.vi v12, v8, 24 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsll.vi v12, v12, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v10, v10, a0 -; 
RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32>, <8 x i1>, i32) + +define <8 x i32> @vp_bitreverse_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_bitreverse_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t +; CHECK-NEXT: vor.vv v10, v10, v12, v0.t +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 
x i32> %va, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vp_bitreverse_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_bitreverse_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsrl.vi v12, v8, 24 +; CHECK-NEXT: vor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsll.vi v12, v12, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl) @@ -1223,157 +769,83 @@ define <8 x i32> @vp_bitreverse_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) declare <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32>, <16 x i1>, i32) define <16 x i32> @vp_bitreverse_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV32-NEXT: vor.vv v12, v12, v16, v0.t -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV64-NEXT: vor.vv v12, v12, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsll.vi v16, v16, 8, v0.t -; 
RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v12, v8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t +; CHECK-NEXT: vor.vv v12, v12, v16, v0.t +; CHECK-NEXT: vand.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v12, v8, v0.t +; CHECK-NEXT: ret %v = call <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl) ret <16 x i32> %v } define <16 x i32> @vp_bitreverse_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v8, 
v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsrl.vi v16, v8, 24 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsll.vi v16, v16, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsrl.vi v16, v8, 24 +; CHECK-NEXT: vor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsll.vi v16, v16, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl) @@ -2945,295 +2417,152 @@ define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %ev declare <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16>, <128 x i1>, i32) define <128 x i16> @vp_bitreverse_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v128i16: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; RV32-NEXT: li a2, 64 -; RV32-NEXT: 
vslidedown.vi v24, v0, 8 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB34_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 64 -; RV32-NEXT: .LBB34_2: -; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: lui a1, 1 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vand.vx v16, v16, a1, v0.t -; RV32-NEXT: vand.vx v8, v8, a1, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: lui a2, 3 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vand.vx v8, v8, a2, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a3, 5 -; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vand.vx v16, v16, a3, v0.t -; RV32-NEXT: vand.vx v8, v8, a3, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: addi a4, a0, -64 -; RV32-NEXT: sltu a0, a0, a4 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a4 -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vand.vx v16, v16, a1, v0.t -; RV32-NEXT: vand.vx v8, v8, a1, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vand.vx v8, v8, a2, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vand.vx v16, v16, a3, v0.t -; RV32-NEXT: vand.vx v8, v8, a3, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v16, v16, v8, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v128i16: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; RV64-NEXT: li a2, 64 -; RV64-NEXT: vslidedown.vi v24, v0, 8 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB34_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 64 -; RV64-NEXT: .LBB34_2: -; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: lui a1, 1 -; RV64-NEXT: addiw a1, a1, -241 -; RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: 
vand.vx v8, v8, a1, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: lui a2, 3 -; RV64-NEXT: addiw a2, a2, 819 -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a3, 5 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV64-NEXT: addi a4, a0, -64 -; RV64-NEXT: sltu a0, a0, a4 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a4 -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t -; RV64-NEXT: vsll.vi v8, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vsll.vi v8, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vor.vv v16, v16, v8, v0.t -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v128i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vslidedown.vi v24, v0, 8 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB34_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: .LBB34_2: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: lui a1, 1 +; CHECK-NEXT: addi a1, a1, -241 +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t +; CHECK-NEXT: lui a2, 3 +; CHECK-NEXT: addi a2, a2, 819 +; CHECK-NEXT: vand.vx v16, v16, a2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a2, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: lui a3, 5 +; CHECK-NEXT: addi 
a3, a3, 1365 +; CHECK-NEXT: vand.vx v16, v16, a3, v0.t +; CHECK-NEXT: vand.vx v8, v8, a3, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: addi a4, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: addi a4, a0, -64 +; CHECK-NEXT: sltu a0, a0, a4 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a4 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t +; CHECK-NEXT: vand.vx v16, v16, a2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a2, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: vand.vx v16, v16, a3, v0.t +; CHECK-NEXT: vand.vx v8, v8, a3, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vor.vv v16, v16, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %v = call <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl) ret <128 x i16> %v } define <128 x i16> @vp_bitreverse_v128i16_unmasked(<128 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bitreverse_v128i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 64 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB35_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 64 -; RV32-NEXT: .LBB35_2: -; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: lui a1, 1 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vand.vx v24, v24, a1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 2 -; RV32-NEXT: lui a2, 3 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vand.vx v8, v8, a2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: lui a3, 5 -; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: addi a4, a0, -64 -; RV32-NEXT: sltu a0, a0, a4 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a4 -; RV32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 8 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 4 -; RV32-NEXT: vand.vx v24, v24, a1 -; RV32-NEXT: vand.vx v16, v16, a1 -; RV32-NEXT: vsll.vi v16, v16, 4 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: vsrl.vi v24, v16, 2 -; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vsll.vi v16, v16, 2 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vand.vx v16, v16, 
a3 -; RV32-NEXT: vadd.vv v16, v16, v16 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v128i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 64 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB35_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 64 -; RV64-NEXT: .LBB35_2: -; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v24, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 4 -; RV64-NEXT: lui a1, 1 -; RV64-NEXT: addiw a1, a1, -241 -; RV64-NEXT: vand.vx v24, v24, a1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v24, v8 -; RV64-NEXT: vsrl.vi v24, v8, 2 -; RV64-NEXT: lui a2, 3 -; RV64-NEXT: addiw a2, a2, 819 -; RV64-NEXT: vand.vx v24, v24, a2 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v24, v8 -; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: lui a3, 5 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: vand.vx v24, v24, a3 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v24, v8 -; RV64-NEXT: addi a4, a0, -64 -; RV64-NEXT: sltu a0, a0, a4 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a4 -; RV64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; RV64-NEXT: vsrl.vi v24, v16, 8 -; RV64-NEXT: vsll.vi v16, v16, 8 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 4 -; RV64-NEXT: vand.vx v24, v24, a1 -; RV64-NEXT: vand.vx v16, v16, a1 -; RV64-NEXT: vsll.vi v16, v16, 4 -; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vsrl.vi v24, v16, 2 -; RV64-NEXT: vand.vx v24, v24, a2 -; RV64-NEXT: vand.vx v16, v16, a2 -; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: vand.vx v24, v24, a3 -; RV64-NEXT: vand.vx v16, v16, a3 -; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bitreverse_v128i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB35_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: .LBB35_2: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v24, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v24 +; CHECK-NEXT: vsrl.vi v24, v8, 4 +; CHECK-NEXT: lui a1, 1 +; CHECK-NEXT: addi a1, a1, -241 +; CHECK-NEXT: vand.vx v24, v24, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v24, v8 +; CHECK-NEXT: vsrl.vi v24, v8, 2 +; CHECK-NEXT: lui a2, 3 +; CHECK-NEXT: addi a2, a2, 819 +; CHECK-NEXT: vand.vx v24, v24, a2 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v24, v8 +; CHECK-NEXT: vsrl.vi v24, v8, 1 +; CHECK-NEXT: lui a3, 5 +; CHECK-NEXT: addi a3, a3, 1365 +; CHECK-NEXT: vand.vx v24, v24, a3 +; CHECK-NEXT: vand.vx v8, v8, a3 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v24, v8 +; CHECK-NEXT: addi a4, a0, -64 +; CHECK-NEXT: sltu a0, a0, a4 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a4 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v24, v16, 8 +; CHECK-NEXT: vsll.vi v16, v16, 8 +; CHECK-NEXT: vor.vv v16, v16, v24 +; CHECK-NEXT: vsrl.vi v24, v16, 4 +; CHECK-NEXT: vand.vx v24, v24, a1 +; CHECK-NEXT: vand.vx v16, v16, a1 +; CHECK-NEXT: vsll.vi v16, v16, 4 +; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: vsrl.vi v24, v16, 2 +; CHECK-NEXT: vand.vx v24, v24, a2 +; CHECK-NEXT: 
vand.vx v16, v16, a2 +; CHECK-NEXT: vsll.vi v16, v16, 2 +; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: vsrl.vi v24, v16, 1 +; CHECK-NEXT: vand.vx v24, v24, a3 +; CHECK-NEXT: vand.vx v16, v16, a3 +; CHECK-NEXT: vadd.vv v16, v16, v16 +; CHECK-NEXT: vor.vv v16, v24, v16 +; CHECK-NEXT: ret %head = insertelement <128 x i1> poison, i1 true, i32 0 %m = shufflevector <128 x i1> %head, <128 x i1> poison, <128 x i32> zeroinitializer %v = call <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll index 06256d49f12b2..74a3153b5839d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -47,21 +47,21 @@ define void @bitreverse_v8i16(ptr %x, ptr %y) { ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: lui a1, 1 -; RV64-NEXT: addiw a1, a1, -241 +; RV64-NEXT: addi a1, a1, -241 ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 2 ; RV64-NEXT: lui a1, 3 -; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: addi a1, a1, 819 ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 1 ; RV64-NEXT: lui a1, 5 -; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addi a1, a1, 1365 ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -130,7 +130,7 @@ define void @bitreverse_v4i32(ptr %x, ptr %y) { ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: vsrl.vi v9, v8, 8 ; RV64-NEXT: lui a1, 16 -; RV64-NEXT: addiw a1, a1, -256 +; RV64-NEXT: addi a1, a1, -256 ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 @@ -141,21 +141,21 @@ define void @bitreverse_v4i32(ptr %x, ptr %y) { ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: lui a1, 61681 -; RV64-NEXT: addiw a1, a1, -241 +; RV64-NEXT: addi a1, a1, -241 ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 2 ; RV64-NEXT: lui a1, 209715 -; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: addi a1, a1, 819 ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 1 ; RV64-NEXT: lui a1, 349525 -; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addi a1, a1, 1365 ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -368,21 +368,21 @@ define void @bitreverse_v16i16(ptr %x, ptr %y) { ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64-NEXT: lui a1, 1 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2 ; LMULMAX2-RV64-NEXT: lui a1, 3 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 ; 
LMULMAX2-RV64-NEXT: lui a1, 5 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8 @@ -453,21 +453,21 @@ define void @bitreverse_v16i16(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: lui a2, 1 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -241 +; LMULMAX1-RV64-NEXT: addi a2, a2, -241 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 ; LMULMAX1-RV64-NEXT: lui a3, 3 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 819 +; LMULMAX1-RV64-NEXT: addi a3, a3, 819 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX1-RV64-NEXT: lui a4, 5 -; LMULMAX1-RV64-NEXT: addiw a4, a4, 1365 +; LMULMAX1-RV64-NEXT: addi a4, a4, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 @@ -555,7 +555,7 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) { ; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8 ; LMULMAX2-RV64-NEXT: lui a1, 16 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -256 +; LMULMAX2-RV64-NEXT: addi a1, a1, -256 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12 @@ -566,21 +566,21 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) { ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64-NEXT: lui a1, 61681 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2 ; LMULMAX2-RV64-NEXT: lui a1, 209715 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX2-RV64-NEXT: lui a1, 349525 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8 @@ -662,7 +662,7 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vle32.v v9, (a0) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8 ; LMULMAX1-RV64-NEXT: lui a2, 16 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -256 +; LMULMAX1-RV64-NEXT: addi a2, a2, -256 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 @@ -673,21 +673,21 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: lui a3, 61681 -; LMULMAX1-RV64-NEXT: addiw a3, a3, -241 +; LMULMAX1-RV64-NEXT: addi a3, a3, -241 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; 
LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 ; LMULMAX1-RV64-NEXT: lui a4, 209715 -; LMULMAX1-RV64-NEXT: addiw a4, a4, 819 +; LMULMAX1-RV64-NEXT: addi a4, a4, 819 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX1-RV64-NEXT: lui a5, 349525 -; LMULMAX1-RV64-NEXT: addiw a5, a5, 1365 +; LMULMAX1-RV64-NEXT: addi a5, a5, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a5 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index 050fa3bdaa7c9..22061040ddbc1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -119,73 +119,41 @@ define <16 x i16> @vp_bswap_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { declare <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32>, <2 x i1>, i32) define <2 x i32> @vp_bswap_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_v2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsll.vi v10, v10, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV64-NEXT: vor.vv v9, v9, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsll.vi v10, v10, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t +; CHECK-NEXT: vor.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret %v = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl) ret <2 x i32> %v } define <2 x i32> @vp_bswap_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_v2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_v2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a0, 
16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl) @@ -195,73 +163,41 @@ define <2 x i32> @vp_bswap_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { declare <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32>, <4 x i1>, i32) define <4 x i32> @vp_bswap_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsll.vi v10, v10, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t -; RV64-NEXT: vor.vv v9, v9, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsll.vi v10, v10, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t +; CHECK-NEXT: vor.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret %v = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl) ret <4 x i32> %v } define <4 x i32> @vp_bswap_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_v4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: 
vp_bswap_v4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl) @@ -271,73 +207,41 @@ define <4 x i32> @vp_bswap_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { declare <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32>, <8 x i1>, i32) define <8 x i32> @vp_bswap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_v8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsll.vi v12, v12, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 24, v0.t -; RV64-NEXT: vor.vv v10, v10, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsll.vi v12, v12, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t +; CHECK-NEXT: vor.vv v10, v10, v12, v0.t +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret %v = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl) ret <8 x i32> %v } define <8 x i32> @vp_bswap_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_v8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsll.vi 
v12, v12, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_v8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsrl.vi v12, v8, 24 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsll.vi v12, v12, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsrl.vi v12, v8, 24 +; CHECK-NEXT: vor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsll.vi v12, v12, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl) @@ -347,73 +251,41 @@ define <8 x i32> @vp_bswap_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { declare <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32>, <16 x i1>, i32) define <16 x i32> @vp_bswap_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_v16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV32-NEXT: vor.vv v12, v12, v16, v0.t -; RV32-NEXT: vand.vx v16, v8, a0, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 24, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_v16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV64-NEXT: vor.vv v12, v12, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsll.vi v16, v16, 8, v0.t -; RV64-NEXT: vsll.vi v8, v8, 24, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t +; CHECK-NEXT: vor.vv v12, v12, v16, v0.t +; CHECK-NEXT: vand.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret %v = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl) ret <16 x i32> %v } define <16 x i32> @vp_bswap_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_bswap_v16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: lui a0, 16 -; 
RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bswap_v16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsrl.vi v16, v8, 24 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsll.vi v16, v16, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: ret +; CHECK-LABEL: vp_bswap_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 8 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -256 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsrl.vi v16, v8, 24 +; CHECK-NEXT: vor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vsll.vi v16, v16, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll index 4d78da2d64760..628a3e072abcd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -33,41 +33,23 @@ define void @bswap_v8i16(ptr %x, ptr %y) { declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) define void @bswap_v4i32(ptr %x, ptr %y) { -; RV32-LABEL: bswap_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: vand.vx v9, v9, a1 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a1 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: bswap_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a1, 16 -; RV64-NEXT: addiw a1, a1, -256 -; RV64-NEXT: vand.vx v9, v9, a1 -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a1 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: bswap_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a1, 16 +; CHECK-NEXT: addi a1, a1, -256 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a1 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vse32.v v8, (a0) +; 
CHECK-NEXT: ret ; ; ZVKB-LABEL: bswap_v4i32: ; ZVKB: # %bb.0: @@ -269,7 +251,7 @@ define void @bswap_v8i32(ptr %x, ptr %y) { ; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8 ; LMULMAX2-RV64-NEXT: lui a1, 16 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -256 +; LMULMAX2-RV64-NEXT: addi a1, a1, -256 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12 @@ -319,7 +301,7 @@ define void @bswap_v8i32(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vle32.v v9, (a0) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8 ; LMULMAX1-RV64-NEXT: lui a2, 16 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -256 +; LMULMAX1-RV64-NEXT: addi a2, a2, -256 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll index 9e4584eb17ff9..2bbc04172bd14 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll @@ -236,7 +236,7 @@ define <4 x i32> @add_constant_rhs_with_identity(i32 %a, i32 %b, i32 %c, i32 %d) ; RV64: # %bb.0: ; RV64-NEXT: addiw a1, a1, 25 ; RV64-NEXT: addiw a2, a2, 1 -; RV64-NEXT: addiw a3, a3, 2047 +; RV64-NEXT: addi a3, a3, 2047 ; RV64-NEXT: addiw a3, a3, 308 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vslide1down.vx v8, v8, a0 @@ -273,7 +273,7 @@ define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) { ; RV64: # %bb.0: ; RV64-NEXT: addiw a1, a1, 25 ; RV64-NEXT: addiw a2, a2, 1 -; RV64-NEXT: addiw a3, a3, 2047 +; RV64-NEXT: addi a3, a3, 2047 ; RV64-NEXT: addiw a3, a3, 308 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vslide1down.vx v8, v8, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll index e53877f53833f..d47971ef5a13c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -247,141 +247,75 @@ define <16 x i8> @vp_ctlz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { declare <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32) define <2 x i16> @vp_ctlz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v2i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v2i16: -; 
RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl) ret <2 x i16> %v } define <2 x i16> @vp_ctlz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v2i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v2i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: 
vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl) @@ -391,141 +325,75 @@ define <2 x i16> @vp_ctlz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { declare <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32) define <4 x i16> @vp_ctlz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v4i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; 
RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl) ret <4 x i16> %v } define <4 x i16> @vp_ctlz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv 
v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl) @@ -535,141 +403,75 @@ define <4 x i16> @vp_ctlz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { declare <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32) define <8 x i16> @vp_ctlz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t 
-; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl) ret <8 x i16> %v } define <8 x i16> @vp_ctlz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 
-; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl) @@ -679,141 +481,75 @@ define <8 x i16> @vp_ctlz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { declare <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32) define <16 x i16> @vp_ctlz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v16i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v16i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; 
RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl) ret <16 x i16> %v } define <16 x i16> @vp_ctlz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: 
vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl) @@ -823,153 +559,81 @@ define <16 x i16> @vp_ctlz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { declare <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32) define <2 x i32> @vp_ctlz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: 
vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl) ret <2 x i32> %v } define <2 x i32> @vp_ctlz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, 
-241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 16 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl) @@ -979,9 +643,261 @@ define <2 x i32> @vp_ctlz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { declare <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32) define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v4i32: +; CHECK-LABEL: vp_ctlz_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, 
a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 16 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32) + +define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; 
CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 16 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32) + +define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, 
v12 +; CHECK-NEXT: vsrl.vi v12, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 16 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32) + +define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v9, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t @@ -992,32 +908,48 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v8, v8, v9, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t ; RV32-NEXT: vor.vv v8, v8, v9, v0.t +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t +; RV32-NEXT: vor.vv v8, v8, v9, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vand.vv v10, v8, v9, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v9, v0.t +; RV32-NEXT: vadd.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vand.vv v8, v8, v9, v0.t +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, 
m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v9, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_v4i32: +; RV64-LABEL: vp_ctlz_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v8, v9, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t @@ -1028,14 +960,21 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: vor.vv v8, v8, v9, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t ; RV64-NEXT: vor.vv v8, v8, v9, v0.t +; RV64-NEXT: li a0, 32 +; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t +; RV64-NEXT: vor.vv v8, v8, v9, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t ; RV64-NEXT: lui a0, 349525 ; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t ; RV64-NEXT: lui a0, 209715 ; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t @@ -1044,20 +983,25 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: lui a0, 4112 ; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV64-NEXT: li a0, 56 +; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret - %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl) - ret <4 x i32> %v + %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v } -define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v4i32_unmasked: +define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_v2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 2 @@ -1068,32 +1012,48 @@ define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 16 ; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v9, v8, a1 +; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vand.vv v10, v8, v9 ; RV32-NEXT: vsrl.vi 
v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 +; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: vadd.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v9 +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_v4i32_unmasked: +; RV64-LABEL: vp_ctlz_v2i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV64-NEXT: vsrl.vi v9, v8, 1 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 2 @@ -1104,14 +1064,21 @@ define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 16 ; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: li a0, 32 +; RV64-NEXT: vsrl.vx v9, v8, a0 +; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vsrl.vi v9, v8, 1 ; RV64-NEXT: lui a0, 349525 ; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 ; RV64-NEXT: lui a0, 209715 ; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a0 @@ -1120,24 +1087,29 @@ define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; RV64-NEXT: vadd.vv v8, v8, v9 ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: lui a0, 4112 ; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 +; RV64-NEXT: li a0, 56 +; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl) - ret <4 x i32> %v + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v } -declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32) +declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32) -define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v8i32: +define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; 
RV32-NEXT: vsrl.vi v10, v8, 2, v0.t @@ -1148,32 +1120,48 @@ define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t ; RV32-NEXT: vor.vv v8, v8, v10, v0.t +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t +; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vand.vv v12, v8, v10, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v10, v0.t +; RV32-NEXT: vadd.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vand.vv v8, v8, v10, v0.t +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v10, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_v8i32: +; RV64-LABEL: vp_ctlz_v4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t @@ -1184,14 +1172,21 @@ define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t ; RV64-NEXT: vor.vv v8, v8, v10, v0.t +; RV64-NEXT: li a0, 32 +; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t +; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t ; RV64-NEXT: lui a0, 349525 ; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t ; RV64-NEXT: lui a0, 209715 ; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t @@ -1200,20 +1195,25 @@ define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: 
vand.vx v8, v8, a0, v0.t ; RV64-NEXT: lui a0, 4112 ; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV64-NEXT: li a0, 56 +; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret - %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl) - ret <8 x i32> %v + %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v } -define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v8i32_unmasked: +define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_v4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v10, v8, 2 @@ -1224,32 +1224,48 @@ define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v10, v8, 16 ; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vand.vv v12, v8, v10 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vadd.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v10 +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_v8i32_unmasked: +; RV64-LABEL: vp_ctlz_v4i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV64-NEXT: vsrl.vi v10, v8, 1 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vsrl.vi v10, v8, 2 @@ -1260,14 +1276,21 @@ define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vsrl.vi v10, v8, 16 ; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: li a0, 32 +; RV64-NEXT: vsrl.vx v10, v8, a0 +; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: 
vsrl.vi v10, v8, 1 ; RV64-NEXT: lui a0, 349525 ; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 ; RV64-NEXT: lui a0, 209715 ; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a0 @@ -1276,24 +1299,29 @@ define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { ; RV64-NEXT: vadd.vv v8, v8, v10 ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: lui a0, 4112 ; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 +; RV64-NEXT: li a0, 56 +; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl) - ret <8 x i32> %v + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v } -declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32) +declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32) -define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v16i32: +define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t @@ -1304,32 +1332,48 @@ define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t +; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vand.vv v16, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; 
RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v12, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_v16i32: +; RV64-LABEL: vp_ctlz_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v8, v12, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t @@ -1340,14 +1384,21 @@ define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vor.vv v8, v8, v12, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t ; RV64-NEXT: vor.vv v8, v8, v12, v0.t +; RV64-NEXT: li a0, 32 +; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t +; RV64-NEXT: vor.vv v8, v8, v12, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t ; RV64-NEXT: lui a0, 349525 ; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t ; RV64-NEXT: lui a0, 209715 ; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t @@ -1356,20 +1407,25 @@ define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: lui a0, 4112 ; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV64-NEXT: li a0, 56 +; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret - %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl) - ret <16 x i32> %v + %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v } -define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v16i32_unmasked: +define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_v8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 2 @@ -1380,32 +1436,48 @@ define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 16 ; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; 
RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vand.vv v16, v8, v12 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v12 +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_v16i32_unmasked: +; RV64-LABEL: vp_ctlz_v8i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV64-NEXT: vsrl.vi v12, v8, 1 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vsrl.vi v12, v8, 2 @@ -1416,14 +1488,21 @@ define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vsrl.vi v12, v8, 16 ; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: li a0, 32 +; RV64-NEXT: vsrl.vx v12, v8, a0 +; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vsrl.vi v12, v8, 1 ; RV64-NEXT: lui a0, 349525 ; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 ; RV64-NEXT: lui a0, 209715 ; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a0 @@ -1432,107 +1511,127 @@ define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; RV64-NEXT: vadd.vv v8, v8, v12 ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: lui a0, 4112 ; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 +; RV64-NEXT: li a0, 56 +; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl) - ret <16 x i32> %v + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v } -declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, 
i1 immarg, <2 x i1>, i32) +declare <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32) -define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v2i64: +define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v9, v9, v10, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v10, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v8, v8, v9, v0.t +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v9, v0.t +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; 
RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: mv a1, sp +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_v2i64: +; RV64-LABEL: vp_ctlz_v15i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV64-NEXT: lui a0, 349525 ; RV64-NEXT: addiw a0, a0, 1365 ; RV64-NEXT: slli a1, a0, 32 ; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t +; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vsub.vv v8, v8, v16, v0.t ; RV64-NEXT: lui a0, 209715 ; RV64-NEXT: addiw a0, a0, 819 ; RV64-NEXT: slli a1, a0, 32 ; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t +; RV64-NEXT: vadd.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 ; RV64-NEXT: slli a1, a0, 32 @@ -1546,97 +1645,112 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret - %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl) - ret <2 x i64> %v + %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl) + ret <15 x i64> %v } -define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v2i64_unmasked: +define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_v15i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi 
v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v9, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vsub.vv v8, v8, v9 +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v10, v8, v9 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v9 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v2i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v9, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 16 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vsetivli zero, 16, 
e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: mv a1, sp +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_ctlz_v15i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 2 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 8 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 16 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: li a0, 32 +; RV64-NEXT: vsrl.vx v16, v8, a0 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vnot.v v8, v8 +; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 ; RV64-NEXT: addiw a0, a0, 1365 ; RV64-NEXT: slli a1, a0, 32 ; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 +; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vsub.vv v8, v8, v16 ; RV64-NEXT: lui a0, 209715 ; RV64-NEXT: addiw a0, a0, 819 ; RV64-NEXT: slli a1, a0, 32 ; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v9, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: vadd.vv v8, v8, v16 ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 ; RV64-NEXT: slli a1, a0, 32 @@ -1650,101 +1764,116 @@ define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl) - ret <2 x i64> %v + %head = insertelement <15 x i1> poison, i1 true, i32 0 + %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer + %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl) + ret <15 x i64> %v } -declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32) +declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32) -define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v4i64: +define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: 
.cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v10, v10, v12, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v8, v8, v10, v0.t +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v10, v0.t +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: mv a1, sp +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_v4i64: +; RV64-LABEL: vp_ctlz_v16i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, 
v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV64-NEXT: lui a0, 349525 ; RV64-NEXT: addiw a0, a0, 1365 ; RV64-NEXT: slli a1, a0, 32 ; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t +; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vsub.vv v8, v8, v16, v0.t ; RV64-NEXT: lui a0, 209715 ; RV64-NEXT: addiw a0, a0, 819 ; RV64-NEXT: slli a1, a0, 32 ; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t +; RV64-NEXT: vadd.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 ; RV64-NEXT: slli a1, a0, 32 @@ -1758,97 +1887,112 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret - %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl) - ret <4 x i64> %v + %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v } -define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v4i64_unmasked: +define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_v16i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v10, v10, v12 -; RV32-NEXT: vsub.vv v8, v8, v10 +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v8, v10 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; 
RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v10 +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 16 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: mv a1, sp +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_v4i64_unmasked: +; RV64-LABEL: vp_ctlz_v16i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 2 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 8 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 16 +; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v10, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vsrl.vx v16, v8, a0 +; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: vsrl.vi v16, v8, 1 ; RV64-NEXT: lui a0, 349525 ; RV64-NEXT: addiw a0, a0, 1365 ; RV64-NEXT: slli a1, a0, 
32 ; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 +; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vsub.vv v8, v8, v16 ; RV64-NEXT: lui a0, 209715 ; RV64-NEXT: addiw a0, a0, 819 ; RV64-NEXT: slli a1, a0, 32 ; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: vadd.vv v8, v8, v16 ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 ; RV64-NEXT: slli a1, a0, 32 @@ -1862,248 +2006,54 @@ define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl) - ret <4 x i64> %v + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v } -declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32) +declare <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32) -define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v8i64: +define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 56 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v24, v0, 2 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v16, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v12, v12, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v16, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 
2, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v12, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl) - ret <8 x i64> %v -} - -define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v8i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v16, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v12, v12, v16 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vsetvli a2, zero, 
e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v16, v8, v12 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v12 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v8i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v12, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl) - ret <8 x i64> %v -} - -declare <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32) - -define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v15i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: li a3, 16 ; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a0, a3, .LBB34_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB34_2: +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi 
v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t @@ -2118,236 +2068,128 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: 
vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v15i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: ret - %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl) - ret <15 x i64> %v -} - -define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v15i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw 
a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v15i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %head = insertelement <15 x i1> poison, i1 true, i32 0 - %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer - %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl) - ret <15 x i64> %v -} - -declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 
immarg, <16 x i1>, i32) - -define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v16i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t @@ -2356,45 +2198,108 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; 
RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 56 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_v16i64: +; RV64-LABEL: vp_ctlz_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB34_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB34_2: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t @@ -2405,66 +2310,121 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t +; RV64-NEXT: li a1, 32 +; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: lui 
a2, 349525 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: vand.vx v16, v16, a2, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v8, a0, v0.t +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 +; RV64-NEXT: vand.vx v8, v8, a4, v0.t +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: vmul.vx v8, v8, a5, v0.t +; RV64-NEXT: li a6, 56 +; RV64-NEXT: vsrl.vx v8, v8, a6, v0.t +; RV64-NEXT: addi a7, sp, 16 +; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill +; RV64-NEXT: addi a7, a0, -16 +; RV64-NEXT: sltu a0, a0, a7 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a7 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: vor.vv v16, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t +; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vnot.v v8, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: vand.vx v16, v16, a2, v0.t +; RV64-NEXT: vsub.vv v8, v8, v16, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t +; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t +; RV64-NEXT: vadd.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: vand.vx v8, v8, a4, v0.t +; RV64-NEXT: vmul.vx v8, v8, a5, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a6, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret - %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl) - ret <16 x i64> %v + %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v } -define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v16i64_unmasked: +define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 
x i64> %va, i32 zeroext %evl) { +; RV32-LABEL: vp_ctlz_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb +; RV32-NEXT: vmv8r.v v24, v16 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: li a2, 16 +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB35_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB35_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 2 @@ -2475,708 +2435,196 @@ define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 16 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsrl.vx v16, v8, a2 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vlse64.v v0, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v0 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vlse64.v v0, (a3), zero +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v8, v0 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: 
vand.vv v8, v8, v16 -; RV32-NEXT: mv a1, sp +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v16i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsrl.vx v16, v8, a0 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v16, a0 -; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl) - ret <16 x i64> %v -} - -declare <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32) - -define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 56 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v24, v0, 2 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: li a3, 16 -; RV32-NEXT: sw a1, 16(sp) -; 
RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a3, .LBB34_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB34_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t -; 
RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, a0, -16 ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: vsrl.vi v8, v24, 1 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsrl.vi v24, v8, 16 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vnot.v v8, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v0 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v 
v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 56 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; -; RV64-LABEL: vp_ctlz_v32i64: +; RV64-LABEL: vp_ctlz_v32i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 -; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB34_2 +; RV64-NEXT: bltu a0, a2, .LBB35_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB34_2: +; RV64-NEXT: .LBB35_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 1 +; RV64-NEXT: vor.vv v8, v8, v24 +; RV64-NEXT: vsrl.vi v24, v8, 2 +; RV64-NEXT: vor.vv v8, v8, v24 +; RV64-NEXT: vsrl.vi v24, v8, 4 +; RV64-NEXT: vor.vv v8, v8, v24 +; RV64-NEXT: vsrl.vi v24, v8, 8 +; RV64-NEXT: vor.vv v8, v8, v24 +; 
RV64-NEXT: vsrl.vi v24, v8, 16 +; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a1 +; RV64-NEXT: vor.vv v8, v8, v24 +; RV64-NEXT: vnot.v v8, v8 +; RV64-NEXT: vsrl.vi v24, v8, 1 ; RV64-NEXT: lui a2, 349525 ; RV64-NEXT: addiw a2, a2, 1365 ; RV64-NEXT: slli a3, a2, 32 ; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t +; RV64-NEXT: vand.vx v24, v24, a2 +; RV64-NEXT: vsub.vv v8, v8, v24 ; RV64-NEXT: lui a3, 209715 ; RV64-NEXT: addiw a3, a3, 819 ; RV64-NEXT: slli a4, a3, 32 ; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: vand.vx v16, v8, a3, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: vand.vx v24, v8, a3 +; RV64-NEXT: vsrl.vi v8, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vadd.vv v8, v24, v8 +; RV64-NEXT: vsrl.vi v24, v8, 4 +; RV64-NEXT: vadd.vv v8, v8, v24 ; RV64-NEXT: lui a4, 61681 ; RV64-NEXT: addiw a4, a4, -241 ; RV64-NEXT: slli a5, a4, 32 ; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: vand.vx v8, v8, a4, v0.t +; RV64-NEXT: vand.vx v8, v8, a4 ; RV64-NEXT: lui a5, 4112 ; RV64-NEXT: addiw a5, a5, 257 ; RV64-NEXT: slli a6, a5, 32 ; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: vmul.vx v8, v8, a5, v0.t +; RV64-NEXT: vmul.vx v8, v8, a5 ; RV64-NEXT: li a6, 56 -; RV64-NEXT: vsrl.vx v8, v8, a6, v0.t -; RV64-NEXT: addi a7, sp, 16 -; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill +; RV64-NEXT: vsrl.vx v8, v8, a6 ; RV64-NEXT: addi a7, a0, -16 ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a7 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v16, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vmul.vx v8, v8, a5, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a6, v0.t -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: vsrl.vi v24, v16, 1 +; RV64-NEXT: vor.vv v16, v16, v24 +; RV64-NEXT: vsrl.vi v24, v16, 2 +; RV64-NEXT: vor.vv v16, v16, v24 +; RV64-NEXT: vsrl.vi v24, v16, 4 +; RV64-NEXT: vor.vv v16, v16, v24 +; RV64-NEXT: vsrl.vi v24, v16, 8 +; RV64-NEXT: 
vor.vv v16, v16, v24 +; RV64-NEXT: vsrl.vi v24, v16, 16 +; RV64-NEXT: vor.vv v16, v16, v24 +; RV64-NEXT: vsrl.vx v24, v16, a1 +; RV64-NEXT: vor.vv v16, v16, v24 +; RV64-NEXT: vnot.v v16, v16 +; RV64-NEXT: vsrl.vi v24, v16, 1 +; RV64-NEXT: vand.vx v24, v24, a2 +; RV64-NEXT: vsub.vv v16, v16, v24 +; RV64-NEXT: vand.vx v24, v16, a3 +; RV64-NEXT: vsrl.vi v16, v16, 2 +; RV64-NEXT: vand.vx v16, v16, a3 +; RV64-NEXT: vadd.vv v16, v24, v16 +; RV64-NEXT: vsrl.vi v24, v16, 4 +; RV64-NEXT: vadd.vv v16, v16, v24 +; RV64-NEXT: vand.vx v16, v16, a4 +; RV64-NEXT: vmul.vx v16, v16, a5 +; RV64-NEXT: vsrl.vx v16, v16, a6 ; RV64-NEXT: ret - %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl) - ret <32 x i64> %v -} - -define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_v32i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb -; RV32-NEXT: vmv8r.v v24, v16 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: li a2, 16 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB35_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB35_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a3, 
sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 1 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_v32i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB35_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB35_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vsrl.vi v24, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsrl.vx v24, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: slli a3, a2, 32 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: vand.vx v24, v24, a2 -; RV64-NEXT: vsub.vv v8, v8, v24 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: addiw a3, a3, 819 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: vand.vx v24, v8, a3 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vadd.vv v8, v24, v8 -; RV64-NEXT: vsrl.vi v24, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v24 -; RV64-NEXT: lui a4, 
61681 -; RV64-NEXT: addiw a4, a4, -241 -; RV64-NEXT: slli a5, a4, 32 -; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: vand.vx v8, v8, a4 -; RV64-NEXT: lui a5, 4112 -; RV64-NEXT: addiw a5, a5, 257 -; RV64-NEXT: slli a6, a5, 32 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: vmul.vx v8, v8, a5 -; RV64-NEXT: li a6, 56 -; RV64-NEXT: vsrl.vx v8, v8, a6 -; RV64-NEXT: addi a7, a0, -16 -; RV64-NEXT: sltu a0, a0, a7 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a7 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 2 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 4 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 8 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vi v24, v16, 16 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsrl.vx v24, v16, a1 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vnot.v v16, v16 -; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: vand.vx v24, v24, a2 -; RV64-NEXT: vsub.vv v16, v16, v24 -; RV64-NEXT: vand.vx v24, v16, a3 -; RV64-NEXT: vsrl.vi v16, v16, 2 -; RV64-NEXT: vand.vx v16, v16, a3 -; RV64-NEXT: vadd.vv v16, v24, v16 -; RV64-NEXT: vsrl.vi v24, v16, 4 -; RV64-NEXT: vadd.vv v16, v16, v24 -; RV64-NEXT: vand.vx v16, v16, a4 -; RV64-NEXT: vmul.vx v16, v16, a5 -; RV64-NEXT: vsrl.vx v16, v16, a6 -; RV64-NEXT: ret - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer + %head = insertelement <32 x i1> poison, i1 true, i32 0 + %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl) ret <32 x i64> %v } @@ -3414,141 +2862,75 @@ define <16 x i8> @vp_ctlz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext % } define <2 x i16> @vp_ctlz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v2i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; 
RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl) ret <2 x i16> %v } define <2 x i16> @vp_ctlz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v2i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v2i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; 
RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl) @@ -3556,141 +2938,75 @@ define <2 x i16> @vp_ctlz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext % } define <4 x i16> @vp_ctlz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v4i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: 
lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl) ret <4 x i16> %v } define <4 x i16> @vp_ctlz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 
3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl) @@ -3698,141 +3014,75 @@ define <4 x i16> @vp_ctlz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext % } define <8 x i16> @vp_ctlz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; 
RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl) ret <8 x i16> %v } define <8 x i16> @vp_ctlz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: 
vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl) @@ -3840,141 +3090,75 @@ define <8 x i16> @vp_ctlz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext % } define <16 x i16> @vp_ctlz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v16i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v16i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, 
v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl) ret <16 x i16> %v } define <16 x i16> @vp_ctlz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; 
RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl) @@ -3982,153 +3166,81 @@ define <16 x i16> @vp_ctlz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroex } define <2 x i32> @vp_ctlz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, 
v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl) ret <2 x i32> %v } define <2 x i32> @vp_ctlz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 
-; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 16 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl) @@ -4136,153 +3248,81 @@ define <2 x i32> @vp_ctlz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext % } define <4 x i32> @vp_ctlz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, 
v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl) ret <4 x i32> %v -} - -define <4 x i32> @vp_ctlz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; 
RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +} + +define <4 x i32> @vp_ctlz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ctlz_zero_undef_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 16 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl) @@ -4290,153 +3330,81 @@ define <4 x i32> @vp_ctlz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext % } define <8 x i32> @vp_ctlz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 
2, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 
+; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl) ret <8 x i32> %v } define <8 x i32> @vp_ctlz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 2 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 16 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; 
CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl) @@ -4444,153 +3412,81 @@ define <8 x i32> @vp_ctlz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext % } define <16 x i32> @vp_ctlz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi 
v12, v8, 16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl) ret <16 x i32> %v } define <16 x i32> @vp_ctlz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctlz_zero_undef_v16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctlz_zero_undef_v16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctlz_zero_undef_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 2 +; 
CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 16 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll index 9ad8beac56c4b..9c30d3ac71679 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -123,11 +123,11 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV64I-NEXT: lui a1, 5 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 3 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 @@ -135,7 +135,7 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 1 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: li a1, 257 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 @@ -143,75 +143,40 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV64I-NEXT: ret ; -; LMULMAX1-RV32-LABEL: ctlz_v8i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vnot.v v8, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: lui a1, 5 -; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 3 -; LMULMAX1-RV32-NEXT: addi a1, a1, 819 -; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: 
vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 1 -; LMULMAX1-RV32-NEXT: addi a1, a1, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: li a1, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ctlz_v8i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vnot.v v8, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 5 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 3 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 1 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: li a1, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX1-LABEL: ctlz_v8i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX1-NEXT: vle16.v v8, (a0) +; LMULMAX1-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX1-NEXT: vor.vv v8, v8, v9 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 2 +; LMULMAX1-NEXT: vor.vv v8, v8, v9 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 4 +; LMULMAX1-NEXT: vor.vv v8, v8, v9 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 8 +; LMULMAX1-NEXT: vor.vv v8, v8, v9 +; LMULMAX1-NEXT: vnot.v v8, v8 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX1-NEXT: lui a1, 5 +; LMULMAX1-NEXT: addi a1, a1, 1365 +; LMULMAX1-NEXT: vand.vx v9, v9, a1 +; LMULMAX1-NEXT: vsub.vv v8, v8, v9 +; LMULMAX1-NEXT: lui a1, 3 +; LMULMAX1-NEXT: addi a1, a1, 819 +; LMULMAX1-NEXT: vand.vx v9, v8, a1 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-NEXT: vadd.vv v8, v9, v8 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 4 +; LMULMAX1-NEXT: vadd.vv v8, v8, v9 +; LMULMAX1-NEXT: lui a1, 1 +; LMULMAX1-NEXT: addi a1, a1, -241 +; LMULMAX1-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-NEXT: li a1, 257 +; LMULMAX1-NEXT: vmul.vx v8, v8, a1 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 +; LMULMAX1-NEXT: vse16.v v8, (a0) +; LMULMAX1-NEXT: ret ; ; LMULMAX2-RV32F-LABEL: ctlz_v8i16: ; LMULMAX2-RV32F: # %bb.0: @@ -349,11 +314,11 @@ define void @ctlz_v4i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, 
v8, a1 @@ -361,10 +326,10 @@ define void @ctlz_v4i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) @@ -769,193 +734,99 @@ define void @ctlz_v32i8(ptr %x, ptr %y) nounwind { declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1) define void @ctlz_v16i16(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32-LABEL: ctlz_v16i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 5 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 3 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 1 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: li a1, 257 -; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: ctlz_v16i16: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 5 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 3 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 1 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: li a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ctlz_v16i16: -; LMULMAX1-RV32: # %bb.0: -; 
LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vnot.v v8, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: lui a2, 5 -; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a3, 3 -; LMULMAX1-RV32-NEXT: addi a3, a3, 819 -; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a4, 1 -; LMULMAX1-RV32-NEXT: addi a4, a4, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV32-NEXT: li a5, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vnot.v v9, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 8 -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ctlz_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vnot.v v8, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a2, 5 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a3, 3 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 1 -; LMULMAX1-RV64-NEXT: addiw a4, a4, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, 
v8, a4 -; LMULMAX1-RV64-NEXT: li a5, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vnot.v v9, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 8 -; LMULMAX1-RV64-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX2-LABEL: ctlz_v16i16: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-NEXT: vle16.v v8, (a0) +; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 2 +; LMULMAX2-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 8 +; LMULMAX2-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-NEXT: vnot.v v8, v8 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-NEXT: lui a1, 5 +; LMULMAX2-NEXT: addi a1, a1, 1365 +; LMULMAX2-NEXT: vand.vx v10, v10, a1 +; LMULMAX2-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-NEXT: lui a1, 3 +; LMULMAX2-NEXT: addi a1, a1, 819 +; LMULMAX2-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-NEXT: lui a1, 1 +; LMULMAX2-NEXT: addi a1, a1, -241 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: li a1, 257 +; LMULMAX2-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 8 +; LMULMAX2-NEXT: vse16.v v8, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-LABEL: ctlz_v16i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vle16.v v8, (a1) +; LMULMAX1-NEXT: vle16.v v9, (a0) +; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-NEXT: vor.vv v8, v8, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 2 +; LMULMAX1-NEXT: vor.vv v8, v8, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-NEXT: vor.vv v8, v8, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 8 +; LMULMAX1-NEXT: vor.vv v8, v8, v10 +; LMULMAX1-NEXT: vnot.v v8, v8 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-NEXT: lui a2, 5 +; LMULMAX1-NEXT: addi a2, a2, 1365 +; LMULMAX1-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-NEXT: lui a3, 3 +; LMULMAX1-NEXT: addi a3, a3, 819 +; LMULMAX1-NEXT: vand.vx v10, v8, a3 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-NEXT: vand.vx v8, v8, a3 +; LMULMAX1-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-NEXT: lui a4, 1 +; LMULMAX1-NEXT: addi a4, a4, -241 +; LMULMAX1-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-NEXT: li a5, 257 +; LMULMAX1-NEXT: vmul.vx v8, v8, a5 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 
+; LMULMAX1-NEXT: vor.vv v9, v9, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 2 +; LMULMAX1-NEXT: vor.vv v9, v9, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-NEXT: vor.vv v9, v9, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 8 +; LMULMAX1-NEXT: vor.vv v9, v9, v10 +; LMULMAX1-NEXT: vnot.v v9, v9 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 +; LMULMAX1-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-NEXT: vsub.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v10, v9, a3 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 +; LMULMAX1-NEXT: vand.vx v9, v9, a3 +; LMULMAX1-NEXT: vadd.vv v9, v10, v9 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-NEXT: vadd.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v9, v9, a4 +; LMULMAX1-NEXT: vmul.vx v9, v9, a5 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 8 +; LMULMAX1-NEXT: vse16.v v9, (a0) +; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: ctlz_v16i16: ; LMULMAX8: # %bb.0: @@ -1041,11 +912,11 @@ define void @ctlz_v8i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 @@ -1053,10 +924,10 @@ define void @ctlz_v8i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) @@ -1461,11 +1332,11 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV64I-NEXT: lui a1, 5 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 3 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 @@ -1473,7 +1344,7 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 1 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: li a1, 257 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 @@ -1481,75 +1352,40 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV64I-NEXT: ret ; -; LMULMAX1-RV32-LABEL: ctlz_zero_undef_v8i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, 
v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vnot.v v8, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: lui a1, 5 -; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 3 -; LMULMAX1-RV32-NEXT: addi a1, a1, 819 -; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 1 -; LMULMAX1-RV32-NEXT: addi a1, a1, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: li a1, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ctlz_zero_undef_v8i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vnot.v v8, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 5 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 3 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 1 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: li a1, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX1-LABEL: ctlz_zero_undef_v8i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX1-NEXT: vle16.v v8, (a0) +; LMULMAX1-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX1-NEXT: vor.vv v8, v8, v9 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 2 +; LMULMAX1-NEXT: vor.vv v8, v8, v9 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 4 +; LMULMAX1-NEXT: vor.vv v8, v8, v9 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 8 +; LMULMAX1-NEXT: vor.vv v8, v8, v9 +; LMULMAX1-NEXT: vnot.v v8, v8 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX1-NEXT: lui a1, 5 +; LMULMAX1-NEXT: addi a1, a1, 1365 +; LMULMAX1-NEXT: vand.vx v9, v9, a1 +; LMULMAX1-NEXT: vsub.vv v8, v8, v9 +; LMULMAX1-NEXT: lui a1, 3 +; LMULMAX1-NEXT: addi a1, a1, 819 +; LMULMAX1-NEXT: vand.vx v9, v8, a1 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-NEXT: vadd.vv v8, v9, v8 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 4 +; LMULMAX1-NEXT: vadd.vv v8, v8, v9 +; LMULMAX1-NEXT: lui a1, 1 +; LMULMAX1-NEXT: addi a1, a1, -241 +; LMULMAX1-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-NEXT: li a1, 257 +; LMULMAX1-NEXT: vmul.vx v8, v8, a1 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 +; LMULMAX1-NEXT: vse16.v v8, 
(a0) +; LMULMAX1-NEXT: ret ; ; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v8i16: ; LMULMAX2-RV32F: # %bb.0: @@ -1676,11 +1512,11 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 @@ -1688,10 +1524,10 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) @@ -2069,193 +1905,99 @@ define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind { } define void @ctlz_zero_undef_v16i16(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32-LABEL: ctlz_zero_undef_v16i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 5 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 3 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 1 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: li a1, 257 -; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: ctlz_zero_undef_v16i16: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 5 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; 
LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 3 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 1 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: li a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ctlz_zero_undef_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vnot.v v8, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: lui a2, 5 -; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a3, 3 -; LMULMAX1-RV32-NEXT: addi a3, a3, 819 -; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a4, 1 -; LMULMAX1-RV32-NEXT: addi a4, a4, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV32-NEXT: li a5, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vnot.v v9, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 8 -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ctlz_zero_undef_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vnot.v v8, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a2, 5 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a3, 3 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 1 -; LMULMAX1-RV64-NEXT: addiw a4, a4, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: li a5, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vnot.v v9, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 8 -; LMULMAX1-RV64-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX2-LABEL: ctlz_zero_undef_v16i16: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-NEXT: vle16.v v8, (a0) +; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 2 +; LMULMAX2-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 8 +; LMULMAX2-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-NEXT: vnot.v v8, v8 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-NEXT: lui a1, 5 +; LMULMAX2-NEXT: addi a1, a1, 1365 +; LMULMAX2-NEXT: vand.vx v10, v10, a1 +; LMULMAX2-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-NEXT: lui a1, 3 +; LMULMAX2-NEXT: addi a1, a1, 819 +; LMULMAX2-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-NEXT: lui a1, 1 +; LMULMAX2-NEXT: addi a1, a1, -241 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: li a1, 257 +; LMULMAX2-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 8 +; LMULMAX2-NEXT: vse16.v v8, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-LABEL: ctlz_zero_undef_v16i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vle16.v v8, (a1) +; LMULMAX1-NEXT: vle16.v v9, (a0) +; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-NEXT: vor.vv v8, v8, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 2 +; LMULMAX1-NEXT: vor.vv v8, v8, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-NEXT: vor.vv v8, v8, v10 +; LMULMAX1-NEXT: vsrl.vi 
v10, v8, 8 +; LMULMAX1-NEXT: vor.vv v8, v8, v10 +; LMULMAX1-NEXT: vnot.v v8, v8 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-NEXT: lui a2, 5 +; LMULMAX1-NEXT: addi a2, a2, 1365 +; LMULMAX1-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-NEXT: lui a3, 3 +; LMULMAX1-NEXT: addi a3, a3, 819 +; LMULMAX1-NEXT: vand.vx v10, v8, a3 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-NEXT: vand.vx v8, v8, a3 +; LMULMAX1-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-NEXT: lui a4, 1 +; LMULMAX1-NEXT: addi a4, a4, -241 +; LMULMAX1-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-NEXT: li a5, 257 +; LMULMAX1-NEXT: vmul.vx v8, v8, a5 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 +; LMULMAX1-NEXT: vor.vv v9, v9, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 2 +; LMULMAX1-NEXT: vor.vv v9, v9, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-NEXT: vor.vv v9, v9, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 8 +; LMULMAX1-NEXT: vor.vv v9, v9, v10 +; LMULMAX1-NEXT: vnot.v v9, v9 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 +; LMULMAX1-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-NEXT: vsub.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v10, v9, a3 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 +; LMULMAX1-NEXT: vand.vx v9, v9, a3 +; LMULMAX1-NEXT: vadd.vv v9, v10, v9 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-NEXT: vadd.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v9, v9, a4 +; LMULMAX1-NEXT: vmul.vx v9, v9, a5 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 8 +; LMULMAX1-NEXT: vse16.v v9, (a0) +; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: ctlz_zero_undef_v16i16: ; LMULMAX8: # %bb.0: @@ -2338,11 +2080,11 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 @@ -2350,10 +2092,10 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) @@ -2627,3 +2369,8 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { store <4 x i64> %c, ptr %x ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; LMULMAX1-RV32: {{.*}} +; LMULMAX1-RV64: {{.*}} +; LMULMAX2-RV32: {{.*}} +; LMULMAX2-RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index 55485beff8eb1..08f7e2058ad29 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -191,105 +191,57 @@ define <16 x i8> @vp_ctpop_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32) define <2 x i16> @vp_ctpop_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v2i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl) ret <2 x i16> %v } define <2 x i16> @vp_ctpop_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v2i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: 
vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v2i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl) @@ -299,105 +251,57 @@ define <2 x i16> @vp_ctpop_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32) define <4 x i16> @vp_ctpop_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v4i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; 
RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl) ret <4 x i16> %v } define <4 x i16> @vp_ctpop_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 
x i16> %va, <4 x i1> %m, i32 %evl) @@ -407,105 +311,57 @@ define <4 x i16> @vp_ctpop_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32) define <8 x i16> @vp_ctpop_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl) ret <8 x i16> %v } define <8 x i16> @vp_ctpop_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v8i16_unmasked: -; 
RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl) @@ -515,105 +371,57 @@ define <8 x i16> @vp_ctpop_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32) define <16 x i16> @vp_ctpop_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v16i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v16i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi 
v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl) ret <16 x i16> %v } define <16 x i16> @vp_ctpop_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl) @@ -623,109 +431,59 @@ define <16 x i16> @vp_ctpop_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32) define <2 x i32> @vp_ctpop_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v2i32: -; RV32: # 
%bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl) ret <2 x i32> %v } define <2 x i32> @vp_ctpop_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; 
RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl) @@ -735,109 +493,59 @@ define <2 x i32> @vp_ctpop_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32) define <4 x i32> @vp_ctpop_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, 
a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl) ret <4 x i32> %v } define <4 x i32> @vp_ctpop_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl) @@ -847,109 +555,59 @@ define <4 x i32> @vp_ctpop_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { 
declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32) define <8 x i32> @vp_ctpop_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl) ret <8 x i32> %v } define <8 x i32> @vp_ctpop_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; 
RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl) @@ -959,109 +617,59 @@ define <8 x i32> @vp_ctpop_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32) define <16 x i32> @vp_ctpop_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: 
vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl) ret <16 x i32> %v } define <16 x i32> @vp_ctpop_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_ctpop_v16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_ctpop_v16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_ctpop_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = 
insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll index c7b6db226ee5f..0b2a44fa14526 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll @@ -42,109 +42,31 @@ define void @ctpop_v16i8(ptr %x, ptr %y) { declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) define void @ctpop_v8i16(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: ctpop_v8i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 5 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: lui a1, 3 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: lui a1, 1 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: li a1, 257 -; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: ctpop_v8i16: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 5 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 3 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 1 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: li a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ctpop_v8i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: lui a1, 5 -; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 3 -; LMULMAX1-RV32-NEXT: addi a1, a1, 819 -; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 1 -; LMULMAX1-RV32-NEXT: addi a1, a1, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: li a1, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: 
vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ctpop_v8i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 5 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 3 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 1 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: li a1, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: ctpop_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a1, 5 +; CHECK-NEXT: addi a1, a1, 1365 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a1, 3 +; CHECK-NEXT: addi a1, a1, 819 +; CHECK-NEXT: vand.vx v9, v8, a1 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a1, 1 +; CHECK-NEXT: addi a1, a1, -241 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: li a1, 257 +; CHECK-NEXT: vmul.vx v8, v8, a1 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret ; ; ZVBB-LABEL: ctpop_v8i16: ; ZVBB: # %bb.0: @@ -162,113 +84,32 @@ define void @ctpop_v8i16(ptr %x, ptr %y) { declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) define void @ctpop_v4i32(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: ctpop_v4i32: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 349525 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: lui a1, 209715 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: lui a1, 61681 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: lui a1, 4112 -; LMULMAX2-RV32-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: ctpop_v4i32: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 349525 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 209715 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1 -; 
LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 61681 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ctpop_v4i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: lui a1, 349525 -; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 209715 -; LMULMAX1-RV32-NEXT: addi a1, a1, 819 -; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 61681 -; LMULMAX1-RV32-NEXT: addi a1, a1, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: lui a1, 4112 -; LMULMAX1-RV32-NEXT: addi a1, a1, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ctpop_v4i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 349525 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 209715 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 61681 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: lui a1, 4112 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: ctpop_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a1, 349525 +; CHECK-NEXT: addi a1, a1, 1365 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a1, 209715 +; CHECK-NEXT: addi a1, a1, 819 +; CHECK-NEXT: vand.vx v9, v8, a1 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a1, 61681 +; CHECK-NEXT: addi a1, a1, -241 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: lui a1, 4112 +; CHECK-NEXT: addi a1, a1, 257 +; CHECK-NEXT: vmul.vx v8, v8, a1 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret ; ; ZVBB-LABEL: ctpop_v4i32: ; ZVBB: # %bb.0: @@ -523,139 +364,72 @@ define 
void @ctpop_v32i8(ptr %x, ptr %y) { declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>) define void @ctpop_v16i16(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: ctpop_v16i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 5 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 3 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 1 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: li a1, 257 -; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: ctpop_v16i16: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 5 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 3 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 1 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: li a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ctpop_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: lui a2, 5 -; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a3, 3 -; LMULMAX1-RV32-NEXT: addi a3, a3, 819 -; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a4, 1 -; LMULMAX1-RV32-NEXT: addi a4, a4, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV32-NEXT: li a5, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 -; 
LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 8 -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret +; LMULMAX2-LABEL: ctpop_v16i16: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-NEXT: vle16.v v8, (a0) +; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-NEXT: lui a1, 5 +; LMULMAX2-NEXT: addi a1, a1, 1365 +; LMULMAX2-NEXT: vand.vx v10, v10, a1 +; LMULMAX2-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-NEXT: lui a1, 3 +; LMULMAX2-NEXT: addi a1, a1, 819 +; LMULMAX2-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-NEXT: lui a1, 1 +; LMULMAX2-NEXT: addi a1, a1, -241 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: li a1, 257 +; LMULMAX2-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 8 +; LMULMAX2-NEXT: vse16.v v8, (a0) +; LMULMAX2-NEXT: ret ; -; LMULMAX1-RV64-LABEL: ctpop_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a2, 5 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a3, 3 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 1 -; LMULMAX1-RV64-NEXT: addiw a4, a4, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: li a5, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 8 -; LMULMAX1-RV64-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX1-LABEL: ctpop_v16i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vle16.v v8, (a1) +; LMULMAX1-NEXT: vle16.v v9, (a0) +; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-NEXT: lui a2, 5 +; LMULMAX1-NEXT: addi a2, a2, 1365 +; LMULMAX1-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-NEXT: lui a3, 3 +; LMULMAX1-NEXT: addi a3, a3, 819 +; LMULMAX1-NEXT: vand.vx v10, v8, a3 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-NEXT: vand.vx v8, v8, a3 +; LMULMAX1-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-NEXT: lui a4, 1 +; LMULMAX1-NEXT: addi a4, a4, -241 +; LMULMAX1-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-NEXT: li a5, 257 +; LMULMAX1-NEXT: vmul.vx v8, v8, a5 +; LMULMAX1-NEXT: vsrl.vi v8, 
v8, 8 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 +; LMULMAX1-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-NEXT: vsub.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v10, v9, a3 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 +; LMULMAX1-NEXT: vand.vx v9, v9, a3 +; LMULMAX1-NEXT: vadd.vv v9, v10, v9 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-NEXT: vadd.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v9, v9, a4 +; LMULMAX1-NEXT: vmul.vx v9, v9, a5 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 8 +; LMULMAX1-NEXT: vse16.v v9, (a0) +; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: ret ; ; ZVBB-LABEL: ctpop_v16i16: ; ZVBB: # %bb.0: @@ -673,143 +447,74 @@ define void @ctpop_v16i16(ptr %x, ptr %y) { declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) define void @ctpop_v8i32(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: ctpop_v8i32: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 349525 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 209715 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 61681 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: lui a1, 4112 -; LMULMAX2-RV32-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: ctpop_v8i32: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 349525 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 209715 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 61681 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ctpop_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: lui a2, 349525 -; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a3, 209715 -; LMULMAX1-RV32-NEXT: addi a3, a3, 819 -; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 
-; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a4, 61681 -; LMULMAX1-RV32-NEXT: addi a4, a4, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV32-NEXT: lui a5, 4112 -; LMULMAX1-RV32-NEXT: addi a5, a5, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 24 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret +; LMULMAX2-LABEL: ctpop_v8i32: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-NEXT: vle32.v v8, (a0) +; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-NEXT: lui a1, 349525 +; LMULMAX2-NEXT: addi a1, a1, 1365 +; LMULMAX2-NEXT: vand.vx v10, v10, a1 +; LMULMAX2-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-NEXT: lui a1, 209715 +; LMULMAX2-NEXT: addi a1, a1, 819 +; LMULMAX2-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-NEXT: lui a1, 61681 +; LMULMAX2-NEXT: addi a1, a1, -241 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: lui a1, 4112 +; LMULMAX2-NEXT: addi a1, a1, 257 +; LMULMAX2-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 24 +; LMULMAX2-NEXT: vse32.v v8, (a0) +; LMULMAX2-NEXT: ret ; -; LMULMAX1-RV64-LABEL: ctpop_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a2, 349525 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a3, 209715 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 61681 -; LMULMAX1-RV64-NEXT: addiw a4, a4, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: lui a5, 4112 -; LMULMAX1-RV64-NEXT: addiw a5, a5, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 24 -; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) 
-; LMULMAX1-RV64-NEXT: ret +; LMULMAX1-LABEL: ctpop_v8i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vle32.v v8, (a1) +; LMULMAX1-NEXT: vle32.v v9, (a0) +; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-NEXT: lui a2, 349525 +; LMULMAX1-NEXT: addi a2, a2, 1365 +; LMULMAX1-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-NEXT: lui a3, 209715 +; LMULMAX1-NEXT: addi a3, a3, 819 +; LMULMAX1-NEXT: vand.vx v10, v8, a3 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-NEXT: vand.vx v8, v8, a3 +; LMULMAX1-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-NEXT: lui a4, 61681 +; LMULMAX1-NEXT: addi a4, a4, -241 +; LMULMAX1-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-NEXT: lui a5, 4112 +; LMULMAX1-NEXT: addi a5, a5, 257 +; LMULMAX1-NEXT: vmul.vx v8, v8, a5 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 24 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 +; LMULMAX1-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-NEXT: vsub.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v10, v9, a3 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 +; LMULMAX1-NEXT: vand.vx v9, v9, a3 +; LMULMAX1-NEXT: vadd.vv v9, v10, v9 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-NEXT: vadd.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v9, v9, a4 +; LMULMAX1-NEXT: vmul.vx v9, v9, a5 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 24 +; LMULMAX1-NEXT: vse32.v v9, (a0) +; LMULMAX1-NEXT: vse32.v v8, (a1) +; LMULMAX1-NEXT: ret ; ; ZVBB-LABEL: ctpop_v8i32: ; ZVBB: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index 28df7f083c4a0..345e4180bba31 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -223,121 +223,65 @@ define <16 x i8> @vp_cttz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32) define <2 x i16> @vp_cttz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v2i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, 
v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl) ret <2 x i16> %v } define <2 x i16> @vp_cttz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v2i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v2i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, 
a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl) @@ -347,121 +291,65 @@ define <2 x i16> @vp_cttz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32) define <4 x i16> @vp_cttz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v4i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; 
CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl) ret <4 x i16> %v } define <4 x i16> @vp_cttz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl) @@ -471,121 +359,65 @@ define <4 x i16> @vp_cttz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32) define <8 x i16> @vp_cttz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v8i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 
-; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl) ret <8 x i16> %v } define <8 x i16> @vp_cttz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; 
RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl) @@ -595,121 +427,65 @@ define <8 x i16> @vp_cttz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32) define <16 x i16> @vp_cttz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v16i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v16i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: 
vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl) ret <16 x i16> %v } define <16 x i16> @vp_cttz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsub.vx v10, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv 
v8, v8, v10 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl) @@ -719,125 +495,67 @@ define <16 x i16> @vp_cttz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32) define <2 x i32> @vp_cttz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v2i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, 
v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl) ret <2 x i32> %v } define <2 x i32> @vp_cttz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl) @@ -847,125 +565,67 @@ define <2 x i32> @vp_cttz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32) define <4 x i32> 
@vp_cttz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret - %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl) +; CHECK-LABEL: vp_cttz_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl) ret <4 x i32> %v } define <4 x i32> @vp_cttz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; 
RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl) @@ -975,125 +635,67 @@ define <4 x i32> @vp_cttz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32) define <8 x i32> @vp_cttz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v8i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 61681 -; 
RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl) ret <8 x i32> %v } define <8 x i32> @vp_cttz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: 
addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsub.vx v10, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl) @@ -1103,125 +705,67 @@ define <8 x i32> @vp_cttz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32) define <16 x i32> @vp_cttz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v16i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v16i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: 
vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl) ret <16 x i32> %v } define <16 x i32> @vp_cttz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_v16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_v16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsub.vx v12, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v12 +; 
CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl) @@ -2894,121 +2438,65 @@ define <16 x i8> @vp_cttz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext % } define <2 x i16> @vp_cttz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v2i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; 
CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl) ret <2 x i16> %v } define <2 x i16> @vp_cttz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v2i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v2i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl) @@ -3016,121 +2504,65 @@ define <2 x i16> @vp_cttz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext % } define <4 x i16> @vp_cttz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v4i16: -; RV32: # %bb.0: -; 
RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl) ret <4 x i16> %v } define <4 x i16> @vp_cttz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v4i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 
-; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v4i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl) @@ -3138,121 +2570,65 @@ define <4 x i16> @vp_cttz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext % } define <8 x i16> @vp_cttz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v8i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; 
RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl) ret <8 x i16> %v } define <8 x i16> @vp_cttz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v8i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v8i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, 
e16, m1, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl) @@ -3260,121 +2636,65 @@ define <8 x i16> @vp_cttz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext % } define <16 x i16> @vp_cttz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v16i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v16i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; 
CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t +; CHECK-NEXT: ret %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl) ret <16 x i16> %v } define <16 x i16> @vp_cttz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v16i16_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 5 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: li a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v16i16_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 5 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 1 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: li a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsub.vx v10, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 3 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: li a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl) @@ -3382,251 +2702,135 @@ define <16 x i16> @vp_cttz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroex } define <2 x i32> 
@vp_cttz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v2i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl) ret <2 x i32> %v } define <2 x i32> @vp_cttz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v2i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, 
a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v2i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl) - ret <2 x i32> %v -} - -define <4 x i32> @vp_cttz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v9, v8, v0.t -; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: 
lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <4 x i32> @vp_cttz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_cttz_zero_undef_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl) ret <4 x i32> %v } define <4 x i32> @vp_cttz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v4i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v9, v9, a0 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v4i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 
1 -; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v9, v9, a0 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsub.vx v9, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl) @@ -3634,125 +2838,67 @@ define <4 x i32> @vp_cttz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext % } define <8 x i32> @vp_cttz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v8i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 
209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t +; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl) ret <8 x i32> %v } define <8 x i32> @vp_cttz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v8i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v10, v10, a0 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v8i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: 
vp_cttz_zero_undef_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsub.vx v10, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl) @@ -3760,125 +2906,67 @@ define <8 x i32> @vp_cttz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext % } define <16 x i32> @vp_cttz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v16i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0, v0.t -; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vadd.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v16i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v12, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0, v0.t -; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vv v8, v8, v12, v0.t +; CHECK-NEXT: 
vsrl.vi v12, v8, 1, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t +; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t +; CHECK-NEXT: ret %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl) ret <16 x i32> %v } define <16 x i32> @vp_cttz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { -; RV32-LABEL: vp_cttz_zero_undef_v16i32_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: lui a0, 4112 -; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: vmul.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_cttz_zero_undef_v16i32_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1 -; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 349525 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: vand.vx v12, v12, a0 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 209715 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addiw a0, a0, -241 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: ret +; CHECK-LABEL: vp_cttz_zero_undef_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsub.vx v12, v8, a1 +; CHECK-NEXT: vnot.v v8, v8 +; CHECK-NEXT: vand.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vi v12, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: lui a0, 4112 +; CHECK-NEXT: addi 
a0, a0, 257 +; CHECK-NEXT: vmul.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll index 83774704f9ca9..1b422730f2ac7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -113,11 +113,11 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV64I-NEXT: lui a1, 5 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 3 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 @@ -125,7 +125,7 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 1 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: li a1, 257 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 @@ -133,65 +133,35 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV64I-NEXT: ret ; -; LMULMAX1-RV32-LABEL: cttz_v8i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: li a1, 1 -; LMULMAX1-RV32-NEXT: vsub.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: vnot.v v8, v8 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: lui a1, 5 -; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 3 -; LMULMAX1-RV32-NEXT: addi a1, a1, 819 -; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 1 -; LMULMAX1-RV32-NEXT: addi a1, a1, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: li a1, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: cttz_v8i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: li a1, 1 -; LMULMAX1-RV64-NEXT: vsub.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vnot.v v8, v8 -; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 5 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 3 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; 
LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 1 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: li a1, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX1-LABEL: cttz_v8i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX1-NEXT: vle16.v v8, (a0) +; LMULMAX1-NEXT: li a1, 1 +; LMULMAX1-NEXT: vsub.vx v9, v8, a1 +; LMULMAX1-NEXT: vnot.v v8, v8 +; LMULMAX1-NEXT: vand.vv v8, v8, v9 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX1-NEXT: lui a1, 5 +; LMULMAX1-NEXT: addi a1, a1, 1365 +; LMULMAX1-NEXT: vand.vx v9, v9, a1 +; LMULMAX1-NEXT: vsub.vv v8, v8, v9 +; LMULMAX1-NEXT: lui a1, 3 +; LMULMAX1-NEXT: addi a1, a1, 819 +; LMULMAX1-NEXT: vand.vx v9, v8, a1 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-NEXT: vadd.vv v8, v9, v8 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 4 +; LMULMAX1-NEXT: vadd.vv v8, v8, v9 +; LMULMAX1-NEXT: lui a1, 1 +; LMULMAX1-NEXT: addi a1, a1, -241 +; LMULMAX1-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-NEXT: li a1, 257 +; LMULMAX1-NEXT: vmul.vx v8, v8, a1 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 +; LMULMAX1-NEXT: vse16.v v8, (a0) +; LMULMAX1-NEXT: ret ; ; LMULMAX2-RV32F-LABEL: cttz_v8i16: ; LMULMAX2-RV32F: # %bb.0: @@ -330,11 +300,11 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 @@ -342,10 +312,10 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) @@ -752,161 +722,83 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind { declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1) define void @cttz_v16i16(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32-LABEL: cttz_v16i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a1, 1 -; LMULMAX2-RV32-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 5 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 3 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; 
LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 1 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: li a1, 257 -; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: cttz_v16i16: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a1, 1 -; LMULMAX2-RV64-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 5 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 3 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 1 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: li a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: cttz_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV32-NEXT: li a2, 1 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a2 -; LMULMAX1-RV32-NEXT: vnot.v v8, v8 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: lui a3, 5 -; LMULMAX1-RV32-NEXT: addi a3, a3, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a4, 3 -; LMULMAX1-RV32-NEXT: addi a4, a4, 819 -; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a4 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a5, 1 -; LMULMAX1-RV32-NEXT: addi a5, a5, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: li a6, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a6 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a2 -; LMULMAX1-RV32-NEXT: vnot.v v9, v9 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a4 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a6 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 8 -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: 
cttz_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV64-NEXT: li a2, 1 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2 -; LMULMAX1-RV64-NEXT: vnot.v v8, v8 -; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a3, 5 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 3 -; LMULMAX1-RV64-NEXT: addiw a4, a4, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a4 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a5, 1 -; LMULMAX1-RV64-NEXT: addiw a5, a5, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: li a6, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2 -; LMULMAX1-RV64-NEXT: vnot.v v9, v9 -; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a4 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 8 -; LMULMAX1-RV64-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX2-LABEL: cttz_v16i16: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-NEXT: vle16.v v8, (a0) +; LMULMAX2-NEXT: li a1, 1 +; LMULMAX2-NEXT: vsub.vx v10, v8, a1 +; LMULMAX2-NEXT: vnot.v v8, v8 +; LMULMAX2-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-NEXT: lui a1, 5 +; LMULMAX2-NEXT: addi a1, a1, 1365 +; LMULMAX2-NEXT: vand.vx v10, v10, a1 +; LMULMAX2-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-NEXT: lui a1, 3 +; LMULMAX2-NEXT: addi a1, a1, 819 +; LMULMAX2-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-NEXT: lui a1, 1 +; LMULMAX2-NEXT: addi a1, a1, -241 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: li a1, 257 +; LMULMAX2-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 8 +; LMULMAX2-NEXT: vse16.v v8, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-LABEL: cttz_v16i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vle16.v v8, (a1) +; LMULMAX1-NEXT: vle16.v v9, (a0) +; LMULMAX1-NEXT: li a2, 1 +; LMULMAX1-NEXT: vsub.vx v10, v8, a2 +; LMULMAX1-NEXT: vnot.v v8, v8 +; LMULMAX1-NEXT: vand.vv v8, v8, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-NEXT: lui a3, 5 +; LMULMAX1-NEXT: addi a3, a3, 1365 +; LMULMAX1-NEXT: vand.vx v10, v10, a3 +; LMULMAX1-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-NEXT: lui a4, 3 +; LMULMAX1-NEXT: addi a4, a4, 819 +; LMULMAX1-NEXT: vand.vx v10, v8, a4 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 +; 
LMULMAX1-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-NEXT: lui a5, 1 +; LMULMAX1-NEXT: addi a5, a5, -241 +; LMULMAX1-NEXT: vand.vx v8, v8, a5 +; LMULMAX1-NEXT: li a6, 257 +; LMULMAX1-NEXT: vmul.vx v8, v8, a6 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 +; LMULMAX1-NEXT: vsub.vx v10, v9, a2 +; LMULMAX1-NEXT: vnot.v v9, v9 +; LMULMAX1-NEXT: vand.vv v9, v9, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 +; LMULMAX1-NEXT: vand.vx v10, v10, a3 +; LMULMAX1-NEXT: vsub.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v10, v9, a4 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 +; LMULMAX1-NEXT: vand.vx v9, v9, a4 +; LMULMAX1-NEXT: vadd.vv v9, v10, v9 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-NEXT: vadd.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v9, v9, a5 +; LMULMAX1-NEXT: vmul.vx v9, v9, a6 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 8 +; LMULMAX1-NEXT: vse16.v v9, (a0) +; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: cttz_v16i16: ; LMULMAX8: # %bb.0: @@ -981,11 +873,11 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 @@ -993,10 +885,10 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) @@ -1400,11 +1292,11 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV64I-NEXT: lui a1, 5 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 3 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 @@ -1412,7 +1304,7 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 1 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: li a1, 257 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 @@ -1420,65 +1312,35 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV64I-NEXT: ret ; -; LMULMAX1-RV32-LABEL: cttz_zero_undef_v8i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; 
LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: li a1, 1 -; LMULMAX1-RV32-NEXT: vsub.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: vnot.v v8, v8 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: lui a1, 5 -; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 3 -; LMULMAX1-RV32-NEXT: addi a1, a1, 819 -; LMULMAX1-RV32-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 1 -; LMULMAX1-RV32-NEXT: addi a1, a1, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: li a1, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: cttz_zero_undef_v8i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: li a1, 1 -; LMULMAX1-RV64-NEXT: vsub.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vnot.v v8, v8 -; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 5 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 3 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 1 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: li a1, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX1-LABEL: cttz_zero_undef_v8i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX1-NEXT: vle16.v v8, (a0) +; LMULMAX1-NEXT: li a1, 1 +; LMULMAX1-NEXT: vsub.vx v9, v8, a1 +; LMULMAX1-NEXT: vnot.v v8, v8 +; LMULMAX1-NEXT: vand.vv v8, v8, v9 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX1-NEXT: lui a1, 5 +; LMULMAX1-NEXT: addi a1, a1, 1365 +; LMULMAX1-NEXT: vand.vx v9, v9, a1 +; LMULMAX1-NEXT: vsub.vv v8, v8, v9 +; LMULMAX1-NEXT: lui a1, 3 +; LMULMAX1-NEXT: addi a1, a1, 819 +; LMULMAX1-NEXT: vand.vx v9, v8, a1 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-NEXT: vadd.vv v8, v9, v8 +; LMULMAX1-NEXT: vsrl.vi v9, v8, 4 +; LMULMAX1-NEXT: vadd.vv v8, v8, v9 +; LMULMAX1-NEXT: lui a1, 1 +; LMULMAX1-NEXT: addi a1, a1, -241 +; LMULMAX1-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-NEXT: li a1, 257 +; LMULMAX1-NEXT: vmul.vx v8, v8, a1 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 +; LMULMAX1-NEXT: vse16.v v8, (a0) +; LMULMAX1-NEXT: ret ; ; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v8i16: ; LMULMAX2-RV32F: # %bb.0: @@ -1601,11 +1463,11 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 ; 
LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 @@ -1613,10 +1475,10 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) @@ -1987,161 +1849,83 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind { } define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32-LABEL: cttz_zero_undef_v16i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a1, 1 -; LMULMAX2-RV32-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 5 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 3 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 1 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: li a1, 257 -; LMULMAX2-RV32-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: cttz_zero_undef_v16i16: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a1, 1 -; LMULMAX2-RV64-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 5 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 3 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 1 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: li a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: cttz_zero_undef_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; 
LMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV32-NEXT: li a2, 1 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a2 -; LMULMAX1-RV32-NEXT: vnot.v v8, v8 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: lui a3, 5 -; LMULMAX1-RV32-NEXT: addi a3, a3, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a4, 3 -; LMULMAX1-RV32-NEXT: addi a4, a4, 819 -; LMULMAX1-RV32-NEXT: vand.vx v10, v8, a4 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: lui a5, 1 -; LMULMAX1-RV32-NEXT: addi a5, a5, -241 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: li a6, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a6 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a2 -; LMULMAX1-RV32-NEXT: vnot.v v9, v9 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v10, v9, a4 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a6 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 8 -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: cttz_zero_undef_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV64-NEXT: li a2, 1 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2 -; LMULMAX1-RV64-NEXT: vnot.v v8, v8 -; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a3, 5 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 3 -; LMULMAX1-RV64-NEXT: addiw a4, a4, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a4 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a5, 1 -; LMULMAX1-RV64-NEXT: addiw a5, a5, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: li a6, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2 -; LMULMAX1-RV64-NEXT: vnot.v v9, v9 -; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a4 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 8 -; 
LMULMAX1-RV64-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX2-LABEL: cttz_zero_undef_v16i16: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-NEXT: vle16.v v8, (a0) +; LMULMAX2-NEXT: li a1, 1 +; LMULMAX2-NEXT: vsub.vx v10, v8, a1 +; LMULMAX2-NEXT: vnot.v v8, v8 +; LMULMAX2-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-NEXT: lui a1, 5 +; LMULMAX2-NEXT: addi a1, a1, 1365 +; LMULMAX2-NEXT: vand.vx v10, v10, a1 +; LMULMAX2-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-NEXT: lui a1, 3 +; LMULMAX2-NEXT: addi a1, a1, 819 +; LMULMAX2-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-NEXT: lui a1, 1 +; LMULMAX2-NEXT: addi a1, a1, -241 +; LMULMAX2-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-NEXT: li a1, 257 +; LMULMAX2-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-NEXT: vsrl.vi v8, v8, 8 +; LMULMAX2-NEXT: vse16.v v8, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-LABEL: cttz_zero_undef_v16i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vle16.v v8, (a1) +; LMULMAX1-NEXT: vle16.v v9, (a0) +; LMULMAX1-NEXT: li a2, 1 +; LMULMAX1-NEXT: vsub.vx v10, v8, a2 +; LMULMAX1-NEXT: vnot.v v8, v8 +; LMULMAX1-NEXT: vand.vv v8, v8, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-NEXT: lui a3, 5 +; LMULMAX1-NEXT: addi a3, a3, 1365 +; LMULMAX1-NEXT: vand.vx v10, v10, a3 +; LMULMAX1-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-NEXT: lui a4, 3 +; LMULMAX1-NEXT: addi a4, a4, 819 +; LMULMAX1-NEXT: vand.vx v10, v8, a4 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-NEXT: lui a5, 1 +; LMULMAX1-NEXT: addi a5, a5, -241 +; LMULMAX1-NEXT: vand.vx v8, v8, a5 +; LMULMAX1-NEXT: li a6, 257 +; LMULMAX1-NEXT: vmul.vx v8, v8, a6 +; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 +; LMULMAX1-NEXT: vsub.vx v10, v9, a2 +; LMULMAX1-NEXT: vnot.v v9, v9 +; LMULMAX1-NEXT: vand.vv v9, v9, v10 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 +; LMULMAX1-NEXT: vand.vx v10, v10, a3 +; LMULMAX1-NEXT: vsub.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v10, v9, a4 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 +; LMULMAX1-NEXT: vand.vx v9, v9, a4 +; LMULMAX1-NEXT: vadd.vv v9, v10, v9 +; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-NEXT: vadd.vv v9, v9, v10 +; LMULMAX1-NEXT: vand.vx v9, v9, a5 +; LMULMAX1-NEXT: vmul.vx v9, v9, a6 +; LMULMAX1-NEXT: vsrl.vi v9, v9, 8 +; LMULMAX1-NEXT: vse16.v v9, (a0) +; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: cttz_zero_undef_v16i16: ; LMULMAX8: # %bb.0: @@ -2212,11 +1996,11 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 @@ -2224,10 +2008,10 @@ define void @cttz_zero_undef_v8i32(ptr %x, 
ptr %y) nounwind { ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 ; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 ; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) @@ -2501,3 +2285,8 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { store <4 x i64> %c, ptr %x ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; LMULMAX1-RV32: {{.*}} +; LMULMAX1-RV64: {{.*}} +; LMULMAX2-RV32: {{.*}} +; LMULMAX2-RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll index 5605437443d76..95c1beb284c40 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -997,7 +997,7 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) { ; RV64NOM-NEXT: vand.vv v8, v8, v11 ; RV64NOM-NEXT: vadd.vv v8, v9, v8 ; RV64NOM-NEXT: lui a0, 12320 -; RV64NOM-NEXT: addiw a0, a0, 257 +; RV64NOM-NEXT: addi a0, a0, 257 ; RV64NOM-NEXT: vmv.s.x v9, a0 ; RV64NOM-NEXT: vsext.vf4 v10, v9 ; RV64NOM-NEXT: vsra.vv v8, v8, v10 @@ -1053,7 +1053,7 @@ define i32 @extractelt_udiv_v4i32(<4 x i32> %x) { ; RV64NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64NOM-NEXT: vsrl.vi v8, v8, 0 ; RV64NOM-NEXT: lui a0, 322639 -; RV64NOM-NEXT: addiw a0, a0, -945 +; RV64NOM-NEXT: addi a0, a0, -945 ; RV64NOM-NEXT: vmulhu.vx v8, v8, a0 ; RV64NOM-NEXT: vslidedown.vi v8, v8, 2 ; RV64NOM-NEXT: vmv.x.s a0, v8 @@ -1064,7 +1064,7 @@ define i32 @extractelt_udiv_v4i32(<4 x i32> %x) { ; RV64M-LABEL: extractelt_udiv_v4i32: ; RV64M: # %bb.0: ; RV64M-NEXT: lui a0, 322639 -; RV64M-NEXT: addiw a0, a0, -945 +; RV64M-NEXT: addi a0, a0, -945 ; RV64M-NEXT: slli a0, a0, 32 ; RV64M-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64M-NEXT: vslidedown.vi v8, v8, 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index c1a4aa4b05d4b..ea818df7329c7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -236,79 +236,42 @@ define <64 x half> @interleave_v32f16(<32 x half> %x, <32 x half> %y) { } define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) { -; RV32-V128-LABEL: interleave_v32f32: -; RV32-V128: # %bb.0: -; RV32-V128-NEXT: addi sp, sp, -16 -; RV32-V128-NEXT: .cfi_def_cfa_offset 16 -; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 2 -; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb -; RV32-V128-NEXT: lui a0, %hi(.LCPI10_0) -; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) -; RV32-V128-NEXT: li a1, 32 -; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV32-V128-NEXT: vle16.v v4, (a0) -; RV32-V128-NEXT: lui a0, %hi(.LCPI10_1) -; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_1) -; RV32-V128-NEXT: vle16.v v24, (a0) -; RV32-V128-NEXT: addi a0, sp, 16 -; RV32-V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill -; RV32-V128-NEXT: lui a0, 699051 -; RV32-V128-NEXT: addi a0, a0, -1366 -; RV32-V128-NEXT: vmv.s.x 
v0, a0 -; RV32-V128-NEXT: vrgatherei16.vv v24, v8, v4 -; RV32-V128-NEXT: addi a0, sp, 16 -; RV32-V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t -; RV32-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-V128-NEXT: vwaddu.vv v0, v8, v16 -; RV32-V128-NEXT: li a0, -1 -; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16 -; RV32-V128-NEXT: vmv8r.v v8, v0 -; RV32-V128-NEXT: vmv8r.v v16, v24 -; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 2 -; RV32-V128-NEXT: add sp, sp, a0 -; RV32-V128-NEXT: addi sp, sp, 16 -; RV32-V128-NEXT: ret -; -; RV64-V128-LABEL: interleave_v32f32: -; RV64-V128: # %bb.0: -; RV64-V128-NEXT: addi sp, sp, -16 -; RV64-V128-NEXT: .cfi_def_cfa_offset 16 -; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 2 -; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb -; RV64-V128-NEXT: lui a0, %hi(.LCPI10_0) -; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) -; RV64-V128-NEXT: li a1, 32 -; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV64-V128-NEXT: vle16.v v4, (a0) -; RV64-V128-NEXT: lui a0, %hi(.LCPI10_1) -; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_1) -; RV64-V128-NEXT: vle16.v v24, (a0) -; RV64-V128-NEXT: addi a0, sp, 16 -; RV64-V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill -; RV64-V128-NEXT: lui a0, 699051 -; RV64-V128-NEXT: addiw a0, a0, -1366 -; RV64-V128-NEXT: vmv.s.x v0, a0 -; RV64-V128-NEXT: vrgatherei16.vv v24, v8, v4 -; RV64-V128-NEXT: addi a0, sp, 16 -; RV64-V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t -; RV64-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-V128-NEXT: vwaddu.vv v0, v8, v16 -; RV64-V128-NEXT: li a0, -1 -; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16 -; RV64-V128-NEXT: vmv8r.v v8, v0 -; RV64-V128-NEXT: vmv8r.v v16, v24 -; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 2 -; RV64-V128-NEXT: add sp, sp, a0 -; RV64-V128-NEXT: addi sp, sp, 16 -; RV64-V128-NEXT: ret +; V128-LABEL: interleave_v32f32: +; V128: # %bb.0: +; V128-NEXT: addi sp, sp, -16 +; V128-NEXT: .cfi_def_cfa_offset 16 +; V128-NEXT: csrr a0, vlenb +; V128-NEXT: slli a0, a0, 2 +; V128-NEXT: sub sp, sp, a0 +; V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; V128-NEXT: lui a0, %hi(.LCPI10_0) +; V128-NEXT: addi a0, a0, %lo(.LCPI10_0) +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; V128-NEXT: vle16.v v4, (a0) +; V128-NEXT: lui a0, %hi(.LCPI10_1) +; V128-NEXT: addi a0, a0, %lo(.LCPI10_1) +; V128-NEXT: vle16.v v24, (a0) +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill +; V128-NEXT: lui a0, 699051 +; V128-NEXT: addi a0, a0, -1366 +; V128-NEXT: vmv.s.x v0, a0 +; V128-NEXT: vrgatherei16.vv v24, v8, v4 +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload +; V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t +; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; V128-NEXT: vwaddu.vv v0, v8, v16 +; V128-NEXT: li a0, -1 +; V128-NEXT: vwmaccu.vx v0, a0, v16 +; V128-NEXT: vmv8r.v v8, v0 +; V128-NEXT: vmv8r.v v16, v24 +; V128-NEXT: csrr a0, vlenb +; V128-NEXT: slli a0, a0, 2 +; V128-NEXT: add sp, sp, a0 +; V128-NEXT: addi sp, sp, 16 +; V128-NEXT: ret ; ; V512-LABEL: interleave_v32f32: ; V512: # %bb.0: @@ -375,31 +338,18 @@ define <4 x 
float> @unary_interleave_v4f32(<4 x float> %x) { ; FIXME: Is there better codegen we can do here? define <4 x double> @unary_interleave_v4f64(<4 x double> %x) { -; RV32-V128-LABEL: unary_interleave_v4f64: -; RV32-V128: # %bb.0: -; RV32-V128-NEXT: lui a0, 12304 -; RV32-V128-NEXT: addi a0, a0, 512 -; RV32-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-V128-NEXT: vmv.s.x v10, a0 -; RV32-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32-V128-NEXT: vsext.vf2 v12, v10 -; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v12 -; RV32-V128-NEXT: vmv.v.v v8, v10 -; RV32-V128-NEXT: ret -; -; RV64-V128-LABEL: unary_interleave_v4f64: -; RV64-V128: # %bb.0: -; RV64-V128-NEXT: lui a0, 12304 -; RV64-V128-NEXT: addiw a0, a0, 512 -; RV64-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-V128-NEXT: vmv.s.x v10, a0 -; RV64-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64-V128-NEXT: vsext.vf2 v12, v10 -; RV64-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV64-V128-NEXT: vrgatherei16.vv v10, v8, v12 -; RV64-V128-NEXT: vmv.v.v v8, v10 -; RV64-V128-NEXT: ret +; V128-LABEL: unary_interleave_v4f64: +; V128: # %bb.0: +; V128-NEXT: lui a0, 12304 +; V128-NEXT: addi a0, a0, 512 +; V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; V128-NEXT: vmv.s.x v10, a0 +; V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; V128-NEXT: vsext.vf2 v12, v10 +; V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; V128-NEXT: vrgatherei16.vv v10, v8, v12 +; V128-NEXT: vmv.v.v v8, v10 +; V128-NEXT: ret ; ; RV32-V512-LABEL: unary_interleave_v4f64: ; RV32-V512: # %bb.0: @@ -417,7 +367,7 @@ define <4 x double> @unary_interleave_v4f64(<4 x double> %x) { ; RV64-V512-LABEL: unary_interleave_v4f64: ; RV64-V512: # %bb.0: ; RV64-V512-NEXT: lui a0, 12304 -; RV64-V512-NEXT: addiw a0, a0, 512 +; RV64-V512-NEXT: addi a0, a0, 512 ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-V512-NEXT: vmv.s.x v9, a0 ; RV64-V512-NEXT: vsext.vf8 v10, v9 @@ -477,3 +427,6 @@ define <8 x float> @unary_interleave_v8f32(<8 x float> %x) { %a = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> ret <8 x float> %a } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32-V128: {{.*}} +; RV64-V128: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll index a7852ea5843d8..59eb4b89a2f56 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -55,61 +55,35 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) { } define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) { -; RV32-LABEL: vrgather_permute_shuffle_vu_v4f64: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 4096 -; RV32-NEXT: addi a0, a0, 513 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v10, a0 -; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32-NEXT: vsext.vf2 v12, v10 -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV32-NEXT: vrgatherei16.vv v10, v8, v12 -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: vrgather_permute_shuffle_vu_v4f64: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4096 -; RV64-NEXT: addiw a0, a0, 513 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64-NEXT: vsext.vf2 v12, v10 -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV64-NEXT: vrgatherei16.vv v10, v8, v12 -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 4096 +; CHECK-NEXT: addi a0, a0, 513 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret %s = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> ret <4 x double> %s } define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) { -; RV32-LABEL: vrgather_permute_shuffle_uv_v4f64: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 4096 -; RV32-NEXT: addi a0, a0, 513 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v10, a0 -; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32-NEXT: vsext.vf2 v12, v10 -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV32-NEXT: vrgatherei16.vv v10, v8, v12 -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: vrgather_permute_shuffle_uv_v4f64: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4096 -; RV64-NEXT: addiw a0, a0, 513 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64-NEXT: vsext.vf2 v12, v10 -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV64-NEXT: vrgatherei16.vv v10, v8, v12 -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: vrgather_permute_shuffle_uv_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 4096 +; CHECK-NEXT: addi a0, a0, 513 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret %s = shufflevector <4 x double> poison, <4 x double> %x, <4 x i32> ret <4 x double> %s } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index b648420aa2e03..e9412019a0dec 
100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -324,40 +324,24 @@ define <4 x i64> @buildvec_vid_step2_add0_v4i64() { } define <4 x i8> @buildvec_no_vid_v4i8_0() { -; RV32-LABEL: buildvec_no_vid_v4i8_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 28768 -; RV32-NEXT: addi a0, a0, 769 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: buildvec_no_vid_v4i8_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 28768 -; RV64-NEXT: addiw a0, a0, 769 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: buildvec_no_vid_v4i8_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 28768 +; CHECK-NEXT: addi a0, a0, 769 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: ret ret <4 x i8> } define <4 x i8> @buildvec_no_vid_v4i8_1() { -; RV32-LABEL: buildvec_no_vid_v4i8_1: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 28752 -; RV32-NEXT: addi a0, a0, 512 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: buildvec_no_vid_v4i8_1: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 28752 -; RV64-NEXT: addiw a0, a0, 512 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: buildvec_no_vid_v4i8_1: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 28752 +; CHECK-NEXT: addi a0, a0, 512 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: ret ret <4 x i8> } @@ -372,21 +356,13 @@ define <4 x i8> @buildvec_no_vid_v4i8_2() { } define <4 x i8> @buildvec_no_vid_v4i8_3() { -; RV32-LABEL: buildvec_no_vid_v4i8_3: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 28672 -; RV32-NEXT: addi a0, a0, 255 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: buildvec_no_vid_v4i8_3: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 28672 -; RV64-NEXT: addiw a0, a0, 255 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: buildvec_no_vid_v4i8_3: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 28672 +; CHECK-NEXT: addi a0, a0, 255 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: ret ret <4 x i8> } @@ -400,21 +376,13 @@ define <4 x i8> @buildvec_no_vid_v4i8_4() { } define <4 x i8> @buildvec_no_vid_v4i8_5() { -; RV32-LABEL: buildvec_no_vid_v4i8_5: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 1032144 -; RV32-NEXT: addi a0, a0, -257 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: buildvec_no_vid_v4i8_5: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1032144 -; RV64-NEXT: addiw a0, a0, -257 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: buildvec_no_vid_v4i8_5: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1032144 +; CHECK-NEXT: addi a0, a0, -257 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: ret ret <4 x i8> } @@ -529,25 +497,15 @@ define void @buildvec_seq_v8i8_v4i16(ptr %x) { } define void @buildvec_seq_v8i8_v2i32(ptr %x) { -; RV32-LABEL: buildvec_seq_v8i8_v2i32: -; RV32: # %bb.0: -; RV32-NEXT: lui a1, 48 -; RV32-NEXT: addi a1, a1, 513 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.x v8, a1 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vse8.v v8, 
(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: buildvec_seq_v8i8_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, 48 -; RV64-NEXT: addiw a1, a1, 513 -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vmv.v.x v8, a1 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vse8.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: buildvec_seq_v8i8_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 48 +; CHECK-NEXT: addi a1, a1, 513 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret store <8 x i8> , ptr %x ret void } @@ -823,21 +781,13 @@ define <4 x i8> @buildvec_not_vid_v4i8_1() { } define <4 x i8> @buildvec_not_vid_v4i8_2() { -; RV32-LABEL: buildvec_not_vid_v4i8_2: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, 771 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: buildvec_not_vid_v4i8_2: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, 771 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: buildvec_not_vid_v4i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, 771 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: ret ret <4 x i8> } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index 83e64651c5c63..a54fa2e9b765f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -395,79 +395,42 @@ define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) { } define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { -; RV32-V128-LABEL: interleave_v32i32: -; RV32-V128: # %bb.0: -; RV32-V128-NEXT: addi sp, sp, -16 -; RV32-V128-NEXT: .cfi_def_cfa_offset 16 -; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 2 -; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb -; RV32-V128-NEXT: lui a0, %hi(.LCPI17_0) -; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) -; RV32-V128-NEXT: li a1, 32 -; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV32-V128-NEXT: vle16.v v4, (a0) -; RV32-V128-NEXT: lui a0, %hi(.LCPI17_1) -; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_1) -; RV32-V128-NEXT: vle16.v v24, (a0) -; RV32-V128-NEXT: addi a0, sp, 16 -; RV32-V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill -; RV32-V128-NEXT: lui a0, 699051 -; RV32-V128-NEXT: addi a0, a0, -1366 -; RV32-V128-NEXT: vmv.s.x v0, a0 -; RV32-V128-NEXT: vrgatherei16.vv v24, v8, v4 -; RV32-V128-NEXT: addi a0, sp, 16 -; RV32-V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t -; RV32-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-V128-NEXT: vwaddu.vv v0, v8, v16 -; RV32-V128-NEXT: li a0, -1 -; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16 -; RV32-V128-NEXT: vmv8r.v v8, v0 -; RV32-V128-NEXT: vmv8r.v v16, v24 -; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 2 -; RV32-V128-NEXT: add sp, sp, a0 -; RV32-V128-NEXT: addi sp, sp, 16 -; RV32-V128-NEXT: ret -; -; RV64-V128-LABEL: interleave_v32i32: -; RV64-V128: # %bb.0: -; RV64-V128-NEXT: addi sp, sp, -16 -; RV64-V128-NEXT: .cfi_def_cfa_offset 16 -; 
RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 2 -; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb -; RV64-V128-NEXT: lui a0, %hi(.LCPI17_0) -; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) -; RV64-V128-NEXT: li a1, 32 -; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV64-V128-NEXT: vle16.v v4, (a0) -; RV64-V128-NEXT: lui a0, %hi(.LCPI17_1) -; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_1) -; RV64-V128-NEXT: vle16.v v24, (a0) -; RV64-V128-NEXT: addi a0, sp, 16 -; RV64-V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill -; RV64-V128-NEXT: lui a0, 699051 -; RV64-V128-NEXT: addiw a0, a0, -1366 -; RV64-V128-NEXT: vmv.s.x v0, a0 -; RV64-V128-NEXT: vrgatherei16.vv v24, v8, v4 -; RV64-V128-NEXT: addi a0, sp, 16 -; RV64-V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t -; RV64-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-V128-NEXT: vwaddu.vv v0, v8, v16 -; RV64-V128-NEXT: li a0, -1 -; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16 -; RV64-V128-NEXT: vmv8r.v v8, v0 -; RV64-V128-NEXT: vmv8r.v v16, v24 -; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 2 -; RV64-V128-NEXT: add sp, sp, a0 -; RV64-V128-NEXT: addi sp, sp, 16 -; RV64-V128-NEXT: ret +; V128-LABEL: interleave_v32i32: +; V128: # %bb.0: +; V128-NEXT: addi sp, sp, -16 +; V128-NEXT: .cfi_def_cfa_offset 16 +; V128-NEXT: csrr a0, vlenb +; V128-NEXT: slli a0, a0, 2 +; V128-NEXT: sub sp, sp, a0 +; V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; V128-NEXT: lui a0, %hi(.LCPI17_0) +; V128-NEXT: addi a0, a0, %lo(.LCPI17_0) +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; V128-NEXT: vle16.v v4, (a0) +; V128-NEXT: lui a0, %hi(.LCPI17_1) +; V128-NEXT: addi a0, a0, %lo(.LCPI17_1) +; V128-NEXT: vle16.v v24, (a0) +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill +; V128-NEXT: lui a0, 699051 +; V128-NEXT: addi a0, a0, -1366 +; V128-NEXT: vmv.s.x v0, a0 +; V128-NEXT: vrgatherei16.vv v24, v8, v4 +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload +; V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t +; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; V128-NEXT: vwaddu.vv v0, v8, v16 +; V128-NEXT: li a0, -1 +; V128-NEXT: vwmaccu.vx v0, a0, v16 +; V128-NEXT: vmv8r.v v8, v0 +; V128-NEXT: vmv8r.v v16, v24 +; V128-NEXT: csrr a0, vlenb +; V128-NEXT: slli a0, a0, 2 +; V128-NEXT: add sp, sp, a0 +; V128-NEXT: addi sp, sp, 16 +; V128-NEXT: ret ; ; V512-LABEL: interleave_v32i32: ; V512: # %bb.0: @@ -509,49 +472,27 @@ define <4 x i8> @unary_interleave_v4i8(<4 x i8> %x) { ; This shouldn't be interleaved define <4 x i8> @unary_interleave_v4i8_invalid(<4 x i8> %x) { -; RV32-V128-LABEL: unary_interleave_v4i8_invalid: -; RV32-V128: # %bb.0: -; RV32-V128-NEXT: lui a0, 16 -; RV32-V128-NEXT: addi a0, a0, 768 -; RV32-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-V128-NEXT: vmv.s.x v10, a0 -; RV32-V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; RV32-V128-NEXT: vrgather.vv v9, v8, v10 -; RV32-V128-NEXT: vmv1r.v v8, v9 -; RV32-V128-NEXT: ret -; -; RV64-V128-LABEL: unary_interleave_v4i8_invalid: -; RV64-V128: # %bb.0: -; RV64-V128-NEXT: lui a0, 16 -; RV64-V128-NEXT: addiw a0, a0, 768 -; RV64-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-V128-NEXT: vmv.s.x v10, a0 
-; RV64-V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; RV64-V128-NEXT: vrgather.vv v9, v8, v10 -; RV64-V128-NEXT: vmv1r.v v8, v9 -; RV64-V128-NEXT: ret -; -; RV32-V512-LABEL: unary_interleave_v4i8_invalid: -; RV32-V512: # %bb.0: -; RV32-V512-NEXT: lui a0, 16 -; RV32-V512-NEXT: addi a0, a0, 768 -; RV32-V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma -; RV32-V512-NEXT: vmv.s.x v10, a0 -; RV32-V512-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; RV32-V512-NEXT: vrgather.vv v9, v8, v10 -; RV32-V512-NEXT: vmv1r.v v8, v9 -; RV32-V512-NEXT: ret +; V128-LABEL: unary_interleave_v4i8_invalid: +; V128: # %bb.0: +; V128-NEXT: lui a0, 16 +; V128-NEXT: addi a0, a0, 768 +; V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; V128-NEXT: vmv.s.x v10, a0 +; V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; V128-NEXT: vrgather.vv v9, v8, v10 +; V128-NEXT: vmv1r.v v8, v9 +; V128-NEXT: ret ; -; RV64-V512-LABEL: unary_interleave_v4i8_invalid: -; RV64-V512: # %bb.0: -; RV64-V512-NEXT: lui a0, 16 -; RV64-V512-NEXT: addiw a0, a0, 768 -; RV64-V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma -; RV64-V512-NEXT: vmv.s.x v10, a0 -; RV64-V512-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; RV64-V512-NEXT: vrgather.vv v9, v8, v10 -; RV64-V512-NEXT: vmv1r.v v8, v9 -; RV64-V512-NEXT: ret +; V512-LABEL: unary_interleave_v4i8_invalid: +; V512: # %bb.0: +; V512-NEXT: lui a0, 16 +; V512-NEXT: addi a0, a0, 768 +; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; V512-NEXT: vmv.s.x v10, a0 +; V512-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; V512-NEXT: vrgather.vv v9, v8, v10 +; V512-NEXT: vmv1r.v v8, v9 +; V512-NEXT: ret %a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> ret <4 x i8> %a } @@ -608,31 +549,18 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) { ; FIXME: Is there better codegen we can do here? 
define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) { -; RV32-V128-LABEL: unary_interleave_v4i64: -; RV32-V128: # %bb.0: -; RV32-V128-NEXT: lui a0, 12304 -; RV32-V128-NEXT: addi a0, a0, 512 -; RV32-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-V128-NEXT: vmv.s.x v10, a0 -; RV32-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32-V128-NEXT: vsext.vf2 v12, v10 -; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v12 -; RV32-V128-NEXT: vmv.v.v v8, v10 -; RV32-V128-NEXT: ret -; -; RV64-V128-LABEL: unary_interleave_v4i64: -; RV64-V128: # %bb.0: -; RV64-V128-NEXT: lui a0, 12304 -; RV64-V128-NEXT: addiw a0, a0, 512 -; RV64-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-V128-NEXT: vmv.s.x v10, a0 -; RV64-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64-V128-NEXT: vsext.vf2 v12, v10 -; RV64-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV64-V128-NEXT: vrgatherei16.vv v10, v8, v12 -; RV64-V128-NEXT: vmv.v.v v8, v10 -; RV64-V128-NEXT: ret +; V128-LABEL: unary_interleave_v4i64: +; V128: # %bb.0: +; V128-NEXT: lui a0, 12304 +; V128-NEXT: addi a0, a0, 512 +; V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; V128-NEXT: vmv.s.x v10, a0 +; V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; V128-NEXT: vsext.vf2 v12, v10 +; V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; V128-NEXT: vrgatherei16.vv v10, v8, v12 +; V128-NEXT: vmv.v.v v8, v10 +; V128-NEXT: ret ; ; RV32-V512-LABEL: unary_interleave_v4i64: ; RV32-V512: # %bb.0: @@ -650,7 +578,7 @@ define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) { ; RV64-V512-LABEL: unary_interleave_v4i64: ; RV64-V512: # %bb.0: ; RV64-V512-NEXT: lui a0, 12304 -; RV64-V512-NEXT: addiw a0, a0, 512 +; RV64-V512-NEXT: addi a0, a0, 512 ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-V512-NEXT: vmv.s.x v9, a0 ; RV64-V512-NEXT: vsext.vf8 v10, v9 @@ -750,3 +678,6 @@ define <4 x i8> @unary_interleave_10uu_v4i8(<4 x i8> %x) { ret <4 x i8> %a } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32-V128: {{.*}} +; RV64-V128: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 927fd3e203355..a56a81f5f793b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -51,57 +51,33 @@ define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) { } define <4 x i16> @vrgather_permute_shuffle_vu_v4i16(<4 x i16> %x) { -; RV32-LABEL: vrgather_permute_shuffle_vu_v4i16: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 4096 -; RV32-NEXT: addi a0, a0, 513 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32-NEXT: vsext.vf2 v10, v9 -; RV32-NEXT: vrgather.vv v9, v8, v10 -; RV32-NEXT: vmv1r.v v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vrgather_permute_shuffle_vu_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4096 -; RV64-NEXT: addiw a0, a0, 513 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64-NEXT: vsext.vf2 v10, v9 -; RV64-NEXT: vrgather.vv v9, v8, v10 -; RV64-NEXT: vmv1r.v v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vrgather_permute_shuffle_vu_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 4096 +; CHECK-NEXT: addi a0, a0, 513 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v9 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> ret <4 x i16> %s } define <4 x i16> @vrgather_permute_shuffle_uv_v4i16(<4 x i16> %x) { -; RV32-LABEL: vrgather_permute_shuffle_uv_v4i16: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 4096 -; RV32-NEXT: addi a0, a0, 513 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32-NEXT: vsext.vf2 v10, v9 -; RV32-NEXT: vrgather.vv v9, v8, v10 -; RV32-NEXT: vmv1r.v v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vrgather_permute_shuffle_uv_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4096 -; RV64-NEXT: addiw a0, a0, 513 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64-NEXT: vsext.vf2 v10, v9 -; RV64-NEXT: vrgather.vv v9, v8, v10 -; RV64-NEXT: vmv1r.v v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vrgather_permute_shuffle_uv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 4096 +; CHECK-NEXT: addi a0, a0, 513 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v9 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret %s = shufflevector <4 x i16> poison, <4 x i16> %x, <4 x i32> ret <4 x i16> %s } @@ -439,33 +415,19 @@ define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) { } define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) { -; RV32-LABEL: splat_ve2_we0_ins_i2ve4: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 8256 -; RV32-NEXT: addi a0, a0, 514 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.x v11, a0 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV32-NEXT: li a0, 66 -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vrgather.vv v10, v8, v11 -; RV32-NEXT: vrgather.vi v10, v9, 0, v0.t -; 
RV32-NEXT: vmv1r.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: splat_ve2_we0_ins_i2ve4: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 8256 -; RV64-NEXT: addiw a0, a0, 514 -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vmv.v.x v11, a0 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV64-NEXT: li a0, 66 -; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vrgather.vv v10, v8, v11 -; RV64-NEXT: vrgather.vi v10, v9, 0, v0.t -; RV64-NEXT: vmv1r.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: splat_ve2_we0_ins_i2ve4: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 8256 +; CHECK-NEXT: addi a0, a0, 514 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v11, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: li a0, 66 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> ret <8 x i8> %shuff } @@ -490,43 +452,24 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) { } define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) { -; RV32-LABEL: splat_ve2_we0_ins_i2ve4_i5we6: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vmv.v.i v10, 6 -; RV32-NEXT: vmv.v.i v11, 0 -; RV32-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; RV32-NEXT: vslideup.vi v11, v10, 5 -; RV32-NEXT: lui a0, 8256 -; RV32-NEXT: addi a0, a0, 2 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.x v12, a0 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV32-NEXT: li a0, 98 -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vrgather.vv v10, v8, v12 -; RV32-NEXT: vrgather.vv v10, v9, v11, v0.t -; RV32-NEXT: vmv1r.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: splat_ve2_we0_ins_i2ve4_i5we6: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vmv.v.i v10, 6 -; RV64-NEXT: vmv.v.i v11, 0 -; RV64-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; RV64-NEXT: vslideup.vi v11, v10, 5 -; RV64-NEXT: lui a0, 8256 -; RV64-NEXT: addiw a0, a0, 2 -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vmv.v.x v12, a0 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV64-NEXT: li a0, 98 -; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vrgather.vv v10, v8, v12 -; RV64-NEXT: vrgather.vv v10, v9, v11, v0.t -; RV64-NEXT: vmv1r.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: splat_ve2_we0_ins_i2ve4_i5we6: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 6 +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v11, v10, 5 +; CHECK-NEXT: lui a0, 8256 +; CHECK-NEXT: addi a0, a0, 2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: li a0, 98 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> ret <8 x i8> %shuff } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index b2a9813e50a18..e6868abdb5b1d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1099,99 +1099,52 @@ define void @urem_v2i64(ptr %x, ptr %y) { } define void @mulhu_v16i8(ptr %x) { -; RV32-LABEL: mulhu_v16i8: -; RV32: # %bb.0: -; 
RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: lui a1, 3 -; RV32-NEXT: addi a1, a1, -2044 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: li a1, -128 -; RV32-NEXT: vmerge.vxm v10, v9, a1, v0 -; RV32-NEXT: lui a1, 1 -; RV32-NEXT: addi a2, a1, 32 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a2 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: lui a2, %hi(.LCPI65_0) -; RV32-NEXT: addi a2, a2, %lo(.LCPI65_0) -; RV32-NEXT: vle8.v v11, (a2) -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vsrl.vv v9, v8, v9 -; RV32-NEXT: vmulhu.vv v9, v9, v11 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: vmulhu.vv v8, v8, v10 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: li a2, 513 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a2 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 4 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: addi a1, a1, 78 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vim v9, v9, 3, v0 -; RV32-NEXT: lui a1, 8 -; RV32-NEXT: addi a1, a1, 304 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vim v9, v9, 2, v0 -; RV32-NEXT: vsrl.vv v8, v8, v9 -; RV32-NEXT: vse8.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhu_v16i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: lui a1, 3 -; RV64-NEXT: addiw a1, a1, -2044 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v9, 0 -; RV64-NEXT: li a1, -128 -; RV64-NEXT: vmerge.vxm v10, v9, a1, v0 -; RV64-NEXT: lui a1, 1 -; RV64-NEXT: addiw a2, a1, 32 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: lui a2, %hi(.LCPI65_0) -; RV64-NEXT: addi a2, a2, %lo(.LCPI65_0) -; RV64-NEXT: vle8.v v11, (a2) -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: vsrl.vv v9, v8, v9 -; RV64-NEXT: vmulhu.vv v9, v9, v11 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vmulhu.vv v8, v8, v10 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: li a2, 513 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v9, 4 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: addiw a1, a1, 78 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vim v9, v9, 3, v0 -; RV64-NEXT: lui a1, 8 -; RV64-NEXT: addiw a1, a1, 304 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vim v9, v9, 2, v0 -; RV64-NEXT: vsrl.vv v8, v8, v9 -; RV64-NEXT: vse8.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mulhu_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: lui a1, 3 +; CHECK-NEXT: addi a1, a1, -2044 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: li 
a1, -128 +; CHECK-NEXT: vmerge.vxm v10, v9, a1, v0 +; CHECK-NEXT: lui a1, 1 +; CHECK-NEXT: addi a2, a1, 32 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a2 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: lui a2, %hi(.LCPI65_0) +; CHECK-NEXT: addi a2, a2, %lo(.LCPI65_0) +; CHECK-NEXT: vle8.v v11, (a2) +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsrl.vv v9, v8, v9 +; CHECK-NEXT: vmulhu.vv v9, v9, v11 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: vmulhu.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: li a2, 513 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a2 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 4 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: addi a1, a1, 78 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v9, v9, 3, v0 +; CHECK-NEXT: lui a1, 8 +; CHECK-NEXT: addi a1, a1, 304 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i8>, ptr %x %b = udiv <16 x i8> %a, store <16 x i8> %b, ptr %x @@ -1238,53 +1191,29 @@ define void @mulhu_v8i16(ptr %x) { } define void @mulhu_v6i16(ptr %x) { -; RV32-LABEL: mulhu_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vadd.vi v9, v9, 12 -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vdivu.vv v9, v10, v9 -; RV32-NEXT: lui a1, 45217 -; RV32-NEXT: addi a1, a1, -1785 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v10, a1 -; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32-NEXT: vsext.vf2 v11, v10 -; RV32-NEXT: vdivu.vv v8, v8, v11 -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v8, v9, 4 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhu_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vid.v v9 -; RV64-NEXT: vadd.vi v9, v9, 12 -; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vdivu.vv v9, v10, v9 -; RV64-NEXT: lui a1, 45217 -; RV64-NEXT: addiw a1, a1, -1785 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a1 -; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64-NEXT: vsext.vf2 v11, v10 -; RV64-NEXT: vdivu.vv v8, v8, v11 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v8, v9, 4 -; RV64-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV64-NEXT: vse16.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mulhu_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vadd.vi v9, v9, 12 +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: 
vdivu.vv v9, v10, v9 +; CHECK-NEXT: lui a1, 45217 +; CHECK-NEXT: addi a1, a1, -1785 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v11, v10 +; CHECK-NEXT: vdivu.vv v8, v8, v11 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = udiv <6 x i16> %a, store <6 x i16> %b, ptr %x @@ -1292,55 +1221,30 @@ define void @mulhu_v6i16(ptr %x) { } define void @mulhu_v4i32(ptr %x) { -; RV32-LABEL: mulhu_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; RV32-NEXT: vslideup.vi v10, v9, 2 -; RV32-NEXT: lui a1, %hi(.LCPI68_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI68_0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vle32.v v9, (a1) -; RV32-NEXT: vmulhu.vv v9, v8, v9 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: vmulhu.vv v8, v8, v10 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: lui a1, 4128 -; RV32-NEXT: addi a1, a1, 514 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vsext.vf4 v10, v9 -; RV32-NEXT: vsrl.vv v8, v8, v10 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhu_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: lui a1, 524288 -; RV64-NEXT: vmv.s.x v9, a1 -; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; RV64-NEXT: vslideup.vi v10, v9, 2 -; RV64-NEXT: lui a1, %hi(.LCPI68_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI68_0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vle32.v v9, (a1) -; RV64-NEXT: vmulhu.vv v9, v8, v9 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vmulhu.vv v8, v8, v10 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a1, 4128 -; RV64-NEXT: addiw a1, a1, 514 -; RV64-NEXT: vmv.s.x v9, a1 -; RV64-NEXT: vsext.vf4 v10, v9 -; RV64-NEXT: vsrl.vv v8, v8, v10 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mulhu_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: lui a1, 524288 +; CHECK-NEXT: vmv.s.x v9, a1 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; CHECK-NEXT: vslideup.vi v10, v9, 2 +; CHECK-NEXT: lui a1, %hi(.LCPI68_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI68_0) +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v9, (a1) +; CHECK-NEXT: vmulhu.vv v9, v8, v9 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: vmulhu.vv v8, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: lui a1, 4128 +; CHECK-NEXT: addi a1, a1, 514 +; CHECK-NEXT: vmv.s.x v9, a1 +; CHECK-NEXT: vsext.vf4 v10, v9 +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i32>, ptr %x %b = udiv <4 x i32> %a, store <4 x i32> %b, ptr %x @@ -1397,45 +1301,25 @@ define void @mulhu_v2i64(ptr %x) { } define void @mulhs_v16i8(ptr %x) { -; RV32-LABEL: mulhs_v16i8: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: li a1, -123 -; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, -1452 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 -; 
RV32-NEXT: li a1, 57 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 -; RV32-NEXT: vmulhu.vv v8, v8, v9 -; RV32-NEXT: vmv.v.i v9, 7 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vsrl.vv v8, v8, v9 -; RV32-NEXT: vse8.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhs_v16i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: li a1, -123 -; RV64-NEXT: vmv.v.x v9, a1 -; RV64-NEXT: lui a1, 5 -; RV64-NEXT: addiw a1, a1, -1452 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: li a1, 57 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vxm v9, v9, a1, v0 -; RV64-NEXT: vmulhu.vv v8, v8, v9 -; RV64-NEXT: vmv.v.i v9, 7 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: vsrl.vv v8, v8, v9 -; RV64-NEXT: vse8.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mulhs_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: li a1, -123 +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: lui a1, 5 +; CHECK-NEXT: addi a1, a1, -1452 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: li a1, 57 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vxm v9, v9, a1, v0 +; CHECK-NEXT: vmulhu.vv v8, v8, v9 +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i8>, ptr %x %b = udiv <16 x i8> %a, store <16 x i8> %b, ptr %x @@ -1443,43 +1327,24 @@ define void @mulhs_v16i8(ptr %x) { } define void @mulhs_v8i16(ptr %x) { -; RV32-LABEL: mulhs_v8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, -1755 -; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: li a1, 105 -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: lui a1, 1048571 -; RV32-NEXT: addi a1, a1, 1755 -; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 -; RV32-NEXT: vmulh.vv v8, v8, v9 -; RV32-NEXT: vsra.vi v8, v8, 1 -; RV32-NEXT: vsrl.vi v9, v8, 15 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhs_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: lui a1, 5 -; RV64-NEXT: addiw a1, a1, -1755 -; RV64-NEXT: vmv.v.x v9, a1 -; RV64-NEXT: li a1, 105 -; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: lui a1, 1048571 -; RV64-NEXT: addiw a1, a1, 1755 -; RV64-NEXT: vmerge.vxm v9, v9, a1, v0 -; RV64-NEXT: vmulh.vv v8, v8, v9 -; RV64-NEXT: vsra.vi v8, v8, 1 -; RV64-NEXT: vsrl.vi v9, v8, 15 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: vse16.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mulhs_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, 5 +; CHECK-NEXT: addi a1, a1, -1755 +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: li a1, 105 +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: lui a1, 1048571 +; CHECK-NEXT: addi a1, a1, 1755 +; CHECK-NEXT: vmerge.vxm v9, v9, a1, v0 +; CHECK-NEXT: vmulh.vv v8, v8, v9 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i16>, ptr %x %b = sdiv <8 x i16> %a, store <8 x i16> %b, ptr %x @@ -1487,57 +1352,31 @@ define void @mulhs_v8i16(ptr %x) { } define void @mulhs_v6i16(ptr %x) { -; 
RV32-LABEL: mulhs_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.i v9, 7 -; RV32-NEXT: vid.v v10 -; RV32-NEXT: li a1, -14 -; RV32-NEXT: vmadd.vx v10, a1, v9 -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vdiv.vv v9, v9, v10 -; RV32-NEXT: lui a1, 1020016 -; RV32-NEXT: addi a1, a1, 2041 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v10, a1 -; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32-NEXT: vsext.vf2 v11, v10 -; RV32-NEXT: vdiv.vv v8, v8, v11 -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v8, v9, 4 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhs_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vmv.v.i v9, 7 -; RV64-NEXT: vid.v v10 -; RV64-NEXT: li a1, -14 -; RV64-NEXT: vmadd.vx v10, a1, v9 -; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v9, v8, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vdiv.vv v9, v9, v10 -; RV64-NEXT: lui a1, 1020016 -; RV64-NEXT: addiw a1, a1, 2041 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a1 -; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64-NEXT: vsext.vf2 v11, v10 -; RV64-NEXT: vdiv.vv v8, v8, v11 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v8, v9, 4 -; RV64-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV64-NEXT: vse16.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mulhs_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: li a1, -14 +; CHECK-NEXT: vmadd.vx v10, a1, v9 +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vdiv.vv v9, v9, v10 +; CHECK-NEXT: lui a1, 1020016 +; CHECK-NEXT: addi a1, a1, 2041 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v11, v10 +; CHECK-NEXT: vdiv.vv v8, v8, v11 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = sdiv <6 x i16> %a, store <6 x i16> %b, ptr %x @@ -5027,105 +4866,55 @@ define void @extract_v4i64(ptr %x, ptr %y) { } define void @mulhu_v32i8(ptr %x) { -; LMULMAX2-RV32-LABEL: mulhu_v32i8: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: li a1, 32 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV32-NEXT: vmv.v.i v10, 0 -; LMULMAX2-RV32-NEXT: lui a2, 163907 -; LMULMAX2-RV32-NEXT: addi a2, a2, -2044 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: li a2, -128 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v10, a2, v0 -; LMULMAX2-RV32-NEXT: lui a2, 66049 -; LMULMAX2-RV32-NEXT: addi a2, a2, 32 -; LMULMAX2-RV32-NEXT: vsetivli 
zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: lui a2, %hi(.LCPI181_0) -; LMULMAX2-RV32-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX2-RV32-NEXT: vle8.v v14, (a2) -; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-RV32-NEXT: vsrl.vv v10, v8, v10 -; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v14 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vmv.v.i v10, 4 -; LMULMAX2-RV32-NEXT: lui a2, 8208 -; LMULMAX2-RV32-NEXT: addi a2, a2, 513 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-RV32-NEXT: lui a2, 66785 -; LMULMAX2-RV32-NEXT: addi a2, a2, 78 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 3, v0 -; LMULMAX2-RV32-NEXT: lui a2, 529160 -; LMULMAX2-RV32-NEXT: addi a2, a2, 304 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 2, v0 -; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: mulhu_v32i8: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: li a1, 32 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 0 -; LMULMAX2-RV64-NEXT: lui a2, 163907 -; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: li a2, -128 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v10, a2, v0 -; LMULMAX2-RV64-NEXT: lui a2, 66049 -; LMULMAX2-RV64-NEXT: addiw a2, a2, 32 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI181_0) -; LMULMAX2-RV64-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX2-RV64-NEXT: vle8.v v14, (a2) -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-RV64-NEXT: vsrl.vv v10, v8, v10 -; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v14 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 4 -; LMULMAX2-RV64-NEXT: lui a2, 8208 -; LMULMAX2-RV64-NEXT: addiw a2, a2, 513 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-RV64-NEXT: lui a2, 66785 -; LMULMAX2-RV64-NEXT: addiw a2, a2, 78 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 3, v0 -; LMULMAX2-RV64-NEXT: lui a2, 529160 -; LMULMAX2-RV64-NEXT: addiw a2, a2, 304 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; 
LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 2, v0 -; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret +; LMULMAX2-LABEL: mulhu_v32i8: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: li a1, 32 +; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-NEXT: vle8.v v8, (a0) +; LMULMAX2-NEXT: vmv.v.i v10, 0 +; LMULMAX2-NEXT: lui a2, 163907 +; LMULMAX2-NEXT: addi a2, a2, -2044 +; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-NEXT: vmv.s.x v0, a2 +; LMULMAX2-NEXT: li a2, -128 +; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-NEXT: vmerge.vxm v12, v10, a2, v0 +; LMULMAX2-NEXT: lui a2, 66049 +; LMULMAX2-NEXT: addi a2, a2, 32 +; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-NEXT: vmv.s.x v0, a2 +; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-NEXT: lui a2, %hi(.LCPI181_0) +; LMULMAX2-NEXT: addi a2, a2, %lo(.LCPI181_0) +; LMULMAX2-NEXT: vle8.v v14, (a2) +; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-NEXT: vsrl.vv v10, v8, v10 +; LMULMAX2-NEXT: vmulhu.vv v10, v10, v14 +; LMULMAX2-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12 +; LMULMAX2-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-NEXT: vmv.v.i v10, 4 +; LMULMAX2-NEXT: lui a2, 8208 +; LMULMAX2-NEXT: addi a2, a2, 513 +; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-NEXT: vmv.s.x v0, a2 +; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-NEXT: lui a2, 66785 +; LMULMAX2-NEXT: addi a2, a2, 78 +; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-NEXT: vmv.s.x v0, a2 +; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-NEXT: vmerge.vim v10, v10, 3, v0 +; LMULMAX2-NEXT: lui a2, 529160 +; LMULMAX2-NEXT: addi a2, a2, 304 +; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-NEXT: vmv.s.x v0, a2 +; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-NEXT: vmerge.vim v10, v10, 2, v0 +; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 +; LMULMAX2-NEXT: vse8.v v8, (a0) +; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: mulhu_v32i8: ; LMULMAX1: # %bb.0: @@ -5242,57 +5031,31 @@ define void @mulhu_v16i16(ptr %x) { } define void @mulhu_v8i32(ptr %x) { -; LMULMAX2-RV32-LABEL: mulhu_v8i32: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a1, 68 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI183_0) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI183_0) -; LMULMAX2-RV32-NEXT: vle32.v v10, (a1) -; LMULMAX2-RV32-NEXT: vmv.v.i v12, 0 -; LMULMAX2-RV32-NEXT: lui a1, 524288 -; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a1, v0 -; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v8, v10 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 4128 -; LMULMAX2-RV32-NEXT: addi a1, a1, 514 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsext.vf4 v12, v10 -; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: mulhu_v8i32: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a1, 68 -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV64-NEXT: lui 
a1, %hi(.LCPI183_0) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI183_0) -; LMULMAX2-RV64-NEXT: vle32.v v10, (a1) -; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 -; LMULMAX2-RV64-NEXT: lui a1, 524288 -; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a1, v0 -; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v8, v10 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 4128 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 514 -; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64-NEXT: vsext.vf4 v12, v10 -; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret +; LMULMAX2-LABEL: mulhu_v8i32: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-NEXT: vle32.v v8, (a0) +; LMULMAX2-NEXT: li a1, 68 +; LMULMAX2-NEXT: vmv.s.x v0, a1 +; LMULMAX2-NEXT: lui a1, %hi(.LCPI183_0) +; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI183_0) +; LMULMAX2-NEXT: vle32.v v10, (a1) +; LMULMAX2-NEXT: vmv.v.i v12, 0 +; LMULMAX2-NEXT: lui a1, 524288 +; LMULMAX2-NEXT: vmerge.vxm v12, v12, a1, v0 +; LMULMAX2-NEXT: vmulhu.vv v10, v8, v10 +; LMULMAX2-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12 +; LMULMAX2-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-NEXT: lui a1, 4128 +; LMULMAX2-NEXT: addi a1, a1, 514 +; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; LMULMAX2-NEXT: vmv.v.x v10, a1 +; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-NEXT: vsext.vf4 v12, v10 +; LMULMAX2-NEXT: vsrl.vv v8, v8, v12 +; LMULMAX2-NEXT: vse32.v v8, (a0) +; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: mulhu_v8i32: ; LMULMAX1-RV32: # %bb.0: @@ -5334,7 +5097,7 @@ define void @mulhu_v8i32(ptr %x) { ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a1) ; LMULMAX1-RV64-NEXT: lui a2, 36976 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 1541 +; LMULMAX1-RV64-NEXT: addi a2, a2, 1541 ; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2 ; LMULMAX1-RV64-NEXT: vsext.vf4 v11, v10 ; LMULMAX1-RV64-NEXT: vdivu.vv v9, v9, v11 @@ -5398,7 +5161,7 @@ define void @mulhu_v4i64(ptr %x) { ; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: lui a1, 12320 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 513 +; LMULMAX2-RV64-NEXT: addi a1, a1, 513 ; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1 ; LMULMAX2-RV64-NEXT: vsext.vf8 v12, v10 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12 @@ -5481,85 +5244,45 @@ define void @mulhu_v4i64(ptr %x) { } define void @mulhs_v32i8(ptr %x) { -; LMULMAX2-RV32-LABEL: mulhs_v32i8: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: li a1, 32 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV32-NEXT: vmv.v.i v10, 7 -; LMULMAX2-RV32-NEXT: lui a2, 304453 -; LMULMAX2-RV32-NEXT: addi a2, a2, -1452 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-RV32-NEXT: li a1, -123 -; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV32-NEXT: li a1, 57 -; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a1, v0 -; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: mulhs_v32i8: -; LMULMAX2-RV64: # %bb.0: -; 
LMULMAX2-RV64-NEXT: li a1, 32 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 7 -; LMULMAX2-RV64-NEXT: lui a2, 304453 -; LMULMAX2-RV64-NEXT: addiw a2, a2, -1452 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-RV64-NEXT: li a1, -123 -; LMULMAX2-RV64-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV64-NEXT: li a1, 57 -; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a1, v0 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: mulhs_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a1) -; LMULMAX1-RV32-NEXT: lui a2, 5 -; LMULMAX1-RV32-NEXT: addi a2, a2, -1452 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v10, -9 -; LMULMAX1-RV32-NEXT: vmerge.vim v10, v10, 9, v0 -; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a1) -; LMULMAX1-RV32-NEXT: ret +; LMULMAX2-LABEL: mulhs_v32i8: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: li a1, 32 +; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-NEXT: vle8.v v8, (a0) +; LMULMAX2-NEXT: vmv.v.i v10, 7 +; LMULMAX2-NEXT: lui a2, 304453 +; LMULMAX2-NEXT: addi a2, a2, -1452 +; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-NEXT: vmv.s.x v0, a2 +; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-NEXT: li a1, -123 +; LMULMAX2-NEXT: vmv.v.x v12, a1 +; LMULMAX2-NEXT: li a1, 57 +; LMULMAX2-NEXT: vmerge.vxm v12, v12, a1, v0 +; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12 +; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 +; LMULMAX2-NEXT: vse8.v v8, (a0) +; LMULMAX2-NEXT: ret ; -; LMULMAX1-RV64-LABEL: mulhs_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a1) -; LMULMAX1-RV64-NEXT: lui a2, 5 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -1452 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.i v10, -9 -; LMULMAX1-RV64-NEXT: vmerge.vim v10, v10, 9, v0 -; LMULMAX1-RV64-NEXT: vdivu.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a1) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX1-LABEL: mulhs_v32i8: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX1-NEXT: vle8.v v8, (a0) +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vle8.v v9, (a1) +; LMULMAX1-NEXT: lui a2, 5 +; LMULMAX1-NEXT: addi a2, a2, -1452 +; LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX1-NEXT: vmv.s.x v0, a2 +; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX1-NEXT: vmv.v.i v10, -9 +; LMULMAX1-NEXT: vmerge.vim v10, v10, 9, v0 +; LMULMAX1-NEXT: vdivu.vv v9, v9, v10 +; LMULMAX1-NEXT: vdivu.vv v8, v8, v10 +; 
LMULMAX1-NEXT: vse8.v v8, (a0) +; LMULMAX1-NEXT: vse8.v v9, (a1) +; LMULMAX1-NEXT: ret %a = load <32 x i8>, ptr %x %b = udiv <32 x i8> %a, store <32 x i8> %b, ptr %x @@ -5567,45 +5290,25 @@ define void @mulhs_v32i8(ptr %x) { } define void @mulhs_v16i16(ptr %x) { -; LMULMAX2-RV32-LABEL: mulhs_v16i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32-NEXT: lui a1, 5 -; LMULMAX2-RV32-NEXT: addi a1, a1, -1755 -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: lui a1, 7 -; LMULMAX2-RV32-NEXT: addi a1, a1, -1687 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: lui a1, 1048571 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1755 -; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 -; LMULMAX2-RV32-NEXT: vmulh.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsra.vi v8, v8, 1 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 15 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: mulhs_v16i16: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64-NEXT: lui a1, 5 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -1755 -; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV64-NEXT: lui a1, 7 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -1687 -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV64-NEXT: lui a1, 1048571 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1755 -; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a1, v0 -; LMULMAX2-RV64-NEXT: vmulh.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsra.vi v8, v8, 1 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 15 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret +; LMULMAX2-LABEL: mulhs_v16i16: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-NEXT: vle16.v v8, (a0) +; LMULMAX2-NEXT: lui a1, 5 +; LMULMAX2-NEXT: addi a1, a1, -1755 +; LMULMAX2-NEXT: vmv.v.x v10, a1 +; LMULMAX2-NEXT: lui a1, 7 +; LMULMAX2-NEXT: addi a1, a1, -1687 +; LMULMAX2-NEXT: vmv.s.x v0, a1 +; LMULMAX2-NEXT: lui a1, 1048571 +; LMULMAX2-NEXT: addi a1, a1, 1755 +; LMULMAX2-NEXT: vmerge.vxm v10, v10, a1, v0 +; LMULMAX2-NEXT: vmulh.vv v8, v8, v10 +; LMULMAX2-NEXT: vsra.vi v8, v8, 1 +; LMULMAX2-NEXT: vsrl.vi v10, v8, 15 +; LMULMAX2-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-NEXT: vse16.v v8, (a0) +; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: mulhs_v16i16: ; LMULMAX1: # %bb.0: @@ -5764,14 +5467,14 @@ define void @mulhs_v4i64(ptr %x) { ; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV64-NEXT: vmulh.vv v10, v8, v10 ; LMULMAX2-RV64-NEXT: lui a1, 1044496 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -256 +; LMULMAX2-RV64-NEXT: addi a1, a1, -256 ; LMULMAX2-RV64-NEXT: vmv.s.x v12, a1 ; LMULMAX2-RV64-NEXT: vsext.vf8 v14, v12 ; LMULMAX2-RV64-NEXT: vmadd.vv v14, v8, v10 ; LMULMAX2-RV64-NEXT: li a1, 63 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v14, a1 ; LMULMAX2-RV64-NEXT: lui a1, 4096 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 256 +; LMULMAX2-RV64-NEXT: addi a1, a1, 256 ; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1 ; LMULMAX2-RV64-NEXT: vsext.vf8 v12, v10 ; LMULMAX2-RV64-NEXT: vsra.vv v10, v14, v12 @@ -8356,33 +8059,19 @@ define void @mulhu_vx_v16i8(ptr %x) { } define void @mulhu_vx_v8i16(ptr %x) { -; RV32-LABEL: mulhu_vx_v8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: lui a1, 2 -; RV32-NEXT: addi a1, a1, 1171 -; RV32-NEXT: vmulhu.vx v9, v8, a1 -; RV32-NEXT: 
vsub.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v8, v8, 1 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhu_vx_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: lui a1, 2 -; RV64-NEXT: addiw a1, a1, 1171 -; RV64-NEXT: vmulhu.vx v9, v8, a1 -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v8, v8, 1 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vse16.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mulhu_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, 2 +; CHECK-NEXT: addi a1, a1, 1171 +; CHECK-NEXT: vmulhu.vx v9, v8, a1 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i16>, ptr %x %b = udiv <8 x i16> %a, store <8 x i16> %b, ptr %x @@ -8390,27 +8079,16 @@ define void @mulhu_vx_v8i16(ptr %x) { } define void @mulhu_vx_v4i32(ptr %x) { -; RV32-LABEL: mulhu_vx_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: lui a1, 838861 -; RV32-NEXT: addi a1, a1, -819 -; RV32-NEXT: vmulhu.vx v8, v8, a1 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhu_vx_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: lui a1, 838861 -; RV64-NEXT: addiw a1, a1, -819 -; RV64-NEXT: vmulhu.vx v8, v8, a1 -; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mulhu_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: lui a1, 838861 +; CHECK-NEXT: addi a1, a1, -819 +; CHECK-NEXT: vmulhu.vx v8, v8, a1 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i32>, ptr %x %b = udiv <4 x i32> %a, store <4 x i32> %b, ptr %x @@ -8472,31 +8150,18 @@ define void @mulhs_vx_v16i8(ptr %x) { } define void @mulhs_vx_v8i16(ptr %x) { -; RV32-LABEL: mulhs_vx_v8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, -1755 -; RV32-NEXT: vmulh.vx v8, v8, a1 -; RV32-NEXT: vsra.vi v8, v8, 1 -; RV32-NEXT: vsrl.vi v9, v8, 15 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhs_vx_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: lui a1, 5 -; RV64-NEXT: addiw a1, a1, -1755 -; RV64-NEXT: vmulh.vx v8, v8, a1 -; RV64-NEXT: vsra.vi v8, v8, 1 -; RV64-NEXT: vsrl.vi v9, v8, 15 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: vse16.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mulhs_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, 5 +; CHECK-NEXT: addi a1, a1, -1755 +; CHECK-NEXT: vmulh.vx v8, v8, a1 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i16>, ptr %x %b = sdiv <8 x i16> %a, store <8 x i16> %b, ptr %x @@ -8522,7 +8187,7 @@ define void @mulhs_vx_v4i32(ptr %x) { ; RV64-NEXT: vsetivli zero, 4, 
e32, m1, ta, ma ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: lui a1, 629146 -; RV64-NEXT: addiw a1, a1, -1639 +; RV64-NEXT: addi a1, a1, -1639 ; RV64-NEXT: vmulh.vx v8, v8, a1 ; RV64-NEXT: vsra.vi v8, v8, 1 ; RV64-NEXT: vsrl.vi v9, v8, 31 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index b3099f6b57056..eeb8e517d01d2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -845,7 +845,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t ; RV64-NEXT: lui a1, 16 -; RV64-NEXT: addiw a1, a1, 7 +; RV64-NEXT: addi a1, a1, 7 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmv.v.x v12, a1 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu @@ -917,7 +917,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v4, v8 ; RV64-NEXT: lui a1, 112 -; RV64-NEXT: addiw a1, a1, 1 +; RV64-NEXT: addi a1, a1, 1 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmv.v.x v12, a1 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll index eae615db137ea..d23c494ba37a0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -426,21 +426,21 @@ define <10 x i1> @buildvec_mask_v10i1() { } define <16 x i1> @buildvec_mask_v16i1() { -; CHECK-RV32-LABEL: buildvec_mask_v16i1: -; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: lui a0, 11 -; CHECK-RV32-NEXT: addi a0, a0, 1718 -; CHECK-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-RV32-NEXT: vmv.s.x v0, a0 -; CHECK-RV32-NEXT: ret -; -; CHECK-RV64-LABEL: buildvec_mask_v16i1: -; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: lui a0, 11 -; CHECK-RV64-NEXT: addiw a0, a0, 1718 -; CHECK-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-RV64-NEXT: vmv.s.x v0, a0 -; CHECK-RV64-NEXT: ret +; CHECK-LABEL: buildvec_mask_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 11 +; CHECK-NEXT: addi a0, a0, 1718 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: ret +; +; ZVE32F-LABEL: buildvec_mask_v16i1: +; ZVE32F: # %bb.0: +; ZVE32F-NEXT: lui a0, 11 +; ZVE32F-NEXT: addi a0, a0, 1718 +; ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; ZVE32F-NEXT: vmv.s.x v0, a0 +; ZVE32F-NEXT: ret ret <16 x i1> } @@ -478,7 +478,7 @@ define <32 x i1> @buildvec_mask_v32i1() { ; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX1-NEXT: lui a0, 11 -; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718 +; RV64-LMULMAX1-NEXT: addi a0, a0, 1718 ; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0 ; RV64-LMULMAX1-NEXT: ret ; @@ -493,7 +493,7 @@ define <32 x i1> @buildvec_mask_v32i1() { ; RV64-LMULMAX2-LABEL: buildvec_mask_v32i1: ; RV64-LMULMAX2: # %bb.0: ; RV64-LMULMAX2-NEXT: lui a0, 748384 -; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776 +; RV64-LMULMAX2-NEXT: addi a0, a0, 1776 ; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX2-NEXT: ret @@ -509,7 +509,7 @@ define <32 x i1> @buildvec_mask_v32i1() { ; RV64-LMULMAX4-LABEL: buildvec_mask_v32i1: ; 
RV64-LMULMAX4: # %bb.0: ; RV64-LMULMAX4-NEXT: lui a0, 748384 -; RV64-LMULMAX4-NEXT: addiw a0, a0, 1776 +; RV64-LMULMAX4-NEXT: addi a0, a0, 1776 ; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-LMULMAX4-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX4-NEXT: ret @@ -525,10 +525,18 @@ define <32 x i1> @buildvec_mask_v32i1() { ; RV64-LMULMAX8-LABEL: buildvec_mask_v32i1: ; RV64-LMULMAX8: # %bb.0: ; RV64-LMULMAX8-NEXT: lui a0, 748384 -; RV64-LMULMAX8-NEXT: addiw a0, a0, 1776 +; RV64-LMULMAX8-NEXT: addi a0, a0, 1776 ; RV64-LMULMAX8-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-LMULMAX8-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX8-NEXT: ret +; +; ZVE32F-LABEL: buildvec_mask_v32i1: +; ZVE32F: # %bb.0: +; ZVE32F-NEXT: lui a0, 748384 +; ZVE32F-NEXT: addi a0, a0, 1776 +; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; ZVE32F-NEXT: vmv.s.x v0, a0 +; ZVE32F-NEXT: ret ret <32 x i1> } @@ -553,10 +561,10 @@ define <64 x i1> @buildvec_mask_v64i1() { ; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX1-NEXT: lui a0, 4 -; RV64-LMULMAX1-NEXT: addiw a0, a0, -1793 +; RV64-LMULMAX1-NEXT: addi a0, a0, -1793 ; RV64-LMULMAX1-NEXT: vmv.s.x v9, a0 ; RV64-LMULMAX1-NEXT: lui a0, 11 -; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718 +; RV64-LMULMAX1-NEXT: addi a0, a0, 1718 ; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0 ; RV64-LMULMAX1-NEXT: vmv1r.v v10, v8 ; RV64-LMULMAX1-NEXT: ret @@ -575,11 +583,11 @@ define <64 x i1> @buildvec_mask_v64i1() { ; RV64-LMULMAX2-LABEL: buildvec_mask_v64i1: ; RV64-LMULMAX2: # %bb.0: ; RV64-LMULMAX2-NEXT: lui a0, 748384 -; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776 +; RV64-LMULMAX2-NEXT: addi a0, a0, 1776 ; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX2-NEXT: lui a0, 748388 -; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793 +; RV64-LMULMAX2-NEXT: addi a0, a0, -1793 ; RV64-LMULMAX2-NEXT: vmv.s.x v8, a0 ; RV64-LMULMAX2-NEXT: ret ; @@ -622,6 +630,18 @@ define <64 x i1> @buildvec_mask_v64i1() { ; RV64-LMULMAX8-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-LMULMAX8-NEXT: vle64.v v0, (a0) ; RV64-LMULMAX8-NEXT: ret +; +; ZVE32F-LABEL: buildvec_mask_v64i1: +; ZVE32F: # %bb.0: +; ZVE32F-NEXT: lui a0, 748388 +; ZVE32F-NEXT: addi a0, a0, -1793 +; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; ZVE32F-NEXT: vmv.v.x v0, a0 +; ZVE32F-NEXT: lui a0, 748384 +; ZVE32F-NEXT: addi a0, a0, 1776 +; ZVE32F-NEXT: vsetvli zero, zero, e32, m1, tu, ma +; ZVE32F-NEXT: vmv.s.x v0, a0 +; ZVE32F-NEXT: ret ret <64 x i1> } @@ -654,16 +674,16 @@ define <128 x i1> @buildvec_mask_v128i1() { ; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX1-NEXT: lui a0, 11 -; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718 +; RV64-LMULMAX1-NEXT: addi a0, a0, 1718 ; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0 ; RV64-LMULMAX1-NEXT: lui a0, 8 -; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718 +; RV64-LMULMAX1-NEXT: addi a0, a0, 1718 ; RV64-LMULMAX1-NEXT: vmv.s.x v12, a0 ; RV64-LMULMAX1-NEXT: lui a0, 4 -; RV64-LMULMAX1-NEXT: addiw a0, a0, -1793 +; RV64-LMULMAX1-NEXT: addi a0, a0, -1793 ; RV64-LMULMAX1-NEXT: vmv.s.x v9, a0 ; RV64-LMULMAX1-NEXT: lui a0, 14 -; RV64-LMULMAX1-NEXT: addiw a0, a0, 1722 +; RV64-LMULMAX1-NEXT: addi a0, a0, 1722 ; RV64-LMULMAX1-NEXT: vmv.s.x v14, a0 ; RV64-LMULMAX1-NEXT: vmv1r.v v10, v8 ; RV64-LMULMAX1-NEXT: vmv1r.v v11, v0 @@ -690,17 +710,17 @@ define <128 x i1> @buildvec_mask_v128i1() { ; RV64-LMULMAX2-LABEL: buildvec_mask_v128i1: ; RV64-LMULMAX2: # %bb.0: ; RV64-LMULMAX2-NEXT: lui a0, 748384 -; 
RV64-LMULMAX2-NEXT: addiw a0, a0, 1776 +; RV64-LMULMAX2-NEXT: addi a0, a0, 1776 ; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX2-NEXT: lui a0, 748388 -; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793 +; RV64-LMULMAX2-NEXT: addi a0, a0, -1793 ; RV64-LMULMAX2-NEXT: vmv.s.x v8, a0 ; RV64-LMULMAX2-NEXT: lui a0, 551776 -; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776 +; RV64-LMULMAX2-NEXT: addi a0, a0, 1776 ; RV64-LMULMAX2-NEXT: vmv.s.x v9, a0 ; RV64-LMULMAX2-NEXT: lui a0, 945060 -; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793 +; RV64-LMULMAX2-NEXT: addi a0, a0, -1793 ; RV64-LMULMAX2-NEXT: vmv.s.x v10, a0 ; RV64-LMULMAX2-NEXT: ret ; @@ -794,16 +814,16 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize { ; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX1-NEXT: lui a0, 11 -; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718 +; RV64-LMULMAX1-NEXT: addi a0, a0, 1718 ; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0 ; RV64-LMULMAX1-NEXT: lui a0, 8 -; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718 +; RV64-LMULMAX1-NEXT: addi a0, a0, 1718 ; RV64-LMULMAX1-NEXT: vmv.s.x v12, a0 ; RV64-LMULMAX1-NEXT: lui a0, 4 -; RV64-LMULMAX1-NEXT: addiw a0, a0, -1793 +; RV64-LMULMAX1-NEXT: addi a0, a0, -1793 ; RV64-LMULMAX1-NEXT: vmv.s.x v9, a0 ; RV64-LMULMAX1-NEXT: lui a0, 14 -; RV64-LMULMAX1-NEXT: addiw a0, a0, 1722 +; RV64-LMULMAX1-NEXT: addi a0, a0, 1722 ; RV64-LMULMAX1-NEXT: vmv.s.x v14, a0 ; RV64-LMULMAX1-NEXT: vmv1r.v v10, v8 ; RV64-LMULMAX1-NEXT: vmv1r.v v11, v0 @@ -830,17 +850,17 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize { ; RV64-LMULMAX2-LABEL: buildvec_mask_optsize_v128i1: ; RV64-LMULMAX2: # %bb.0: ; RV64-LMULMAX2-NEXT: lui a0, 748384 -; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776 +; RV64-LMULMAX2-NEXT: addi a0, a0, 1776 ; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX2-NEXT: lui a0, 748388 -; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793 +; RV64-LMULMAX2-NEXT: addi a0, a0, -1793 ; RV64-LMULMAX2-NEXT: vmv.s.x v8, a0 ; RV64-LMULMAX2-NEXT: lui a0, 551776 -; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776 +; RV64-LMULMAX2-NEXT: addi a0, a0, 1776 ; RV64-LMULMAX2-NEXT: vmv.s.x v9, a0 ; RV64-LMULMAX2-NEXT: lui a0, 945060 -; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793 +; RV64-LMULMAX2-NEXT: addi a0, a0, -1793 ; RV64-LMULMAX2-NEXT: vmv.s.x v10, a0 ; RV64-LMULMAX2-NEXT: ret ; @@ -895,3 +915,6 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize { ; ZVE32F-NEXT: ret ret <128 x i1> } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-RV32: {{.*}} +; CHECK-RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 480e5c2f8f2b8..f8a8ffd3a0797 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -14507,7 +14507,7 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) { ; RV64V-LABEL: mgather_gather_4xSEW_partial_align: ; RV64V: # %bb.0: ; RV64V-NEXT: lui a1, 82176 -; RV64V-NEXT: addiw a1, a1, 1024 +; RV64V-NEXT: addi a1, a1, 1024 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64V-NEXT: vmv.s.x v9, a1 ; RV64V-NEXT: vluxei8.v v8, (a0), v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll index 8c96392f08a5d..dd9a1118ab821 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll @@ -649,39 +649,22 @@ define i32 @reduce_smin_16xi32_prefix2(ptr %p) { } define i32 @reduce_smin_16xi32_prefix5(ptr %p) { -; RV32-LABEL: reduce_smin_16xi32_prefix5: -; RV32: # %bb.0: -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vmv.s.x v10, a1 -; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; RV32-NEXT: vslideup.vi v8, v10, 5 -; RV32-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV32-NEXT: vslideup.vi v8, v10, 6 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vslideup.vi v8, v10, 7 -; RV32-NEXT: vredmin.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: reduce_smin_16xi32_prefix5: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, 524288 -; RV64-NEXT: addiw a1, a1, -1 -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vmv.s.x v10, a1 -; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; RV64-NEXT: vslideup.vi v8, v10, 5 -; RV64-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64-NEXT: vslideup.vi v8, v10, 6 -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vslideup.vi v8, v10, 7 -; RV64-NEXT: vredmin.vs v8, v8, v8 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: reduce_smin_16xi32_prefix5: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 524288 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 5 +; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 6 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 %e0 = extractelement <16 x i32> %v, i32 0 %e1 = extractelement <16 x i32> %v, i32 1 @@ -781,3 +764,6 @@ define i32 @reduce_umin_16xi32_prefix5(ptr %p) { %umin3 = call i32 @llvm.umin.i32(i32 %umin2, i32 %e4) ret i32 %umin3 } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll index 1bf832a229b21..a1d2b5106d5a9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll @@ -37,69 +37,39 @@ define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { } define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { -; RV32-LABEL: trn1.v16i8: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vid.v v11 -; RV32-NEXT: vrgather.vv v10, v8, v11 -; RV32-NEXT: vadd.vi v8, v11, -1 -; RV32-NEXT: lui a0, 11 -; RV32-NEXT: addi a0, a0, -1366 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV32-NEXT: vrgather.vv v10, v9, v8, v0.t -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: trn1.v16i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vid.v v11 -; RV64-NEXT: vrgather.vv v10, v8, v11 -; RV64-NEXT: vadd.vi v8, v11, -1 -; RV64-NEXT: lui a0, 11 -; RV64-NEXT: addiw a0, a0, -1366 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV64-NEXT: vrgather.vv v10, v9, v8, v0.t -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: trn1.v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: lui a0, 11 +; CHECK-NEXT: addi a0, a0, -1366 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ret <16 x i8> %tmp0 } define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { -; RV32-LABEL: trn2.v16i8: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vid.v v11 -; RV32-NEXT: vadd.vi v12, v11, 1 -; RV32-NEXT: vrgather.vv v10, v8, v12 -; RV32-NEXT: lui a0, 11 -; RV32-NEXT: addi a0, a0, -1366 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV32-NEXT: vrgather.vv v10, v9, v11, v0.t -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: trn2.v16i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vid.v v11 -; RV64-NEXT: vadd.vi v12, v11, 1 -; RV64-NEXT: vrgather.vv v10, v8, v12 -; RV64-NEXT: lui a0, 11 -; RV64-NEXT: addiw a0, a0, -1366 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV64-NEXT: vrgather.vv v10, v9, v11, v0.t -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: trn2.v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: lui a0, 11 +; CHECK-NEXT: addi a0, a0, -1366 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> 
%v1, <16 x i32> ret <16 x i8> %tmp0 } @@ -379,3 +349,6 @@ define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) { %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ret <8 x half> %tmp0 } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll index 49daa4413035c..fd117f9e8ea07 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll @@ -352,27 +352,16 @@ define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert2(<4 x i8> %v, i8 %b) { } define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert3(<4 x i8> %v, i8 %b) { -; RV32-LABEL: vslide1up_4xi8_neg_incorrect_insert3: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 8208 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v10, a0 -; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; RV32-NEXT: vrgather.vv v9, v8, v10 -; RV32-NEXT: vmv1r.v v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vslide1up_4xi8_neg_incorrect_insert3: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 8208 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; RV64-NEXT: vrgather.vv v9, v8, v10 -; RV64-NEXT: vmv1r.v v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert3: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 8208 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret %v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> ret <4 x i8> %v2 } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll index fc05648270367..32d26827f989e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll @@ -118,19 +118,12 @@ define void @store_constant_v2i8(ptr %p) { } define void @store_constant_v2i16(ptr %p) { -; RV32-LABEL: store_constant_v2i16: -; RV32: # %bb.0: -; RV32-NEXT: lui a1, 96 -; RV32-NEXT: addi a1, a1, 3 -; RV32-NEXT: sw a1, 0(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: store_constant_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, 96 -; RV64-NEXT: addiw a1, a1, 3 -; RV64-NEXT: sw a1, 0(a0) -; RV64-NEXT: ret +; CHECK-LABEL: store_constant_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 96 +; CHECK-NEXT: addi a1, a1, 3 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret store <2 x i16> , ptr %p ret void } @@ -150,87 +143,52 @@ define void @store_constant_v2i32(ptr %p) { } define void @store_constant_v4i8(ptr %p) { -; RV32-LABEL: store_constant_v4i8: -; RV32: # %bb.0: -; RV32-NEXT: lui a1, 4176 -; RV32-NEXT: addi a1, a1, 1539 -; RV32-NEXT: sw a1, 0(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: store_constant_v4i8: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, 4176 -; RV64-NEXT: addiw a1, a1, 1539 -; RV64-NEXT: sw a1, 0(a0) -; RV64-NEXT: ret +; CHECK-LABEL: store_constant_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 4176 +; CHECK-NEXT: addi a1, a1, 1539 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret store <4 x i8> , ptr %p ret void } define void @store_constant_v4i16(ptr %p) { -; RV32-LABEL: store_constant_v4i16: 
-; RV32: # %bb.0: -; RV32-NEXT: lui a1, 4176 -; RV32-NEXT: addi a1, a1, 1539 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v8, a1 -; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32-NEXT: vsext.vf2 v9, v8 -; RV32-NEXT: vse16.v v9, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: store_constant_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, 4176 -; RV64-NEXT: addiw a1, a1, 1539 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v8, a1 -; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64-NEXT: vsext.vf2 v9, v8 -; RV64-NEXT: vse16.v v9, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: store_constant_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 4176 +; CHECK-NEXT: addi a1, a1, 1539 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v9, v8 +; CHECK-NEXT: vse16.v v9, (a0) +; CHECK-NEXT: ret store <4 x i16> , ptr %p ret void } define void @store_constant_v4i32(ptr %p) { -; RV32-LABEL: store_constant_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: lui a1, 4176 -; RV32-NEXT: addi a1, a1, 1539 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v8, a1 -; RV32-NEXT: vsext.vf4 v9, v8 -; RV32-NEXT: vse32.v v9, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: store_constant_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, 4176 -; RV64-NEXT: addiw a1, a1, 1539 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v8, a1 -; RV64-NEXT: vsext.vf4 v9, v8 -; RV64-NEXT: vse32.v v9, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: store_constant_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 4176 +; CHECK-NEXT: addi a1, a1, 1539 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vsext.vf4 v9, v8 +; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: ret store <4 x i32> , ptr %p ret void } define void @store_id_v4i8(ptr %p) { -; RV32-LABEL: store_id_v4i8: -; RV32: # %bb.0: -; RV32-NEXT: lui a1, 12320 -; RV32-NEXT: addi a1, a1, 256 -; RV32-NEXT: sw a1, 0(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: store_id_v4i8: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, 12320 -; RV64-NEXT: addiw a1, a1, 256 -; RV64-NEXT: sw a1, 0(a0) -; RV64-NEXT: ret +; CHECK-LABEL: store_id_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 12320 +; CHECK-NEXT: addi a1, a1, 256 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret store <4 x i8> , ptr %p ret void } @@ -279,3 +237,6 @@ define void @store_constant_v2i8_volatile(ptr %p) { store volatile <2 x i8> , ptr %p ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll index 8ac3b7d02e338..ef970ad63ae77 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll @@ -56,7 +56,7 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado ; V: # %bb.0: # %entry ; V-NEXT: li a2, 1024 ; V-NEXT: lui a3, 983765 -; V-NEXT: addiw a3, a3, 873 +; V-NEXT: addi a3, a3, 873 ; V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; V-NEXT: vmv.s.x v0, a3 ; V-NEXT: li a3, 32 @@ -80,7 +80,7 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado ; ZVE32F: # %bb.0: # %entry ; ZVE32F-NEXT: li a2, 1024 ; ZVE32F-NEXT: lui a3, 983765 -; ZVE32F-NEXT: addiw a3, a3, 873 +; ZVE32F-NEXT: addi a3, a3, 873 ; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; ZVE32F-NEXT: vmv.s.x v0, a3 ; ZVE32F-NEXT: li a3, 32 @@ -331,7 +331,7 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read ; V-NEXT: li a2, 1024 ; V-NEXT: li a3, 32 ; V-NEXT: lui a4, 983765 -; V-NEXT: addiw a4, a4, 873 +; V-NEXT: addi a4, a4, 873 ; V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; V-NEXT: vmv.s.x v0, a4 ; V-NEXT: li a4, 5 @@ -355,7 +355,7 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read ; ZVE32F-NEXT: li a2, 1024 ; ZVE32F-NEXT: li a3, 32 ; ZVE32F-NEXT: lui a4, 983765 -; ZVE32F-NEXT: addiw a4, a4, 873 +; ZVE32F-NEXT: addi a4, a4, 873 ; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; ZVE32F-NEXT: vmv.s.x v0, a4 ; ZVE32F-NEXT: li a4, 5 @@ -838,7 +838,7 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt ; CHECK-NEXT: # %bb.4: # %bb30 ; CHECK-NEXT: beq a4, a5, .LBB13_7 ; CHECK-NEXT: .LBB13_5: # %bb32 -; CHECK-NEXT: addiw a2, a3, -1024 +; CHECK-NEXT: addi a2, a3, -1024 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: slli a4, a3, 2 ; CHECK-NEXT: add a1, a1, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll index db2361dd3e586..ca833451233be 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll @@ -16,7 +16,7 @@ define i32 @test(i32 %call.i) { ; CHECK-V-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-V-NEXT: lui a0, 524288 ; CHECK-V-NEXT: vslide1down.vx v8, v8, a0 -; CHECK-V-NEXT: addiw a0, a0, 2 +; CHECK-V-NEXT: addi a0, a0, 2 ; CHECK-V-NEXT: vmslt.vx v0, v8, a0 ; CHECK-V-NEXT: vmv.v.i v8, 0 ; CHECK-V-NEXT: vmerge.vim v8, v8, 1, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index 8e47dd72ae218..6cfa504b501ba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -911,7 +911,7 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) { ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8 ; CHECK-V-NEXT: lui a0, 8 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vmin.vx v8, v9, a0 ; CHECK-V-NEXT: lui a0, 1048568 ; CHECK-V-NEXT: vmax.vx v8, v8, a0 @@ -952,7 +952,7 @@ define <2 x i16> @utest_f64i16(<2 x double> %x) { ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vfncvt.rtz.xu.f.w v9, v8 ; CHECK-V-NEXT: lui a0, 16 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: 
addi a0, a0, -1 ; CHECK-V-NEXT: vminu.vx v8, v9, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v8, 0 @@ -993,7 +993,7 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) { ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8 ; CHECK-V-NEXT: lui a0, 16 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vmin.vx v8, v9, a0 ; CHECK-V-NEXT: vmax.vx v8, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -1073,7 +1073,7 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) { ; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8 ; CHECK-V-NEXT: lui a0, 8 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: lui a0, 1048568 ; CHECK-V-NEXT: vmax.vx v8, v8, a0 @@ -1132,7 +1132,7 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) { ; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; CHECK-V-NEXT: lui a0, 16 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vminu.vx v8, v8, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v8, 0 @@ -1199,7 +1199,7 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) { ; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8 ; CHECK-V-NEXT: lui a0, 16 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: vmax.vx v8, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -1511,7 +1511,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 8 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vmin.vx v8, v10, a0 ; CHECK-V-NEXT: lui a0, 1048568 ; CHECK-V-NEXT: vmax.vx v10, v8, a0 @@ -1794,7 +1794,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 16 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vminu.vx v10, v10, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 @@ -2097,7 +2097,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 16 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vmin.vx v8, v10, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -4227,7 +4227,7 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) { ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8 ; CHECK-V-NEXT: lui a0, 8 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vmin.vx v8, v9, a0 ; CHECK-V-NEXT: lui a0, 1048568 ; CHECK-V-NEXT: vmax.vx v8, v8, a0 @@ -4266,7 +4266,7 @@ define <2 x i16> @utest_f64i16_mm(<2 x double> %x) { ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vfncvt.rtz.xu.f.w v9, v8 ; CHECK-V-NEXT: lui a0, 16 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vminu.vx v8, v9, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v8, 0 @@ -4306,7 +4306,7 @@ 
define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) { ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8 ; CHECK-V-NEXT: lui a0, 16 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vmin.vx v8, v9, a0 ; CHECK-V-NEXT: vmax.vx v8, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma @@ -4384,7 +4384,7 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) { ; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8 ; CHECK-V-NEXT: lui a0, 8 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: lui a0, 1048568 ; CHECK-V-NEXT: vmax.vx v8, v8, a0 @@ -4441,7 +4441,7 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) { ; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; CHECK-V-NEXT: lui a0, 16 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vminu.vx v8, v8, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v8, 0 @@ -4507,7 +4507,7 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) { ; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8 ; CHECK-V-NEXT: lui a0, 16 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: vmax.vx v8, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -4817,7 +4817,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 8 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vmin.vx v8, v10, a0 ; CHECK-V-NEXT: lui a0, 1048568 ; CHECK-V-NEXT: vmax.vx v10, v8, a0 @@ -5096,7 +5096,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 16 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vminu.vx v10, v10, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 @@ -5398,7 +5398,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 16 -; CHECK-V-NEXT: addiw a0, a0, -1 +; CHECK-V-NEXT: addi a0, a0, -1 ; CHECK-V-NEXT: vmin.vx v8, v10, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll index 82c9f405c2392..4e08f401ca4e9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll @@ -65,7 +65,7 @@ define @reverse_nxv2i1( %a) { ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2 -; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 @@ -82,7 +82,7 @@ define @reverse_nxv2i1( %a) { ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 2 -; 
RV64-BITS-256-NEXT: addiw a0, a0, -1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 @@ -97,7 +97,7 @@ define @reverse_nxv2i1( %a) { ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 2 -; RV64-BITS-512-NEXT: addiw a0, a0, -1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 @@ -163,7 +163,7 @@ define @reverse_nxv4i1( %a) { ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1 -; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 @@ -180,7 +180,7 @@ define @reverse_nxv4i1( %a) { ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 1 -; RV64-BITS-256-NEXT: addiw a0, a0, -1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 @@ -195,7 +195,7 @@ define @reverse_nxv4i1( %a) { ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 1 -; RV64-BITS-512-NEXT: addiw a0, a0, -1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 @@ -257,7 +257,7 @@ define @reverse_nxv8i1( %a) { ; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb -; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v10 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0 @@ -273,7 +273,7 @@ define @reverse_nxv8i1( %a) { ; RV64-BITS-256-NEXT: vmv.v.i v8, 0 ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb -; RV64-BITS-256-NEXT: addiw a0, a0, -1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 @@ -287,7 +287,7 @@ define @reverse_nxv8i1( %a) { ; RV64-BITS-512-NEXT: vmv.v.i v8, 0 ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb -; RV64-BITS-512-NEXT: addiw a0, a0, -1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 @@ -353,7 +353,7 @@ define @reverse_nxv16i1( %a) { ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 -; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v12 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0 @@ -370,7 +370,7 @@ define @reverse_nxv16i1( %a) { ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 1 -; RV64-BITS-256-NEXT: addiw a0, a0, -1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 ; RV64-BITS-256-NEXT: 
vid.v v10 ; RV64-BITS-256-NEXT: vrsub.vx v10, v10, a0 ; RV64-BITS-256-NEXT: vrgather.vv v12, v8, v10 @@ -385,7 +385,7 @@ define @reverse_nxv16i1( %a) { ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 1 -; RV64-BITS-512-NEXT: addiw a0, a0, -1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vid.v v10 ; RV64-BITS-512-NEXT: vrsub.vx v10, v10, a0 ; RV64-BITS-512-NEXT: vrgather.vv v12, v8, v10 @@ -451,7 +451,7 @@ define @reverse_nxv32i1( %a) { ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 -; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 @@ -468,7 +468,7 @@ define @reverse_nxv32i1( %a) { ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 2 -; RV64-BITS-256-NEXT: addiw a0, a0, -1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 ; RV64-BITS-256-NEXT: vid.v v12 ; RV64-BITS-256-NEXT: vrsub.vx v12, v12, a0 ; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v12 @@ -483,7 +483,7 @@ define @reverse_nxv32i1( %a) { ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 -; RV64-BITS-512-NEXT: addiw a0, a0, -1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vid.v v12 ; RV64-BITS-512-NEXT: vrsub.vx v12, v12, a0 ; RV64-BITS-512-NEXT: vrgather.vv v16, v8, v12 @@ -552,7 +552,7 @@ define @reverse_nxv64i1( %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 -; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v8 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 @@ -574,7 +574,7 @@ define @reverse_nxv64i1( %a) { ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 3 -; RV64-BITS-256-NEXT: addiw a0, a0, -1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 ; RV64-BITS-256-NEXT: vid.v v16 ; RV64-BITS-256-NEXT: vrsub.vx v16, v16, a0 ; RV64-BITS-256-NEXT: vrgather.vv v24, v8, v16 @@ -586,7 +586,7 @@ define @reverse_nxv64i1( %a) { ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 -; RV64-BITS-512-NEXT: addiw a0, a0, -1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; RV64-BITS-512-NEXT: vid.v v8 ; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0 @@ -650,7 +650,7 @@ define @reverse_nxv1i8( %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 3 -; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 @@ -663,7 +663,7 @@ define @reverse_nxv1i8( %a) { ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 3 -; RV64-BITS-256-NEXT: addiw a0, a0, -1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 @@ -675,7 +675,7 @@ define 
@reverse_nxv1i8( %a) { ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 3 -; RV64-BITS-512-NEXT: addiw a0, a0, -1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 @@ -728,7 +728,7 @@ define @reverse_nxv2i8( %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2 -; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 @@ -741,7 +741,7 @@ define @reverse_nxv2i8( %a) { ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 2 -; RV64-BITS-256-NEXT: addiw a0, a0, -1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 @@ -753,7 +753,7 @@ define @reverse_nxv2i8( %a) { ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 2 -; RV64-BITS-512-NEXT: addiw a0, a0, -1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 @@ -806,7 +806,7 @@ define @reverse_nxv4i8( %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1 -; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 @@ -819,7 +819,7 @@ define @reverse_nxv4i8( %a) { ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 1 -; RV64-BITS-256-NEXT: addiw a0, a0, -1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 @@ -831,7 +831,7 @@ define @reverse_nxv4i8( %a) { ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 1 -; RV64-BITS-512-NEXT: addiw a0, a0, -1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 @@ -880,7 +880,7 @@ define @reverse_nxv8i8( %a) { ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv8i8: ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb -; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v10 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0 @@ -892,7 +892,7 @@ define @reverse_nxv8i8( %a) { ; RV64-BITS-256-LABEL: reverse_nxv8i8: ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb -; RV64-BITS-256-NEXT: addiw a0, a0, -1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 @@ -903,7 +903,7 @@ define @reverse_nxv8i8( %a) { ; RV64-BITS-512-LABEL: reverse_nxv8i8: ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb -; RV64-BITS-512-NEXT: addiw a0, a0, -1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 ; 
RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 @@ -956,7 +956,7 @@ define @reverse_nxv16i8( %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 -; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v12 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0 @@ -969,7 +969,7 @@ define @reverse_nxv16i8( %a) { ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 1 -; RV64-BITS-256-NEXT: addiw a0, a0, -1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; RV64-BITS-256-NEXT: vid.v v10 ; RV64-BITS-256-NEXT: vrsub.vx v12, v10, a0 @@ -981,7 +981,7 @@ define @reverse_nxv16i8( %a) { ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 1 -; RV64-BITS-512-NEXT: addiw a0, a0, -1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; RV64-BITS-512-NEXT: vid.v v10 ; RV64-BITS-512-NEXT: vrsub.vx v12, v10, a0 @@ -1034,7 +1034,7 @@ define @reverse_nxv32i8( %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 -; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 @@ -1047,7 +1047,7 @@ define @reverse_nxv32i8( %a) { ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 2 -; RV64-BITS-256-NEXT: addiw a0, a0, -1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; RV64-BITS-256-NEXT: vid.v v12 ; RV64-BITS-256-NEXT: vrsub.vx v16, v12, a0 @@ -1059,7 +1059,7 @@ define @reverse_nxv32i8( %a) { ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 -; RV64-BITS-512-NEXT: addiw a0, a0, -1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; RV64-BITS-512-NEXT: vid.v v12 ; RV64-BITS-512-NEXT: vrsub.vx v16, v12, a0 @@ -1114,7 +1114,7 @@ define @reverse_nxv64i8( %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 -; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v24, v16, a0 @@ -1128,7 +1128,7 @@ define @reverse_nxv64i8( %a) { ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 3 -; RV64-BITS-256-NEXT: addiw a0, a0, -1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; RV64-BITS-256-NEXT: vid.v v16 ; RV64-BITS-256-NEXT: vrsub.vx v24, v16, a0 @@ -1140,7 +1140,7 @@ define @reverse_nxv64i8( %a) { ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 -; RV64-BITS-512-NEXT: addiw a0, a0, -1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; RV64-BITS-512-NEXT: vid.v v16 ; RV64-BITS-512-NEXT: vrsub.vx v24, v16, a0 @@ -1153,305 +1153,175 @@ define @reverse_nxv64i8( %a) { } define 
@reverse_nxv1i16( %a) { -; RV32-LABEL: reverse_nxv1i16: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: srli a0, a0, 3 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vrsub.vx v10, v9, a0 -; RV32-NEXT: vrgather.vv v9, v8, v10 -; RV32-NEXT: vmv1r.v v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv1i16: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: srli a0, a0, 3 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV64-NEXT: vid.v v9 -; RV64-NEXT: vrsub.vx v10, v9, a0 -; RV64-NEXT: vrgather.vv v9, v8, v10 -; RV64-NEXT: vmv1r.v v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a0 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv1i16( %a) ret %res } define @reverse_nxv2i16( %a) { -; RV32-LABEL: reverse_nxv2i16: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vrsub.vx v10, v9, a0 -; RV32-NEXT: vrgather.vv v9, v8, v10 -; RV32-NEXT: vmv1r.v v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv2i16: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV64-NEXT: vid.v v9 -; RV64-NEXT: vrsub.vx v10, v9, a0 -; RV64-NEXT: vrgather.vv v9, v8, v10 -; RV64-NEXT: vmv1r.v v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a0 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv2i16( %a) ret %res } define @reverse_nxv4i16( %a) { -; RV32-LABEL: reverse_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: srli a0, a0, 1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vrsub.vx v10, v9, a0 -; RV32-NEXT: vrgather.vv v9, v8, v10 -; RV32-NEXT: vmv.v.v v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV64-NEXT: vid.v v9 -; RV64-NEXT: vrsub.vx v10, v9, a0 -; RV64-NEXT: vrgather.vv v9, v8, v10 -; RV64-NEXT: vmv.v.v v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a0 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv4i16( %a) ret %res } define @reverse_nxv8i16( %a) { -; RV32-LABEL: reverse_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV32-NEXT: vid.v v10 -; RV32-NEXT: vrsub.vx v12, v10, 
a0 -; RV32-NEXT: vrgather.vv v10, v8, v12 -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV64-NEXT: vid.v v10 -; RV64-NEXT: vrsub.vx v12, v10, a0 -; RV64-NEXT: vrgather.vv v10, v8, v12 -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv8i16( %a) ret %res } define @reverse_nxv16i16( %a) { -; RV32-LABEL: reverse_nxv16i16: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV32-NEXT: vid.v v12 -; RV32-NEXT: vrsub.vx v16, v12, a0 -; RV32-NEXT: vrgather.vv v12, v8, v16 -; RV32-NEXT: vmv.v.v v8, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv16i16: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV64-NEXT: vid.v v12 -; RV64-NEXT: vrsub.vx v16, v12, a0 -; RV64-NEXT: vrgather.vv v12, v8, v16 -; RV64-NEXT: vmv.v.v v8, v12 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a0 +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv16i16( %a) ret %res } define @reverse_nxv32i16( %a) { -; RV32-LABEL: reverse_nxv32i16: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV32-NEXT: vid.v v16 -; RV32-NEXT: vrsub.vx v24, v16, a0 -; RV32-NEXT: vrgather.vv v16, v8, v24 -; RV32-NEXT: vmv.v.v v8, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv32i16: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV64-NEXT: vid.v v16 -; RV64-NEXT: vrsub.vx v24, v16, a0 -; RV64-NEXT: vrgather.vv v16, v8, v24 -; RV64-NEXT: vmv.v.v v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a0 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv32i16( %a) ret %res } define @reverse_nxv1i32( %a) { -; RV32-LABEL: reverse_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: srli a0, a0, 3 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vrsub.vx v10, v9, a0 -; RV32-NEXT: vrgather.vv v9, v8, v10 -; RV32-NEXT: vmv1r.v v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: srli a0, a0, 3 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, 
zero, e32, mf2, ta, ma -; RV64-NEXT: vid.v v9 -; RV64-NEXT: vrsub.vx v10, v9, a0 -; RV64-NEXT: vrgather.vv v9, v8, v10 -; RV64-NEXT: vmv1r.v v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a0 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv1i32( %a) ret %res } define @reverse_nxv2i32( %a) { -; RV32-LABEL: reverse_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vrsub.vx v10, v9, a0 -; RV32-NEXT: vrgather.vv v9, v8, v10 -; RV32-NEXT: vmv.v.v v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64-NEXT: vid.v v9 -; RV64-NEXT: vrsub.vx v10, v9, a0 -; RV64-NEXT: vrgather.vv v9, v8, v10 -; RV64-NEXT: vmv.v.v v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a0 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv2i32( %a) ret %res } define @reverse_nxv4i32( %a) { -; RV32-LABEL: reverse_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: srli a0, a0, 1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32-NEXT: vid.v v10 -; RV32-NEXT: vrsub.vx v12, v10, a0 -; RV32-NEXT: vrgather.vv v10, v8, v12 -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64-NEXT: vid.v v10 -; RV64-NEXT: vrsub.vx v12, v10, a0 -; RV64-NEXT: vrgather.vv v10, v8, v12 -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv4i32( %a) ret %res } define @reverse_nxv8i32( %a) { -; RV32-LABEL: reverse_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vid.v v12 -; RV32-NEXT: vrsub.vx v16, v12, a0 -; RV32-NEXT: vrgather.vv v12, v8, v16 -; RV32-NEXT: vmv.v.v v8, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV64-NEXT: vid.v v12 -; RV64-NEXT: vrsub.vx v16, v12, a0 -; RV64-NEXT: vrgather.vv v12, v8, v16 -; RV64-NEXT: vmv.v.v v8, v12 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; 
CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a0 +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv8i32( %a) ret %res } define @reverse_nxv16i32( %a) { -; RV32-LABEL: reverse_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vid.v v16 -; RV32-NEXT: vrsub.vx v24, v16, a0 -; RV32-NEXT: vrgather.vv v16, v8, v24 -; RV32-NEXT: vmv.v.v v8, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64-NEXT: vid.v v16 -; RV64-NEXT: vrsub.vx v24, v16, a0 -; RV64-NEXT: vrgather.vv v16, v8, v24 -; RV64-NEXT: vmv.v.v v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a0 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv16i32( %a) ret %res } @@ -1524,305 +1394,175 @@ define @reverse_nxv8i64( %a) { ; define @reverse_nxv1f16( %a) { -; RV32-LABEL: reverse_nxv1f16: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: srli a0, a0, 3 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vrsub.vx v10, v9, a0 -; RV32-NEXT: vrgather.vv v9, v8, v10 -; RV32-NEXT: vmv1r.v v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv1f16: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: srli a0, a0, 3 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV64-NEXT: vid.v v9 -; RV64-NEXT: vrsub.vx v10, v9, a0 -; RV64-NEXT: vrgather.vv v9, v8, v10 -; RV64-NEXT: vmv1r.v v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a0 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv1f16( %a) ret %res } define @reverse_nxv2f16( %a) { -; RV32-LABEL: reverse_nxv2f16: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vrsub.vx v10, v9, a0 -; RV32-NEXT: vrgather.vv v9, v8, v10 -; RV32-NEXT: vmv1r.v v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv2f16: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV64-NEXT: vid.v v9 -; RV64-NEXT: vrsub.vx v10, v9, a0 -; RV64-NEXT: vrgather.vv v9, v8, v10 -; RV64-NEXT: vmv1r.v v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a0 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: 
vmv1r.v v8, v9 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv2f16( %a) ret %res } define @reverse_nxv4f16( %a) { -; RV32-LABEL: reverse_nxv4f16: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: srli a0, a0, 1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vrsub.vx v10, v9, a0 -; RV32-NEXT: vrgather.vv v9, v8, v10 -; RV32-NEXT: vmv.v.v v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv4f16: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV64-NEXT: vid.v v9 -; RV64-NEXT: vrsub.vx v10, v9, a0 -; RV64-NEXT: vrgather.vv v9, v8, v10 -; RV64-NEXT: vmv.v.v v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a0 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv4f16( %a) ret %res } define @reverse_nxv8f16( %a) { -; RV32-LABEL: reverse_nxv8f16: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV32-NEXT: vid.v v10 -; RV32-NEXT: vrsub.vx v12, v10, a0 -; RV32-NEXT: vrgather.vv v10, v8, v12 -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv8f16: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV64-NEXT: vid.v v10 -; RV64-NEXT: vrsub.vx v12, v10, a0 -; RV64-NEXT: vrgather.vv v10, v8, v12 -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv8f16( %a) ret %res } define @reverse_nxv16f16( %a) { -; RV32-LABEL: reverse_nxv16f16: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV32-NEXT: vid.v v12 -; RV32-NEXT: vrsub.vx v16, v12, a0 -; RV32-NEXT: vrgather.vv v12, v8, v16 -; RV32-NEXT: vmv.v.v v8, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv16f16: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV64-NEXT: vid.v v12 -; RV64-NEXT: vrsub.vx v16, v12, a0 -; RV64-NEXT: vrgather.vv v12, v8, v16 -; RV64-NEXT: vmv.v.v v8, v12 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a0 +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv16f16( %a) ret %res } define @reverse_nxv32f16( %a) { -; RV32-LABEL: reverse_nxv32f16: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, 
zero, e16, m8, ta, ma -; RV32-NEXT: vid.v v16 -; RV32-NEXT: vrsub.vx v24, v16, a0 -; RV32-NEXT: vrgather.vv v16, v8, v24 -; RV32-NEXT: vmv.v.v v8, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv32f16: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV64-NEXT: vid.v v16 -; RV64-NEXT: vrsub.vx v24, v16, a0 -; RV64-NEXT: vrgather.vv v16, v8, v24 -; RV64-NEXT: vmv.v.v v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a0 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv32f16( %a) ret %res } define @reverse_nxv1f32( %a) { -; RV32-LABEL: reverse_nxv1f32: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: srli a0, a0, 3 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vrsub.vx v10, v9, a0 -; RV32-NEXT: vrgather.vv v9, v8, v10 -; RV32-NEXT: vmv1r.v v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv1f32: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: srli a0, a0, 3 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV64-NEXT: vid.v v9 -; RV64-NEXT: vrsub.vx v10, v9, a0 -; RV64-NEXT: vrgather.vv v9, v8, v10 -; RV64-NEXT: vmv1r.v v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a0 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv1f32( %a) ret %res } define @reverse_nxv2f32( %a) { -; RV32-LABEL: reverse_nxv2f32: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vrsub.vx v10, v9, a0 -; RV32-NEXT: vrgather.vv v9, v8, v10 -; RV32-NEXT: vmv.v.v v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv2f32: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64-NEXT: vid.v v9 -; RV64-NEXT: vrsub.vx v10, v9, a0 -; RV64-NEXT: vrgather.vv v9, v8, v10 -; RV64-NEXT: vmv.v.v v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a0 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv2f32( %a) ret %res } define @reverse_nxv4f32( %a) { -; RV32-LABEL: reverse_nxv4f32: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: srli a0, a0, 1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32-NEXT: vid.v v10 -; RV32-NEXT: vrsub.vx v12, v10, a0 -; RV32-NEXT: vrgather.vv v10, v8, v12 -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv4f32: -; RV64: # %bb.0: -; 
RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64-NEXT: vid.v v10 -; RV64-NEXT: vrsub.vx v12, v10, a0 -; RV64-NEXT: vrgather.vv v10, v8, v12 -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv4f32( %a) ret %res } define @reverse_nxv8f32( %a) { -; RV32-LABEL: reverse_nxv8f32: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vid.v v12 -; RV32-NEXT: vrsub.vx v16, v12, a0 -; RV32-NEXT: vrgather.vv v12, v8, v16 -; RV32-NEXT: vmv.v.v v8, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv8f32: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV64-NEXT: vid.v v12 -; RV64-NEXT: vrsub.vx v16, v12, a0 -; RV64-NEXT: vrgather.vv v12, v8, v16 -; RV64-NEXT: vmv.v.v v8, v12 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a0 +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv8f32( %a) ret %res } define @reverse_nxv16f32( %a) { -; RV32-LABEL: reverse_nxv16f32: -; RV32: # %bb.0: -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vid.v v16 -; RV32-NEXT: vrsub.vx v24, v16, a0 -; RV32-NEXT: vrgather.vv v16, v8, v24 -; RV32-NEXT: vmv.v.v v8, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_nxv16f32: -; RV64: # %bb.0: -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64-NEXT: vid.v v16 -; RV64-NEXT: vrsub.vx v24, v16, a0 -; RV64-NEXT: vrgather.vv v16, v8, v24 -; RV64-NEXT: vmv.v.v v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: reverse_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a0 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv16f32( %a) ret %res } diff --git a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll index 1478e8bfd3c65..f27edd3611665 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll @@ -14,7 +14,7 @@ define @foo(ptr %p) { ; CHECK-NEXT: lui a0, 4 ; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: lui a0, 1 -; CHECK-NEXT: addiw a0, a0, -361 +; CHECK-NEXT: addi a0, a0, -361 ; CHECK-NEXT: vmacc.vx v10, a0, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v10, 15 diff --git a/llvm/test/CodeGen/RISCV/rvv/select-sra.ll b/llvm/test/CodeGen/RISCV/rvv/select-sra.ll index 6804029eaad70..10c74a0e81e7e 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/select-sra.ll +++ b/llvm/test/CodeGen/RISCV/rvv/select-sra.ll @@ -20,11 +20,11 @@ define <4 x i32> @vselect_of_consts(<4 x i1> %cc) { ; RV64-LABEL: vselect_of_consts: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 284280 -; RV64-NEXT: addiw a0, a0, 291 +; RV64-NEXT: addi a0, a0, 291 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmv.v.x v8, a0 ; RV64-NEXT: lui a0, 214376 -; RV64-NEXT: addiw a0, a0, -2030 +; RV64-NEXT: addi a0, a0, -2030 ; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV64-NEXT: ret %v = select <4 x i1> %cc, <4 x i32> , <4 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll index c91db751561c3..6984f2b3402a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -104,45 +104,25 @@ define <16 x i8> @v16i8(<16 x i8> %a) { } define <32 x i8> @v16i8_2(<16 x i8> %a, <16 x i8> %b) { -; RV32-LABEL: v16i8_2: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI7_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI7_0) -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV32-NEXT: vle8.v v12, (a0) -; RV32-NEXT: vmv1r.v v14, v9 -; RV32-NEXT: vrgather.vv v10, v8, v12 -; RV32-NEXT: vid.v v8 -; RV32-NEXT: vrsub.vi v8, v8, 15 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; RV32-NEXT: vrgather.vv v10, v14, v8, v0.t -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: v16i8_2: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI7_0) -; RV64-NEXT: addi a0, a0, %lo(.LCPI7_0) -; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV64-NEXT: vle8.v v12, (a0) -; RV64-NEXT: vmv1r.v v14, v9 -; RV64-NEXT: vrgather.vv v10, v8, v12 -; RV64-NEXT: vid.v v8 -; RV64-NEXT: vrsub.vi v8, v8, 15 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; RV64-NEXT: vrgather.vv v10, v14, v8, v0.t -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: v16i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI7_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0) +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: vmv1r.v v14, v9 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vrsub.vi v8, v8, 15 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v14, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret %v32i8 = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> ret <32 x i8> %v32i8 } @@ -248,45 +228,25 @@ define <16 x i16> @v16i16(<16 x i16> %a) { } define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { -; RV32-LABEL: v16i16_2: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI15_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI15_0) -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; RV32-NEXT: vle16.v v20, (a0) -; RV32-NEXT: vmv2r.v v16, v10 -; RV32-NEXT: vmv2r.v v12, v8 -; RV32-NEXT: vrgather.vv v8, v12, v20 -; RV32-NEXT: vid.v v12 -; RV32-NEXT: vrsub.vi v12, v12, 15 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: 
vmv.s.x v0, a0 -; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; RV32-NEXT: vrgather.vv v8, v16, v12, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: v16i16_2: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI15_0) -; RV64-NEXT: addi a0, a0, %lo(.LCPI15_0) -; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; RV64-NEXT: vle16.v v20, (a0) -; RV64-NEXT: vmv2r.v v16, v10 -; RV64-NEXT: vmv2r.v v12, v8 -; RV64-NEXT: vrgather.vv v8, v12, v20 -; RV64-NEXT: vid.v v12 -; RV64-NEXT: vrsub.vi v12, v12, 15 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; RV64-NEXT: vrgather.vv v8, v16, v12, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: v16i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI15_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI15_0) +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vle16.v v20, (a0) +; CHECK-NEXT: vmv2r.v v16, v10 +; CHECK-NEXT: vmv2r.v v12, v8 +; CHECK-NEXT: vrgather.vv v8, v12, v20 +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vi v12, v12, 15 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vrgather.vv v8, v16, v12, v0.t +; CHECK-NEXT: ret %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> ret <32 x i16> %v32i16 } @@ -401,47 +361,26 @@ define <16 x i32> @v16i32(<16 x i32> %a) { } define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) { -; RV32-LABEL: v16i32_2: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI23_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI23_0) -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vle16.v v20, (a0) -; RV32-NEXT: vmv4r.v v24, v12 -; RV32-NEXT: vmv4r.v v16, v8 -; RV32-NEXT: vrgatherei16.vv v8, v16, v20 -; RV32-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; RV32-NEXT: vid.v v16 -; RV32-NEXT: vrsub.vi v16, v16, 15 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: v16i32_2: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI23_0) -; RV64-NEXT: addi a0, a0, %lo(.LCPI23_0) -; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV64-NEXT: vle16.v v20, (a0) -; RV64-NEXT: vmv4r.v v24, v12 -; RV64-NEXT: vmv4r.v v16, v8 -; RV64-NEXT: vrgatherei16.vv v8, v16, v20 -; RV64-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; RV64-NEXT: vid.v v16 -; RV64-NEXT: vrsub.vi v16, v16, 15 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: v16i32_2: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI23_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI23_0) +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle16.v v20, (a0) +; CHECK-NEXT: vmv4r.v v24, v12 +; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: vrgatherei16.vv v8, v16, v20 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vi v16, v16, 15 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e32, 
mf2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vrgatherei16.vv v8, v24, v16, v0.t +; CHECK-NEXT: ret %v32i32 = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> ret <32 x i32> %v32i32 } @@ -793,3 +732,6 @@ define <32 x i8> @v32i8(<32 x i8> %a) { ret <32 x i8> %v32i8 } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index 63a85b1f4dc74..f08bfce409305 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -251,7 +251,7 @@ define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB7_5 ; CHECK-NEXT: .LBB7_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 @@ -342,7 +342,7 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB8_5 ; CHECK-NEXT: .LBB8_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 @@ -433,7 +433,7 @@ define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB9_5 ; CHECK-NEXT: .LBB9_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 @@ -524,7 +524,7 @@ define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB10_5 ; CHECK-NEXT: .LBB10_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 @@ -615,7 +615,7 @@ define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB11_5 ; CHECK-NEXT: .LBB11_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 @@ -706,7 +706,7 @@ define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB12_5 ; CHECK-NEXT: .LBB12_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 @@ -797,7 +797,7 @@ define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB13_5 ; CHECK-NEXT: .LBB13_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 @@ -990,7 +990,7 @@ define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB17_5 ; CHECK-NEXT: .LBB17_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 @@ -1081,7 +1081,7 @@ define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB18_5 ; 
CHECK-NEXT: .LBB18_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 @@ -1172,7 +1172,7 @@ define void @sink_splat_ashr_scalable(ptr nocapture %a) { ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB19_5 ; CHECK-NEXT: .LBB19_2: # %vector.ph -; CHECK-NEXT: addiw a1, a2, -1 +; CHECK-NEXT: addi a1, a2, -1 ; CHECK-NEXT: andi a3, a1, 1024 ; CHECK-NEXT: xori a1, a3, 1024 ; CHECK-NEXT: slli a4, a4, 1 @@ -1467,7 +1467,7 @@ define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB26_5 ; CHECK-NEXT: .LBB26_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma @@ -1557,7 +1557,7 @@ define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB27_5 ; CHECK-NEXT: .LBB27_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma @@ -1647,7 +1647,7 @@ define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB28_5 ; CHECK-NEXT: .LBB28_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma @@ -1737,7 +1737,7 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB29_5 ; CHECK-NEXT: .LBB29_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma @@ -1827,7 +1827,7 @@ define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB30_5 ; CHECK-NEXT: .LBB30_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma @@ -1917,7 +1917,7 @@ define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB31_5 ; CHECK-NEXT: .LBB31_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma @@ -2083,7 +2083,7 @@ define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocap ; CHECK-NEXT: li a4, 0 ; CHECK-NEXT: j .LBB34_5 ; CHECK-NEXT: .LBB34_2: # %vector.ph -; CHECK-NEXT: addiw a4, a3, -1 +; CHECK-NEXT: addi a4, a3, -1 ; CHECK-NEXT: andi a5, a4, 1024 ; CHECK-NEXT: xori a4, a5, 1024 ; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma @@ -2183,7 +2183,7 @@ define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noali ; CHECK-NEXT: li a4, 0 ; CHECK-NEXT: j .LBB35_5 ; CHECK-NEXT: .LBB35_2: # %vector.ph -; CHECK-NEXT: addiw a4, a3, -1 +; CHECK-NEXT: addi a4, a3, -1 ; CHECK-NEXT: andi a5, a4, 1024 ; CHECK-NEXT: xori a4, a5, 1024 ; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma @@ -2496,7 +2496,7 @@ define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB42_5 ; CHECK-NEXT: .LBB42_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 
+; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 @@ -2587,7 +2587,7 @@ define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB43_5 ; CHECK-NEXT: .LBB43_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 @@ -2678,7 +2678,7 @@ define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB44_5 ; CHECK-NEXT: .LBB44_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 @@ -2769,7 +2769,7 @@ define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB45_5 ; CHECK-NEXT: .LBB45_2: # %vector.ph -; CHECK-NEXT: addiw a2, a3, -1 +; CHECK-NEXT: addi a2, a3, -1 ; CHECK-NEXT: andi a4, a2, 1024 ; CHECK-NEXT: xori a2, a4, 1024 ; CHECK-NEXT: slli a5, a5, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll index 56d98981947c3..b7fe722958bfb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll @@ -32,7 +32,7 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: addiw a1, a0, -1 +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsll.vv v10, v8, v9 ; CHECK-NEXT: vsra.vv v9, v10, v9 ; CHECK-NEXT: vmsne.vv v8, v8, v9 @@ -51,7 +51,7 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addiw a1, a0, -1 +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsll.vv v10, v8, v9 ; CHECK-NEXT: vsra.vv v9, v10, v9 ; CHECK-NEXT: vmsne.vv v8, v8, v9 @@ -114,7 +114,7 @@ define @vec_nxv4i32( %x, ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: addiw a1, a0, -1 +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: vsra.vv v14, v12, v10 ; CHECK-NEXT: vmsne.vv v10, v8, v14 @@ -133,7 +133,7 @@ define @vec_nxv8i16( %x, ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addiw a1, a0, -1 +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: vsra.vv v14, v12, v10 ; CHECK-NEXT: vmsne.vv v10, v8, v14 diff --git a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll index 839538039c70b..bfbbb4b4067f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll @@ -22,14 +22,14 @@ define @test_urem_vec_even_divisor_eq0( %x) ; RV64-LABEL: test_urem_vec_even_divisor_eq0: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, -1365 +; RV64-NEXT: addi a0, a0, -1365 ; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsll.vi v9, v8, 15 ; RV64-NEXT: vsrl.vi v8, v8, 1 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, -1366 +; RV64-NEXT: addi a0, a0, -1366 ; RV64-NEXT: vmsgtu.vx v0, v8, a0 ; RV64-NEXT: vmv.v.i v8, 0 ; 
RV64-NEXT: vmerge.vim v8, v8, -1, v0 @@ -61,11 +61,11 @@ define @test_urem_vec_odd_divisor_eq0( %x) ; RV64-LABEL: test_urem_vec_odd_divisor_eq0: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 1048573 -; RV64-NEXT: addiw a0, a0, -819 +; RV64-NEXT: addi a0, a0, -819 ; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: addi a0, a0, 819 ; RV64-NEXT: vmsgtu.vx v0, v8, a0 ; RV64-NEXT: vmv.v.i v8, 0 ; RV64-NEXT: vmerge.vim v8, v8, -1, v0 @@ -105,13 +105,13 @@ define @test_urem_vec_even_divisor_eq1( %x) ; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; RV64-NEXT: vsub.vx v8, v8, a0 ; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, -1365 +; RV64-NEXT: addi a0, a0, -1365 ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsll.vi v9, v8, 15 ; RV64-NEXT: vsrl.vi v8, v8, 1 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, -1366 +; RV64-NEXT: addi a0, a0, -1366 ; RV64-NEXT: vmsgtu.vx v0, v8, a0 ; RV64-NEXT: vmv.v.i v8, 0 ; RV64-NEXT: vmerge.vim v8, v8, -1, v0 @@ -148,10 +148,10 @@ define @test_urem_vec_odd_divisor_eq1( %x) ; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; RV64-NEXT: vsub.vx v8, v8, a0 ; RV64-NEXT: lui a0, 1048573 -; RV64-NEXT: addiw a0, a0, -819 +; RV64-NEXT: addi a0, a0, -819 ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addiw a0, a0, 818 +; RV64-NEXT: addi a0, a0, 818 ; RV64-NEXT: vmsgtu.vx v0, v8, a0 ; RV64-NEXT: vmv.v.i v8, 0 ; RV64-NEXT: vmerge.vim v8, v8, -1, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll index 5e32e551ba0dd..0028ac88cc4fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll @@ -324,27 +324,16 @@ define @vdiv_vx_nxv1i16( %va, i16 signext % } define @vdiv_vi_nxv1i16_0( %va) { -; RV32-LABEL: vdiv_vi_nxv1i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 1048571 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV32-NEXT: vmulh.vx v8, v8, a0 -; RV32-NEXT: vsra.vi v8, v8, 1 -; RV32-NEXT: vsrl.vi v9, v8, 15 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vdiv_vi_nxv1i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV64-NEXT: vmulh.vx v8, v8, a0 -; RV64-NEXT: vsra.vi v8, v8, 1 -; RV64-NEXT: vsrl.vi v9, v8, 15 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vdiv_vi_nxv1i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048571 +; CHECK-NEXT: addi a0, a0, 1755 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = sdiv %va, %splat @@ -374,27 +363,16 @@ define @vdiv_vx_nxv2i16( %va, i16 signext % } define @vdiv_vi_nxv2i16_0( %va) { -; RV32-LABEL: vdiv_vi_nxv2i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 1048571 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV32-NEXT: vmulh.vx v8, v8, a0 -; RV32-NEXT: vsra.vi v8, v8, 1 -; RV32-NEXT: vsrl.vi v9, v8, 15 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vdiv_vi_nxv2i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV64-NEXT: 
vmulh.vx v8, v8, a0 -; RV64-NEXT: vsra.vi v8, v8, 1 -; RV64-NEXT: vsrl.vi v9, v8, 15 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vdiv_vi_nxv2i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048571 +; CHECK-NEXT: addi a0, a0, 1755 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = sdiv %va, %splat @@ -424,27 +402,16 @@ define @vdiv_vx_nxv4i16( %va, i16 signext % } define @vdiv_vi_nxv4i16_0( %va) { -; RV32-LABEL: vdiv_vi_nxv4i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 1048571 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV32-NEXT: vmulh.vx v8, v8, a0 -; RV32-NEXT: vsra.vi v8, v8, 1 -; RV32-NEXT: vsrl.vi v9, v8, 15 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vdiv_vi_nxv4i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV64-NEXT: vmulh.vx v8, v8, a0 -; RV64-NEXT: vsra.vi v8, v8, 1 -; RV64-NEXT: vsrl.vi v9, v8, 15 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vdiv_vi_nxv4i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048571 +; CHECK-NEXT: addi a0, a0, 1755 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = sdiv %va, %splat @@ -474,27 +441,16 @@ define @vdiv_vx_nxv8i16( %va, i16 signext % } define @vdiv_vi_nxv8i16_0( %va) { -; RV32-LABEL: vdiv_vi_nxv8i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 1048571 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV32-NEXT: vmulh.vx v8, v8, a0 -; RV32-NEXT: vsra.vi v8, v8, 1 -; RV32-NEXT: vsrl.vi v10, v8, 15 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: vdiv_vi_nxv8i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV64-NEXT: vmulh.vx v8, v8, a0 -; RV64-NEXT: vsra.vi v8, v8, 1 -; RV64-NEXT: vsrl.vi v10, v8, 15 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: vdiv_vi_nxv8i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048571 +; CHECK-NEXT: addi a0, a0, 1755 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsrl.vi v10, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = sdiv %va, %splat @@ -524,27 +480,16 @@ define @vdiv_vx_nxv16i16( %va, i16 signex } define @vdiv_vi_nxv16i16_0( %va) { -; RV32-LABEL: vdiv_vi_nxv16i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 1048571 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV32-NEXT: vmulh.vx v8, v8, a0 -; RV32-NEXT: vsra.vi v8, v8, 1 -; RV32-NEXT: vsrl.vi v12, v8, 15 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: vdiv_vi_nxv16i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV64-NEXT: vmulh.vx v8, v8, a0 -; 
RV64-NEXT: vsra.vi v8, v8, 1 -; RV64-NEXT: vsrl.vi v12, v8, 15 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: ret +; CHECK-LABEL: vdiv_vi_nxv16i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048571 +; CHECK-NEXT: addi a0, a0, 1755 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsrl.vi v12, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = sdiv %va, %splat @@ -574,27 +519,16 @@ define @vdiv_vx_nxv32i16( %va, i16 signex } define @vdiv_vi_nxv32i16_0( %va) { -; RV32-LABEL: vdiv_vi_nxv32i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 1048571 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV32-NEXT: vmulh.vx v8, v8, a0 -; RV32-NEXT: vsra.vi v8, v8, 1 -; RV32-NEXT: vsrl.vi v16, v8, 15 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: vdiv_vi_nxv32i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV64-NEXT: vmulh.vx v8, v8, a0 -; RV64-NEXT: vsra.vi v8, v8, 1 -; RV64-NEXT: vsrl.vi v16, v8, 15 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: vdiv_vi_nxv32i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048571 +; CHECK-NEXT: addi a0, a0, 1755 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsrl.vi v16, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = sdiv %va, %splat @@ -639,7 +573,7 @@ define @vdiv_vi_nxv1i32_0( %va) { ; RV64-LABEL: vdiv_vi_nxv1i32_0: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addiw a0, a0, -1171 +; RV64-NEXT: addi a0, a0, -1171 ; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; RV64-NEXT: vmulh.vx v9, v8, a0 ; RV64-NEXT: vsub.vv v8, v9, v8 @@ -691,7 +625,7 @@ define @vdiv_vi_nxv2i32_0( %va) { ; RV64-LABEL: vdiv_vi_nxv2i32_0: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addiw a0, a0, -1171 +; RV64-NEXT: addi a0, a0, -1171 ; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; RV64-NEXT: vmulh.vx v9, v8, a0 ; RV64-NEXT: vsub.vv v8, v9, v8 @@ -743,7 +677,7 @@ define @vdiv_vi_nxv4i32_0( %va) { ; RV64-LABEL: vdiv_vi_nxv4i32_0: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addiw a0, a0, -1171 +; RV64-NEXT: addi a0, a0, -1171 ; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; RV64-NEXT: vmulh.vx v10, v8, a0 ; RV64-NEXT: vsub.vv v8, v10, v8 @@ -795,7 +729,7 @@ define @vdiv_vi_nxv8i32_0( %va) { ; RV64-LABEL: vdiv_vi_nxv8i32_0: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addiw a0, a0, -1171 +; RV64-NEXT: addi a0, a0, -1171 ; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; RV64-NEXT: vmulh.vx v12, v8, a0 ; RV64-NEXT: vsub.vv v8, v12, v8 @@ -847,7 +781,7 @@ define @vdiv_vi_nxv16i32_0( %va) { ; RV64-LABEL: vdiv_vi_nxv16i32_0: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addiw a0, a0, -1171 +; RV64-NEXT: addi a0, a0, -1171 ; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; RV64-NEXT: vmulh.vx v16, v8, a0 ; RV64-NEXT: vsub.vv v8, v16, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll index 90b8a7fa70b32..c505cb3d1bbd4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll @@ 
-303,23 +303,14 @@ define @vdivu_vx_nxv1i16( %va, i16 signext } define @vdivu_vi_nxv1i16_0( %va) { -; RV32-LABEL: vdivu_vi_nxv1i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 2 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 13 -; RV32-NEXT: ret -; -; RV64-LABEL: vdivu_vi_nxv1i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 2 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 13 -; RV64-NEXT: ret +; CHECK-LABEL: vdivu_vi_nxv1i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 13 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = udiv %va, %splat @@ -349,23 +340,14 @@ define @vdivu_vx_nxv2i16( %va, i16 signext } define @vdivu_vi_nxv2i16_0( %va) { -; RV32-LABEL: vdivu_vi_nxv2i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 2 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 13 -; RV32-NEXT: ret -; -; RV64-LABEL: vdivu_vi_nxv2i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 2 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 13 -; RV64-NEXT: ret +; CHECK-LABEL: vdivu_vi_nxv2i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 13 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = udiv %va, %splat @@ -395,23 +377,14 @@ define @vdivu_vx_nxv4i16( %va, i16 signext } define @vdivu_vi_nxv4i16_0( %va) { -; RV32-LABEL: vdivu_vi_nxv4i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 2 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 13 -; RV32-NEXT: ret -; -; RV64-LABEL: vdivu_vi_nxv4i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 2 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 13 -; RV64-NEXT: ret +; CHECK-LABEL: vdivu_vi_nxv4i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 13 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = udiv %va, %splat @@ -441,23 +414,14 @@ define @vdivu_vx_nxv8i16( %va, i16 signext } define @vdivu_vi_nxv8i16_0( %va) { -; RV32-LABEL: vdivu_vi_nxv8i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 2 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 13 -; RV32-NEXT: ret -; -; RV64-LABEL: vdivu_vi_nxv8i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 2 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 13 -; RV64-NEXT: ret +; CHECK-LABEL: vdivu_vi_nxv8i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addi 
a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 13 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = udiv %va, %splat @@ -487,23 +451,14 @@ define @vdivu_vx_nxv16i16( %va, i16 signe } define @vdivu_vi_nxv16i16_0( %va) { -; RV32-LABEL: vdivu_vi_nxv16i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 2 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 13 -; RV32-NEXT: ret -; -; RV64-LABEL: vdivu_vi_nxv16i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 2 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 13 -; RV64-NEXT: ret +; CHECK-LABEL: vdivu_vi_nxv16i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 13 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = udiv %va, %splat @@ -533,23 +488,14 @@ define @vdivu_vx_nxv32i16( %va, i16 signe } define @vdivu_vi_nxv32i16_0( %va) { -; RV32-LABEL: vdivu_vi_nxv32i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 2 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 13 -; RV32-NEXT: ret -; -; RV64-LABEL: vdivu_vi_nxv32i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 2 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 13 -; RV64-NEXT: ret +; CHECK-LABEL: vdivu_vi_nxv32i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 13 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = udiv %va, %splat @@ -579,23 +525,14 @@ define @vdivu_vx_nxv1i32( %va, i32 signext } define @vdivu_vi_nxv1i32_0( %va) { -; RV32-LABEL: vdivu_vi_nxv1i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 29 -; RV32-NEXT: ret -; -; RV64-LABEL: vdivu_vi_nxv1i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 131072 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 29 -; RV64-NEXT: ret +; CHECK-LABEL: vdivu_vi_nxv1i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 29 +; CHECK-NEXT: ret %head = insertelement poison, i32 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = udiv %va, %splat @@ -625,23 +562,14 @@ define @vdivu_vx_nxv2i32( %va, i32 signext } define @vdivu_vi_nxv2i32_0( %va) { -; RV32-LABEL: vdivu_vi_nxv2i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 29 -; RV32-NEXT: ret -; -; RV64-LABEL: vdivu_vi_nxv2i32_0: -; RV64: # 
%bb.0: -; RV64-NEXT: lui a0, 131072 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 29 -; RV64-NEXT: ret +; CHECK-LABEL: vdivu_vi_nxv2i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 29 +; CHECK-NEXT: ret %head = insertelement poison, i32 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = udiv %va, %splat @@ -671,23 +599,14 @@ define @vdivu_vx_nxv4i32( %va, i32 signext } define @vdivu_vi_nxv4i32_0( %va) { -; RV32-LABEL: vdivu_vi_nxv4i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 29 -; RV32-NEXT: ret -; -; RV64-LABEL: vdivu_vi_nxv4i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 131072 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 29 -; RV64-NEXT: ret +; CHECK-LABEL: vdivu_vi_nxv4i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 29 +; CHECK-NEXT: ret %head = insertelement poison, i32 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = udiv %va, %splat @@ -717,23 +636,14 @@ define @vdivu_vx_nxv8i32( %va, i32 signext } define @vdivu_vi_nxv8i32_0( %va) { -; RV32-LABEL: vdivu_vi_nxv8i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 29 -; RV32-NEXT: ret -; -; RV64-LABEL: vdivu_vi_nxv8i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 131072 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 29 -; RV64-NEXT: ret +; CHECK-LABEL: vdivu_vi_nxv8i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 29 +; CHECK-NEXT: ret %head = insertelement poison, i32 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = udiv %va, %splat @@ -763,23 +673,14 @@ define @vdivu_vx_nxv16i32( %va, i32 signe } define @vdivu_vi_nxv16i32_0( %va) { -; RV32-LABEL: vdivu_vi_nxv16i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: vsrl.vi v8, v8, 29 -; RV32-NEXT: ret -; -; RV64-LABEL: vdivu_vi_nxv16i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 131072 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: vsrl.vi v8, v8, 29 -; RV64-NEXT: ret +; CHECK-LABEL: vdivu_vi_nxv16i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: vsrl.vi v8, v8, 29 +; CHECK-NEXT: ret %head = insertelement poison, i32 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = udiv %va, %splat @@ -1231,29 +1132,17 @@ define @vdivu_vx_mask_nxv8i32( %va, i32 sig 
} define @vdivu_vi_mask_nxv8i32( %va, %mask) { -; RV32-LABEL: vdivu_vi_mask_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 149797 -; RV32-NEXT: addi a0, a0, -1755 -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu -; RV32-NEXT: vmulhu.vx v12, v8, a0 -; RV32-NEXT: vsub.vv v16, v8, v12 -; RV32-NEXT: vsrl.vi v16, v16, 1 -; RV32-NEXT: vadd.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v8, v12, 2, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vdivu_vi_mask_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 149797 -; RV64-NEXT: addiw a0, a0, -1755 -; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu -; RV64-NEXT: vmulhu.vx v12, v8, a0 -; RV64-NEXT: vsub.vv v16, v8, v12 -; RV64-NEXT: vsrl.vi v16, v16, 1 -; RV64-NEXT: vadd.vv v12, v16, v12 -; RV64-NEXT: vsrl.vi v8, v12, 2, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vdivu_vi_mask_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 149797 +; CHECK-NEXT: addi a0, a0, -1755 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vmulhu.vx v12, v8, a0 +; CHECK-NEXT: vsub.vv v16, v8, v12 +; CHECK-NEXT: vsrl.vi v16, v16, 1 +; CHECK-NEXT: vadd.vv v12, v16, v12 +; CHECK-NEXT: vsrl.vi v8, v12, 2, v0.t +; CHECK-NEXT: ret %head1 = insertelement poison, i32 1, i32 0 %one = shufflevector %head1, poison, zeroinitializer %head2 = insertelement poison, i32 7, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll index ff236d7def7d6..b8f2afd194e46 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll @@ -89,35 +89,35 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) { } define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) { -; RV32-LABEL: vector_interleave_v4i64_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v10, v9 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vslideup.vi v8, v10, 2 -; RV32-NEXT: lui a0, 12304 -; RV32-NEXT: addi a0, a0, 512 -; RV32-NEXT: vmv.s.x v10, a0 -; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32-NEXT: vsext.vf2 v12, v10 -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV32-NEXT: vrgatherei16.vv v10, v8, v12 -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret +; CHECK-LABEL: vector_interleave_v4i64_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v9 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: lui a0, 12304 +; CHECK-NEXT: addi a0, a0, 512 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret ; -; RV64-LABEL: vector_interleave_v4i64_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v10, v9 -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vslideup.vi v8, v10, 2 -; RV64-NEXT: lui a0, 12304 -; RV64-NEXT: addiw a0, a0, 512 -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64-NEXT: vsext.vf2 v12, v10 -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV64-NEXT: vrgatherei16.vv v10, v8, v12 -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; ZVBB-LABEL: vector_interleave_v4i64_v2i64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vmv1r.v v10, v9 +; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVBB-NEXT: vslideup.vi v8, v10, 2 +; ZVBB-NEXT: lui a0, 12304 +; ZVBB-NEXT: addi a0, a0, 512 +; ZVBB-NEXT: vmv.s.x v10, a0 +; ZVBB-NEXT: vsetvli zero, 
zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsext.vf2 v12, v10 +; ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; ZVBB-NEXT: vrgatherei16.vv v10, v8, v12 +; ZVBB-NEXT: vmv.v.v v8, v10 +; ZVBB-NEXT: ret %res = call <4 x i64> @llvm.experimental.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %b) ret <4 x i64> %res } @@ -237,35 +237,35 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b } define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double> %b) { -; RV32-LABEL: vector_interleave_v4f64_v2f64: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v10, v9 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vslideup.vi v8, v10, 2 -; RV32-NEXT: lui a0, 12304 -; RV32-NEXT: addi a0, a0, 512 -; RV32-NEXT: vmv.s.x v10, a0 -; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32-NEXT: vsext.vf2 v12, v10 -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV32-NEXT: vrgatherei16.vv v10, v8, v12 -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret +; CHECK-LABEL: vector_interleave_v4f64_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v9 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: lui a0, 12304 +; CHECK-NEXT: addi a0, a0, 512 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret ; -; RV64-LABEL: vector_interleave_v4f64_v2f64: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v10, v9 -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vslideup.vi v8, v10, 2 -; RV64-NEXT: lui a0, 12304 -; RV64-NEXT: addiw a0, a0, 512 -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64-NEXT: vsext.vf2 v12, v10 -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV64-NEXT: vrgatherei16.vv v10, v8, v12 -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; ZVBB-LABEL: vector_interleave_v4f64_v2f64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vmv1r.v v10, v9 +; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVBB-NEXT: vslideup.vi v8, v10, 2 +; ZVBB-NEXT: lui a0, 12304 +; ZVBB-NEXT: addi a0, a0, 512 +; ZVBB-NEXT: vmv.s.x v10, a0 +; ZVBB-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsext.vf2 v12, v10 +; ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; ZVBB-NEXT: vrgatherei16.vv v10, v8, v12 +; ZVBB-NEXT: vmv.v.v v8, v10 +; ZVBB-NEXT: ret %res = call <4 x double> @llvm.experimental.vector.interleave2.v4f64(<2 x double> %a, <2 x double> %b) ret <4 x double> %res } @@ -277,3 +277,6 @@ declare <4 x float> @llvm.experimental.vector.interleave2.v4f32(<2 x float>, <2 declare <16 x half> @llvm.experimental.vector.interleave2.v16f16(<8 x half>, <8 x half>) declare <8 x float> @llvm.experimental.vector.interleave2.v8f32(<4 x float>, <4 x float>) declare <4 x double> @llvm.experimental.vector.interleave2.v4f64(<2 x double>, <2 x double>) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll index d407cf43a4fc1..58874fe8c8fca 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll @@ -392,31 +392,18 @@ define @vrem_vx_nxv1i16( %va, i16 signext % } define @vrem_vi_nxv1i16_0( %va) { -; RV32-LABEL: vrem_vi_nxv1i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 1048571 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV32-NEXT: vmulh.vx v9, v8, a0 -; RV32-NEXT: vsra.vi v9, v9, 1 -; RV32-NEXT: vsrl.vi v10, v9, 15 -; RV32-NEXT: vadd.vv v9, v9, v10 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vrem_vi_nxv1i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV64-NEXT: vmulh.vx v9, v8, a0 -; RV64-NEXT: vsra.vi v9, v9, 1 -; RV64-NEXT: vsrl.vi v10, v9, 15 -; RV64-NEXT: vadd.vv v9, v9, v10 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vrem_vi_nxv1i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048571 +; CHECK-NEXT: addi a0, a0, 1755 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmulh.vx v9, v8, a0 +; CHECK-NEXT: vsra.vi v9, v9, 1 +; CHECK-NEXT: vsrl.vi v10, v9, 15 +; CHECK-NEXT: vadd.vv v9, v9, v10 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = srem %va, %splat @@ -459,31 +446,18 @@ define @vrem_vx_nxv2i16( %va, i16 signext % } define @vrem_vi_nxv2i16_0( %va) { -; RV32-LABEL: vrem_vi_nxv2i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 1048571 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV32-NEXT: vmulh.vx v9, v8, a0 -; RV32-NEXT: vsra.vi v9, v9, 1 -; RV32-NEXT: vsrl.vi v10, v9, 15 -; RV32-NEXT: vadd.vv v9, v9, v10 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vrem_vi_nxv2i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV64-NEXT: vmulh.vx v9, v8, a0 -; RV64-NEXT: vsra.vi v9, v9, 1 -; RV64-NEXT: vsrl.vi v10, v9, 15 -; RV64-NEXT: vadd.vv v9, v9, v10 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vrem_vi_nxv2i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048571 +; CHECK-NEXT: addi a0, a0, 1755 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmulh.vx v9, v8, a0 +; CHECK-NEXT: vsra.vi v9, v9, 1 +; CHECK-NEXT: vsrl.vi v10, v9, 15 +; CHECK-NEXT: vadd.vv v9, v9, v10 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = srem %va, %splat @@ -526,31 +500,18 @@ define @vrem_vx_nxv4i16( %va, i16 signext % } define @vrem_vi_nxv4i16_0( %va) { -; RV32-LABEL: vrem_vi_nxv4i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 1048571 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV32-NEXT: vmulh.vx v9, v8, a0 -; RV32-NEXT: vsra.vi v9, v9, 1 -; RV32-NEXT: vsrl.vi v10, v9, 15 -; RV32-NEXT: vadd.vv v9, v9, v10 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vrem_vi_nxv4i16_0: -; RV64: # 
%bb.0: -; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV64-NEXT: vmulh.vx v9, v8, a0 -; RV64-NEXT: vsra.vi v9, v9, 1 -; RV64-NEXT: vsrl.vi v10, v9, 15 -; RV64-NEXT: vadd.vv v9, v9, v10 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vrem_vi_nxv4i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048571 +; CHECK-NEXT: addi a0, a0, 1755 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmulh.vx v9, v8, a0 +; CHECK-NEXT: vsra.vi v9, v9, 1 +; CHECK-NEXT: vsrl.vi v10, v9, 15 +; CHECK-NEXT: vadd.vv v9, v9, v10 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = srem %va, %splat @@ -593,31 +554,18 @@ define @vrem_vx_nxv8i16( %va, i16 signext % } define @vrem_vi_nxv8i16_0( %va) { -; RV32-LABEL: vrem_vi_nxv8i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 1048571 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV32-NEXT: vmulh.vx v10, v8, a0 -; RV32-NEXT: vsra.vi v10, v10, 1 -; RV32-NEXT: vsrl.vi v12, v10, 15 -; RV32-NEXT: vadd.vv v10, v10, v12 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: vrem_vi_nxv8i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV64-NEXT: vmulh.vx v10, v8, a0 -; RV64-NEXT: vsra.vi v10, v10, 1 -; RV64-NEXT: vsrl.vi v12, v10, 15 -; RV64-NEXT: vadd.vv v10, v10, v12 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v10 -; RV64-NEXT: ret +; CHECK-LABEL: vrem_vi_nxv8i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048571 +; CHECK-NEXT: addi a0, a0, 1755 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmulh.vx v10, v8, a0 +; CHECK-NEXT: vsra.vi v10, v10, 1 +; CHECK-NEXT: vsrl.vi v12, v10, 15 +; CHECK-NEXT: vadd.vv v10, v10, v12 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v10 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = srem %va, %splat @@ -660,31 +608,18 @@ define @vrem_vx_nxv16i16( %va, i16 signex } define @vrem_vi_nxv16i16_0( %va) { -; RV32-LABEL: vrem_vi_nxv16i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 1048571 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV32-NEXT: vmulh.vx v12, v8, a0 -; RV32-NEXT: vsra.vi v12, v12, 1 -; RV32-NEXT: vsrl.vi v16, v12, 15 -; RV32-NEXT: vadd.vv v12, v12, v16 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: vrem_vi_nxv16i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV64-NEXT: vmulh.vx v12, v8, a0 -; RV64-NEXT: vsra.vi v12, v12, 1 -; RV64-NEXT: vsrl.vi v16, v12, 15 -; RV64-NEXT: vadd.vv v12, v12, v16 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v12 -; RV64-NEXT: ret +; CHECK-LABEL: vrem_vi_nxv16i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048571 +; CHECK-NEXT: addi a0, a0, 1755 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmulh.vx v12, v8, a0 +; CHECK-NEXT: vsra.vi v12, v12, 1 +; CHECK-NEXT: vsrl.vi v16, v12, 15 +; CHECK-NEXT: vadd.vv v12, v12, v16 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v12 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, 
poison, zeroinitializer %vc = srem %va, %splat @@ -727,31 +662,18 @@ define @vrem_vx_nxv32i16( %va, i16 signex } define @vrem_vi_nxv32i16_0( %va) { -; RV32-LABEL: vrem_vi_nxv32i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 1048571 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV32-NEXT: vmulh.vx v16, v8, a0 -; RV32-NEXT: vsra.vi v16, v16, 1 -; RV32-NEXT: vsrl.vi v24, v16, 15 -; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: vrem_vi_nxv32i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1048571 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV64-NEXT: vmulh.vx v16, v8, a0 -; RV64-NEXT: vsra.vi v16, v16, 1 -; RV64-NEXT: vsrl.vi v24, v16, 15 -; RV64-NEXT: vadd.vv v16, v16, v24 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v16 -; RV64-NEXT: ret +; CHECK-LABEL: vrem_vi_nxv32i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048571 +; CHECK-NEXT: addi a0, a0, 1755 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmulh.vx v16, v8, a0 +; CHECK-NEXT: vsra.vi v16, v16, 1 +; CHECK-NEXT: vsrl.vi v24, v16, 15 +; CHECK-NEXT: vadd.vv v16, v16, v24 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = srem %va, %splat @@ -798,7 +720,7 @@ define @vrem_vi_nxv1i32_0( %va) { ; RV64-LABEL: vrem_vi_nxv1i32_0: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addiw a0, a0, -1171 +; RV64-NEXT: addi a0, a0, -1171 ; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; RV64-NEXT: vmulh.vx v9, v8, a0 ; RV64-NEXT: vsub.vv v9, v9, v8 @@ -854,7 +776,7 @@ define @vrem_vi_nxv2i32_0( %va) { ; RV64-LABEL: vrem_vi_nxv2i32_0: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addiw a0, a0, -1171 +; RV64-NEXT: addi a0, a0, -1171 ; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; RV64-NEXT: vmulh.vx v9, v8, a0 ; RV64-NEXT: vsub.vv v9, v9, v8 @@ -910,7 +832,7 @@ define @vrem_vi_nxv4i32_0( %va) { ; RV64-LABEL: vrem_vi_nxv4i32_0: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addiw a0, a0, -1171 +; RV64-NEXT: addi a0, a0, -1171 ; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; RV64-NEXT: vmulh.vx v10, v8, a0 ; RV64-NEXT: vsub.vv v10, v10, v8 @@ -966,7 +888,7 @@ define @vrem_vi_nxv8i32_0( %va) { ; RV64-LABEL: vrem_vi_nxv8i32_0: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addiw a0, a0, -1171 +; RV64-NEXT: addi a0, a0, -1171 ; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; RV64-NEXT: vmulh.vx v12, v8, a0 ; RV64-NEXT: vsub.vv v12, v12, v8 @@ -1022,7 +944,7 @@ define @vrem_vi_nxv16i32_0( %va) { ; RV64-LABEL: vrem_vi_nxv16i32_0: ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addiw a0, a0, -1171 +; RV64-NEXT: addi a0, a0, -1171 ; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; RV64-NEXT: vmulh.vx v16, v8, a0 ; RV64-NEXT: vsub.vv v16, v16, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll index 4f85acb0bd5ee..428d071cac399 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll @@ -293,27 +293,16 @@ define @vremu_vx_nxv1i16( %va, i16 signext } define @vremu_vi_nxv1i16_0( %va) { -; RV32-LABEL: vremu_vi_nxv1i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 2 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV32-NEXT: vmulhu.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi 
v9, v9, 13 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vremu_vi_nxv1i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 2 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV64-NEXT: vmulhu.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v9, v9, 13 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vremu_vi_nxv1i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmulhu.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v9, v9, 13 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = urem %va, %splat @@ -343,27 +332,16 @@ define @vremu_vx_nxv2i16( %va, i16 signext } define @vremu_vi_nxv2i16_0( %va) { -; RV32-LABEL: vremu_vi_nxv2i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 2 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV32-NEXT: vmulhu.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v9, v9, 13 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vremu_vi_nxv2i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 2 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV64-NEXT: vmulhu.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v9, v9, 13 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vremu_vi_nxv2i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmulhu.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v9, v9, 13 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = urem %va, %splat @@ -393,27 +371,16 @@ define @vremu_vx_nxv4i16( %va, i16 signext } define @vremu_vi_nxv4i16_0( %va) { -; RV32-LABEL: vremu_vi_nxv4i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 2 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV32-NEXT: vmulhu.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v9, v9, 13 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vremu_vi_nxv4i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 2 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV64-NEXT: vmulhu.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v9, v9, 13 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vremu_vi_nxv4i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v9, v9, 13 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = urem %va, %splat @@ -443,27 +410,16 @@ define @vremu_vx_nxv8i16( %va, i16 signext } define @vremu_vi_nxv8i16_0( %va) { -; RV32-LABEL: vremu_vi_nxv8i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 2 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV32-NEXT: vmulhu.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v10, v10, 13 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: vremu_vi_nxv8i16_0: -; RV64: # %bb.0: 
-; RV64-NEXT: lui a0, 2 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV64-NEXT: vmulhu.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v10, v10, 13 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v10 -; RV64-NEXT: ret +; CHECK-LABEL: vremu_vi_nxv8i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmulhu.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v10, v10, 13 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v10 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = urem %va, %splat @@ -493,27 +449,16 @@ define @vremu_vx_nxv16i16( %va, i16 signe } define @vremu_vi_nxv16i16_0( %va) { -; RV32-LABEL: vremu_vi_nxv16i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 2 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV32-NEXT: vmulhu.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v12, v12, 13 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: vremu_vi_nxv16i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 2 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; RV64-NEXT: vmulhu.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v12, v12, 13 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v12 -; RV64-NEXT: ret +; CHECK-LABEL: vremu_vi_nxv16i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmulhu.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v12, v12, 13 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v12 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = urem %va, %splat @@ -543,27 +488,16 @@ define @vremu_vx_nxv32i16( %va, i16 signe } define @vremu_vi_nxv32i16_0( %va) { -; RV32-LABEL: vremu_vi_nxv32i16_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 2 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV32-NEXT: vmulhu.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v16, v16, 13 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: vremu_vi_nxv32i16_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 2 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV64-NEXT: vmulhu.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v16, v16, 13 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v16 -; RV64-NEXT: ret +; CHECK-LABEL: vremu_vi_nxv32i16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmulhu.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 13 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 +; CHECK-NEXT: ret %head = insertelement poison, i16 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = urem %va, %splat @@ -593,27 +527,16 @@ define @vremu_vx_nxv1i32( %va, i32 signext } define @vremu_vi_nxv1i32_0( %va) { -; RV32-LABEL: vremu_vi_nxv1i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV32-NEXT: vmulhu.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v9, v9, 29 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vremu_vi_nxv1i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 131072 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma 
-; RV64-NEXT: vmulhu.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v9, v9, 29 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vremu_vi_nxv1i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmulhu.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v9, v9, 29 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret %head = insertelement poison, i32 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = urem %va, %splat @@ -643,27 +566,16 @@ define @vremu_vx_nxv2i32( %va, i32 signext } define @vremu_vi_nxv2i32_0( %va) { -; RV32-LABEL: vremu_vi_nxv2i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vmulhu.vx v9, v8, a0 -; RV32-NEXT: vsrl.vi v9, v9, 29 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vremu_vi_nxv2i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 131072 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64-NEXT: vmulhu.vx v9, v8, a0 -; RV64-NEXT: vsrl.vi v9, v9, 29 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vremu_vi_nxv2i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v9, v8, a0 +; CHECK-NEXT: vsrl.vi v9, v9, 29 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret %head = insertelement poison, i32 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = urem %va, %splat @@ -693,27 +605,16 @@ define @vremu_vx_nxv4i32( %va, i32 signext } define @vremu_vi_nxv4i32_0( %va) { -; RV32-LABEL: vremu_vi_nxv4i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32-NEXT: vmulhu.vx v10, v8, a0 -; RV32-NEXT: vsrl.vi v10, v10, 29 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: vremu_vi_nxv4i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 131072 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64-NEXT: vmulhu.vx v10, v8, a0 -; RV64-NEXT: vsrl.vi v10, v10, 29 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v10 -; RV64-NEXT: ret +; CHECK-LABEL: vremu_vi_nxv4i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vmulhu.vx v10, v8, a0 +; CHECK-NEXT: vsrl.vi v10, v10, 29 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v10 +; CHECK-NEXT: ret %head = insertelement poison, i32 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = urem %va, %splat @@ -743,27 +644,16 @@ define @vremu_vx_nxv8i32( %va, i32 signext } define @vremu_vi_nxv8i32_0( %va) { -; RV32-LABEL: vremu_vi_nxv8i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vmulhu.vx v12, v8, a0 -; RV32-NEXT: vsrl.vi v12, v12, 29 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: vremu_vi_nxv8i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 131072 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV64-NEXT: vmulhu.vx v12, v8, a0 -; RV64-NEXT: vsrl.vi v12, v12, 29 -; RV64-NEXT: li 
a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v12 -; RV64-NEXT: ret +; CHECK-LABEL: vremu_vi_nxv8i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vmulhu.vx v12, v8, a0 +; CHECK-NEXT: vsrl.vi v12, v12, 29 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v12 +; CHECK-NEXT: ret %head = insertelement poison, i32 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = urem %va, %splat @@ -793,27 +683,16 @@ define @vremu_vx_nxv16i32( %va, i32 signe } define @vremu_vi_nxv16i32_0( %va) { -; RV32-LABEL: vremu_vi_nxv16i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: addi a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vmulhu.vx v16, v8, a0 -; RV32-NEXT: vsrl.vi v16, v16, 29 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: vremu_vi_nxv16i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 131072 -; RV64-NEXT: addiw a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64-NEXT: vmulhu.vx v16, v8, a0 -; RV64-NEXT: vsrl.vi v16, v16, 29 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v16 -; RV64-NEXT: ret +; CHECK-LABEL: vremu_vi_nxv16i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vmulhu.vx v16, v8, a0 +; CHECK-NEXT: vsrl.vi v16, v16, 29 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 +; CHECK-NEXT: ret %head = insertelement poison, i32 -7, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = urem %va, %splat diff --git a/llvm/test/CodeGen/RISCV/select-binop-identity.ll b/llvm/test/CodeGen/RISCV/select-binop-identity.ll index ebf47cdfd2a1d..61344bc8979ab 100644 --- a/llvm/test/CodeGen/RISCV/select-binop-identity.ll +++ b/llvm/test/CodeGen/RISCV/select-binop-identity.ll @@ -266,7 +266,7 @@ define signext i32 @add_select_all_zeros_i32(i1 zeroext %c, i32 signext %x, i32 ; ; RV64I-LABEL: add_select_all_zeros_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: addw a0, a2, a0 ; RV64I-NEXT: ret @@ -366,7 +366,7 @@ define signext i32 @sub_select_all_zeros_i32(i1 zeroext %c, i32 signext %x, i32 ; ; RV64I-LABEL: sub_select_all_zeros_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: subw a0, a2, a0 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/select-to-and-zext.ll b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll index eacc26c18415d..2f03ff969205f 100644 --- a/llvm/test/CodeGen/RISCV/select-to-and-zext.ll +++ b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll @@ -42,7 +42,7 @@ define i32 @from_cmpeq_fail_bad_andmask(i32 %xx, i32 %y) { ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: addi a0, a0, -9 ; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: andi a0, a0, 3 ; RV64I-NEXT: ret @@ -142,7 +142,7 @@ define i32 @from_i1_fail_bad_select1(i1 %x, i32 %y) { ; ; RV64I-LABEL: from_i1_fail_bad_select1: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: andi a0, a0, 1 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll index e07f1d6f59435..d4a6e9e9dbb46 100644 --- a/llvm/test/CodeGen/RISCV/select.ll +++ 
b/llvm/test/CodeGen/RISCV/select.ll @@ -147,7 +147,7 @@ define i16 @select_xor_3(i16 %A, i8 %cond) { ; RV64IM-LABEL: select_xor_3: ; RV64IM: # %bb.0: # %entry ; RV64IM-NEXT: andi a1, a1, 1 -; RV64IM-NEXT: addiw a1, a1, -1 +; RV64IM-NEXT: addi a1, a1, -1 ; RV64IM-NEXT: andi a1, a1, 43 ; RV64IM-NEXT: xor a0, a0, a1 ; RV64IM-NEXT: ret @@ -189,7 +189,7 @@ define i16 @select_xor_3b(i16 %A, i8 %cond) { ; RV64IM-LABEL: select_xor_3b: ; RV64IM: # %bb.0: # %entry ; RV64IM-NEXT: andi a1, a1, 1 -; RV64IM-NEXT: addiw a1, a1, -1 +; RV64IM-NEXT: addi a1, a1, -1 ; RV64IM-NEXT: andi a1, a1, 43 ; RV64IM-NEXT: xor a0, a0, a1 ; RV64IM-NEXT: ret @@ -627,7 +627,7 @@ define i32 @select_add_2(i1 zeroext %cond, i32 %a, i32 %b) { ; ; RV64IM-LABEL: select_add_2: ; RV64IM: # %bb.0: # %entry -; RV64IM-NEXT: addiw a0, a0, -1 +; RV64IM-NEXT: addi a0, a0, -1 ; RV64IM-NEXT: and a0, a0, a2 ; RV64IM-NEXT: addw a0, a1, a0 ; RV64IM-NEXT: ret @@ -665,7 +665,7 @@ define i32 @select_add_3(i1 zeroext %cond, i32 %a) { ; ; RV64IM-LABEL: select_add_3: ; RV64IM: # %bb.0: # %entry -; RV64IM-NEXT: addiw a0, a0, -1 +; RV64IM-NEXT: addi a0, a0, -1 ; RV64IM-NEXT: andi a0, a0, 42 ; RV64IM-NEXT: addw a0, a1, a0 ; RV64IM-NEXT: ret @@ -754,7 +754,7 @@ define i32 @select_sub_2(i1 zeroext %cond, i32 %a, i32 %b) { ; ; RV64IM-LABEL: select_sub_2: ; RV64IM: # %bb.0: # %entry -; RV64IM-NEXT: addiw a0, a0, -1 +; RV64IM-NEXT: addi a0, a0, -1 ; RV64IM-NEXT: and a0, a0, a2 ; RV64IM-NEXT: subw a0, a1, a0 ; RV64IM-NEXT: ret @@ -792,7 +792,7 @@ define i32 @select_sub_3(i1 zeroext %cond, i32 %a) { ; ; RV64IM-LABEL: select_sub_3: ; RV64IM: # %bb.0: # %entry -; RV64IM-NEXT: addiw a0, a0, -1 +; RV64IM-NEXT: addi a0, a0, -1 ; RV64IM-NEXT: andi a0, a0, 42 ; RV64IM-NEXT: subw a0, a1, a0 ; RV64IM-NEXT: ret @@ -1159,7 +1159,7 @@ define i32 @select_shl_2(i1 zeroext %cond, i32 %a, i32 %b) { ; ; RV64IM-LABEL: select_shl_2: ; RV64IM: # %bb.0: # %entry -; RV64IM-NEXT: addiw a0, a0, -1 +; RV64IM-NEXT: addi a0, a0, -1 ; RV64IM-NEXT: and a0, a0, a2 ; RV64IM-NEXT: sllw a0, a1, a0 ; RV64IM-NEXT: ret @@ -1256,7 +1256,7 @@ define i32 @select_ashr_2(i1 zeroext %cond, i32 %a, i32 %b) { ; ; RV64IM-LABEL: select_ashr_2: ; RV64IM: # %bb.0: # %entry -; RV64IM-NEXT: addiw a0, a0, -1 +; RV64IM-NEXT: addi a0, a0, -1 ; RV64IM-NEXT: and a0, a0, a2 ; RV64IM-NEXT: sraw a0, a1, a0 ; RV64IM-NEXT: ret @@ -1353,7 +1353,7 @@ define i32 @select_lshr_2(i1 zeroext %cond, i32 %a, i32 %b) { ; ; RV64IM-LABEL: select_lshr_2: ; RV64IM: # %bb.0: # %entry -; RV64IM-NEXT: addiw a0, a0, -1 +; RV64IM-NEXT: addi a0, a0, -1 ; RV64IM-NEXT: and a0, a0, a2 ; RV64IM-NEXT: srlw a0, a1, a0 ; RV64IM-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll index f9b72ccb26f38..3e6893731dd03 100644 --- a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll @@ -76,19 +76,12 @@ define i32 @not_pos_sel_same_variable(i32 signext %a) { ; Compare if positive and select of constants where one constant is zero. 
define i32 @pos_sel_constants(i32 signext %a) { -; RV32-LABEL: pos_sel_constants: -; RV32: # %bb.0: -; RV32-NEXT: slti a0, a0, 0 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: andi a0, a0, 5 -; RV32-NEXT: ret -; -; RV64-LABEL: pos_sel_constants: -; RV64: # %bb.0: -; RV64-NEXT: slti a0, a0, 0 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: andi a0, a0, 5 -; RV64-NEXT: ret +; CHECK-LABEL: pos_sel_constants: +; CHECK: # %bb.0: +; CHECK-NEXT: slti a0, a0, 0 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: andi a0, a0, 5 +; CHECK-NEXT: ret %tmp.1 = icmp sgt i32 %a, -1 %retval = select i1 %tmp.1, i32 5, i32 0 ret i32 %retval diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll index f9ffb6603fa0e..3babef93499c8 100644 --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -181,9 +181,9 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-NEXT: lui a1, 209715 ; RV64I-NEXT: addiw s1, a1, 819 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw s2, a1, -241 +; RV64I-NEXT: addi s2, a1, -241 ; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw s3, a1, 257 +; RV64I-NEXT: addi s3, a1, 257 ; RV64I-NEXT: .LBB4_1: # %bb2 ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: call bar@plt @@ -1349,7 +1349,7 @@ define signext i32 @sextw_sh2add(i1 zeroext %0, ptr %1, i32 signext %2, i32 sign ; NOREMOVAL-LABEL: sextw_sh2add: ; NOREMOVAL: # %bb.0: ; NOREMOVAL-NEXT: sh2add a2, a2, a3 -; NOREMOVAL-NEXT: sext.w a2, a2 +; NOREMOVAL-NEXT: mv a2, a2 ; NOREMOVAL-NEXT: beqz a0, .LBB22_2 ; NOREMOVAL-NEXT: # %bb.1: ; NOREMOVAL-NEXT: sw a2, 0(a1) diff --git a/llvm/test/CodeGen/RISCV/shl-demanded.ll b/llvm/test/CodeGen/RISCV/shl-demanded.ll index 4e3c063eff2de..b0e3ebcd50566 100644 --- a/llvm/test/CodeGen/RISCV/shl-demanded.ll +++ b/llvm/test/CodeGen/RISCV/shl-demanded.ll @@ -239,7 +239,7 @@ define i32 @set_shl_mask(i32 %x, i32 %y) { ; RV64I-LABEL: set_shl_mask: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a2, 16 -; RV64I-NEXT: addiw a3, a2, 1 +; RV64I-NEXT: addi a3, a2, 1 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: sllw a0, a0, a1 ; RV64I-NEXT: and a0, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/shlimm-addimm.ll b/llvm/test/CodeGen/RISCV/shlimm-addimm.ll index 4a4762a0db146..ead71bcbe113c 100644 --- a/llvm/test/CodeGen/RISCV/shlimm-addimm.ll +++ b/llvm/test/CodeGen/RISCV/shlimm-addimm.ll @@ -78,7 +78,7 @@ define i32 @shl5_add101024_a(i32 %x) { ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 5 ; RV64I-NEXT: lui a1, 25 -; RV64I-NEXT: addiw a1, a1, -1376 +; RV64I-NEXT: addi a1, a1, -1376 ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret %tmp0 = shl i32 %x, 5 @@ -99,7 +99,7 @@ define signext i32 @shl5_add101024_b(i32 signext %x) { ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 5 ; RV64I-NEXT: lui a1, 25 -; RV64I-NEXT: addiw a1, a1, -1376 +; RV64I-NEXT: addi a1, a1, -1376 ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret %tmp0 = shl i32 %x, 5 @@ -146,7 +146,7 @@ define i32 @shl5_add47968_a(i32 %x) { ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 5 ; RV64I-NEXT: lui a1, 12 -; RV64I-NEXT: addiw a1, a1, -1184 +; RV64I-NEXT: addi a1, a1, -1184 ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret %tmp0 = shl i32 %x, 5 @@ -167,7 +167,7 @@ define signext i32 @shl5_add47968_b(i32 signext %x) { ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 5 ; RV64I-NEXT: lui a1, 12 -; RV64I-NEXT: addiw a1, a1, -1184 +; RV64I-NEXT: addi a1, a1, -1184 ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret %tmp0 = shl i32 %x, 5 @@ -214,7 +214,7 @@ define i32 @shl5_add47969_a(i32 %x) { ; 
RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 5 ; RV64I-NEXT: lui a1, 12 -; RV64I-NEXT: addiw a1, a1, -1183 +; RV64I-NEXT: addi a1, a1, -1183 ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret %tmp0 = shl i32 %x, 5 @@ -235,7 +235,7 @@ define signext i32 @shl5_add47969_b(i32 signext %x) { ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 5 ; RV64I-NEXT: lui a1, 12 -; RV64I-NEXT: addiw a1, a1, -1183 +; RV64I-NEXT: addi a1, a1, -1183 ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret %tmp0 = shl i32 %x, 5 @@ -282,7 +282,7 @@ define i32 @shl5_sub47968_a(i32 %x) { ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 5 ; RV64I-NEXT: lui a1, 1048564 -; RV64I-NEXT: addiw a1, a1, 1184 +; RV64I-NEXT: addi a1, a1, 1184 ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret %tmp0 = shl i32 %x, 5 @@ -303,7 +303,7 @@ define signext i32 @shl5_sub47968_b(i32 signext %x) { ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 5 ; RV64I-NEXT: lui a1, 1048564 -; RV64I-NEXT: addiw a1, a1, 1184 +; RV64I-NEXT: addi a1, a1, 1184 ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret %tmp0 = shl i32 %x, 5 @@ -351,7 +351,7 @@ define i32 @shl5_sub47969_a(i32 %x) { ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 5 ; RV64I-NEXT: lui a1, 1048564 -; RV64I-NEXT: addiw a1, a1, 1183 +; RV64I-NEXT: addi a1, a1, 1183 ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret %tmp0 = shl i32 %x, 5 @@ -372,7 +372,7 @@ define signext i32 @shl5_sub47969_b(i32 signext %x) { ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 5 ; RV64I-NEXT: lui a1, 1048564 -; RV64I-NEXT: addiw a1, a1, 1183 +; RV64I-NEXT: addi a1, a1, 1183 ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret %tmp0 = shl i32 %x, 5 diff --git a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll index 4eea8643101b1..0860853ae9c0a 100644 --- a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll +++ b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll @@ -265,7 +265,7 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { ; ; RV64-LABEL: add_ugecmp_i16_i8: ; RV64: # %bb.0: -; RV64-NEXT: addiw a0, a0, -128 +; RV64-NEXT: addi a0, a0, -128 ; RV64-NEXT: slli a0, a0, 48 ; RV64-NEXT: srli a0, a0, 56 ; RV64-NEXT: sltiu a0, a0, 255 @@ -482,7 +482,7 @@ define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind { ; ; RV64-LABEL: add_ugtcmp_i16_i8: ; RV64: # %bb.0: -; RV64-NEXT: addiw a0, a0, -128 +; RV64-NEXT: addi a0, a0, -128 ; RV64-NEXT: slli a0, a0, 48 ; RV64-NEXT: srli a0, a0, 56 ; RV64-NEXT: sltiu a0, a0, 255 @@ -508,7 +508,7 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { ; ; RV64I-LABEL: add_ultcmp_i16_i8: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 128 +; RV64I-NEXT: addi a0, a0, 128 ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: sltiu a0, a0, 256 @@ -523,7 +523,7 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { ; ; RV64ZBB-LABEL: add_ultcmp_i16_i8: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: addiw a0, a0, 128 +; RV64ZBB-NEXT: addi a0, a0, 128 ; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: sltiu a0, a0, 256 ; RV64ZBB-NEXT: ret @@ -688,7 +688,7 @@ define i1 @add_ulecmp_i16_i8(i16 %x) nounwind { ; ; RV64I-LABEL: add_ulecmp_i16_i8: ; RV64I: # %bb.0: -; RV64I-NEXT: addiw a0, a0, 128 +; RV64I-NEXT: addi a0, a0, 128 ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: sltiu a0, a0, 256 @@ -703,7 +703,7 @@ define i1 @add_ulecmp_i16_i8(i16 %x) nounwind { ; ; RV64ZBB-LABEL: add_ulecmp_i16_i8: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: addiw a0, a0, 128 +; RV64ZBB-NEXT: addi a0, a0, 128 ; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: sltiu a0, a0, 256 ; RV64ZBB-NEXT: ret @@ 
-784,7 +784,7 @@ define i1 @add_ultcmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
; RV64ZBB-LABEL: add_ultcmp_bad_i16_i8_cmp:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: zext.h a1, a1
-; RV64ZBB-NEXT: addiw a0, a0, 128
+; RV64ZBB-NEXT: addi a0, a0, 128
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltu a0, a0, a1
; RV64ZBB-NEXT: ret
@@ -805,7 +805,7 @@ define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind {
;
; RV64I-LABEL: add_ultcmp_bad_i8_i16:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 128
+; RV64I-NEXT: addi a0, a0, 128
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 128
@@ -820,7 +820,7 @@ define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ultcmp_bad_i8_i16:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 128
+; RV64ZBB-NEXT: addi a0, a0, 128
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 128
; RV64ZBB-NEXT: ret
@@ -841,7 +841,7 @@ define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
;
; RV64I-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 192
+; RV64I-NEXT: addi a0, a0, 192
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 256
@@ -856,7 +856,7 @@ define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 192
+; RV64ZBB-NEXT: addi a0, a0, 192
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 256
; RV64ZBB-NEXT: ret
@@ -877,7 +877,7 @@ define i1 @add_ultcmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
;
; RV64I-LABEL: add_ultcmp_bad_i16_i8_c1notpoweroftwo:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 128
+; RV64I-NEXT: addi a0, a0, 128
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 768
@@ -892,7 +892,7 @@ define i1 @add_ultcmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ultcmp_bad_i16_i8_c1notpoweroftwo:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 128
+; RV64ZBB-NEXT: addi a0, a0, 128
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 768
; RV64ZBB-NEXT: ret
@@ -913,7 +913,7 @@ define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind {
;
; RV64I-LABEL: add_ultcmp_bad_i16_i8_magic:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 64
+; RV64I-NEXT: addi a0, a0, 64
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 256
@@ -928,7 +928,7 @@ define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ultcmp_bad_i16_i8_magic:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 64
+; RV64ZBB-NEXT: addi a0, a0, 64
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 256
; RV64ZBB-NEXT: ret
@@ -949,7 +949,7 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
;
; RV64I-LABEL: add_ultcmp_bad_i16_i4:
; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a0, a0, 8
+; RV64I-NEXT: addi a0, a0, 8
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sltiu a0, a0, 16
@@ -964,7 +964,7 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
;
; RV64ZBB-LABEL: add_ultcmp_bad_i16_i4:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: addiw a0, a0, 8
+; RV64ZBB-NEXT: addi a0, a0, 8
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: sltiu a0, a0, 16
; RV64ZBB-NEXT: ret
@@ -985,7 +985,7 @@ define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind {
;
; RV64-LABEL: add_ultcmp_bad_i24_i8:
; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, 128
+; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: slli a0, a0, 40
; RV64-NEXT: srli a0, a0, 40
; RV64-NEXT: sltiu a0, a0, 256
diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll
index 24e740fd143d1..1dcb043823923 100644
--- a/llvm/test/CodeGen/RISCV/srem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll
@@ -198,7 +198,7 @@ define i32 @fold_srem_negative_even(i32 %x) nounwind {
; RV64IM-NEXT: srai a1, a1, 40
; RV64IM-NEXT: add a1, a1, a2
; RV64IM-NEXT: lui a2, 1048570
-; RV64IM-NEXT: addiw a2, a2, 1595
+; RV64IM-NEXT: addi a2, a2, 1595
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: subw a0, a0, a1
; RV64IM-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index dcf701be76f62..6ed352b51f254 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -34,7 +34,7 @@ define i1 @test_srem_odd(i29 %X) nounwind {
; RV64-NEXT: addiw a1, a1, 331
; RV64-NEXT: call __muldi3@plt
; RV64-NEXT: lui a1, 662
-; RV64-NEXT: addiw a1, a1, -83
+; RV64-NEXT: addi a1, a1, -83
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: slli a0, a0, 35
; RV64-NEXT: srli a0, a0, 35
@@ -63,10 +63,10 @@ define i1 @test_srem_odd(i29 %X) nounwind {
; RV64M-LABEL: test_srem_odd:
; RV64M: # %bb.0:
; RV64M-NEXT: lui a1, 128424
-; RV64M-NEXT: addiw a1, a1, 331
+; RV64M-NEXT: addi a1, a1, 331
; RV64M-NEXT: mul a0, a0, a1
; RV64M-NEXT: lui a1, 662
-; RV64M-NEXT: addiw a1, a1, -83
+; RV64M-NEXT: addi a1, a1, -83
; RV64M-NEXT: add a0, a0, a1
; RV64M-NEXT: slli a0, a0, 35
; RV64M-NEXT: srli a0, a0, 35
@@ -93,10 +93,10 @@ define i1 @test_srem_odd(i29 %X) nounwind {
; RV64MV-LABEL: test_srem_odd:
; RV64MV: # %bb.0:
; RV64MV-NEXT: lui a1, 128424
-; RV64MV-NEXT: addiw a1, a1, 331
+; RV64MV-NEXT: addi a1, a1, 331
; RV64MV-NEXT: mul a0, a0, a1
; RV64MV-NEXT: lui a1, 662
-; RV64MV-NEXT: addiw a1, a1, -83
+; RV64MV-NEXT: addi a1, a1, -83
; RV64MV-NEXT: add a0, a0, a1
; RV64MV-NEXT: slli a0, a0, 35
; RV64MV-NEXT: srli a0, a0, 35
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index b5f1efa4b160b..2e0c541311e10 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -909,7 +909,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
; RV64IM-NEXT: srli a3, a3, 11
; RV64IM-NEXT: add a3, a3, a5
; RV64IM-NEXT: lui a5, 1
-; RV64IM-NEXT: addiw a5, a5, 1327
+; RV64IM-NEXT: addi a5, a5, 1327
; RV64IM-NEXT: mul a3, a3, a5
; RV64IM-NEXT: subw a4, a4, a3
; RV64IM-NEXT: sh zero, 0(a0)
@@ -1053,7 +1053,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV64IM-NEXT: srli a3, a3, 11
; RV64IM-NEXT: add a3, a3, a5
; RV64IM-NEXT: lui a5, 1
-; RV64IM-NEXT: addiw a5, a5, 1327
+; RV64IM-NEXT: addi a5, a5, 1327
; RV64IM-NEXT: mul a3, a3, a5
; RV64IM-NEXT: subw a4, a4, a3
; RV64IM-NEXT: srli a3, a1, 49
diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll
index 3d181c3a30d09..1b2cc1398ec11 100644
--- a/llvm/test/CodeGen/RISCV/urem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll
@@ -44,7 +44,7 @@ define i32 @fold_urem_positive_odd(i32 %x) nounwind {
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 32
; RV64IM-NEXT: lui a2, 364242
-; RV64IM-NEXT: addiw a2, a2, 777
+; RV64IM-NEXT: addi a2, a2, 777
; RV64IM-NEXT: slli a2, a2, 32
; RV64IM-NEXT: mulhu a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
@@ -94,7 +94,7 @@ define i32 @fold_urem_positive_even(i32 %x) nounwind {
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 32
; RV64IM-NEXT: lui a2, 1012964
-; RV64IM-NEXT: addiw a2, a2, -61
+; RV64IM-NEXT: addi a2, a2, -61
; RV64IM-NEXT: slli a2, a2, 32
; RV64IM-NEXT: mulhu a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 42
@@ -170,7 +170,7 @@ define i32 @combine_urem_udiv(i32 %x) nounwind {
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 32
; RV64IM-NEXT: lui a2, 364242
-; RV64IM-NEXT: addiw a2, a2, 777
+; RV64IM-NEXT: addi a2, a2, 777
; RV64IM-NEXT: slli a2, a2, 32
; RV64IM-NEXT: mulhu a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index 456d98fd4e47f..df30946218dfa 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -48,7 +48,7 @@ define i1 @test_urem_odd(i13 %X) nounwind {
; RV64M-LABEL: test_urem_odd:
; RV64M: # %bb.0:
; RV64M-NEXT: lui a1, 1
-; RV64M-NEXT: addiw a1, a1, -819
+; RV64M-NEXT: addi a1, a1, -819
; RV64M-NEXT: mul a0, a0, a1
; RV64M-NEXT: slli a0, a0, 51
; RV64M-NEXT: srli a0, a0, 51
@@ -68,7 +68,7 @@ define i1 @test_urem_odd(i13 %X) nounwind {
; RV64MV-LABEL: test_urem_odd:
; RV64MV: # %bb.0:
; RV64MV-NEXT: lui a1, 1
-; RV64MV-NEXT: addiw a1, a1, -819
+; RV64MV-NEXT: addi a1, a1, -819
; RV64MV-NEXT: mul a0, a0, a1
; RV64MV-NEXT: slli a0, a0, 51
; RV64MV-NEXT: srli a0, a0, 51
@@ -139,7 +139,7 @@ define i1 @test_urem_even(i27 %X) nounwind {
; RV64M-LABEL: test_urem_even:
; RV64M: # %bb.0:
; RV64M-NEXT: lui a1, 28087
-; RV64M-NEXT: addiw a1, a1, -585
+; RV64M-NEXT: addi a1, a1, -585
; RV64M-NEXT: mul a0, a0, a1
; RV64M-NEXT: slli a1, a0, 26
; RV64M-NEXT: slli a0, a0, 37
@@ -171,7 +171,7 @@ define i1 @test_urem_even(i27 %X) nounwind {
; RV64MV-LABEL: test_urem_even:
; RV64MV: # %bb.0:
; RV64MV-NEXT: lui a1, 28087
-; RV64MV-NEXT: addiw a1, a1, -585
+; RV64MV-NEXT: addi a1, a1, -585
; RV64MV-NEXT: mul a0, a0, a1
; RV64MV-NEXT: slli a1, a0, 26
; RV64MV-NEXT: slli a0, a0, 37
@@ -405,18 +405,18 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64-NEXT: li a1, 1463
; RV64-NEXT: mv a0, s2
; RV64-NEXT: call __muldi3@plt
-; RV64-NEXT: addiw a0, a0, -1463
+; RV64-NEXT: addi a0, a0, -1463
; RV64-NEXT: andi a0, a0, 2047
; RV64-NEXT: sltiu s2, a0, 293
; RV64-NEXT: li a1, 819
; RV64-NEXT: mv a0, s1
; RV64-NEXT: call __muldi3@plt
-; RV64-NEXT: addiw a0, a0, -1638
+; RV64-NEXT: addi a0, a0, -1638
; RV64-NEXT: andi a0, a0, 2047
; RV64-NEXT: sltiu a0, a0, 2
-; RV64-NEXT: addiw s3, s3, -1
+; RV64-NEXT: addi s3, s3, -1
; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: addiw s2, s2, -1
+; RV64-NEXT: addi s2, s2, -1
; RV64-NEXT: andi a1, s3, 2047
; RV64-NEXT: andi a2, s2, 2047
; RV64-NEXT: slli a2, a2, 11
@@ -496,17 +496,17 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64M-NEXT: sltiu a1, a1, 342
; RV64M-NEXT: li a4, 1463
; RV64M-NEXT: mul a3, a3, a4
-; RV64M-NEXT: addiw a3, a3, -1463
+; RV64M-NEXT: addi a3, a3, -1463
; RV64M-NEXT: andi a3, a3, 2047
; RV64M-NEXT: sltiu a3, a3, 293
; RV64M-NEXT: li a4, 819
; RV64M-NEXT: mul a2, a2, a4
-; RV64M-NEXT: addiw a2, a2, -1638
+; RV64M-NEXT: addi a2, a2, -1638
; RV64M-NEXT: andi a2, a2, 2047
; RV64M-NEXT: sltiu a2, a2, 2
-; RV64M-NEXT: addiw a1, a1, -1
+; RV64M-NEXT: addi a1, a1, -1
; RV64M-NEXT: addi a2, a2, -1
-; RV64M-NEXT: addiw a3, a3, -1
+; RV64M-NEXT: addi a3, a3, -1
; RV64M-NEXT: andi a1, a1, 2047
; RV64M-NEXT: andi a3, a3, 2047
; RV64M-NEXT: slli a3, a3, 11
@@ -604,7 +604,7 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64MV-NEXT: vmul.vv v8, v8, v9
; RV64MV-NEXT: vadd.vv v9, v8, v8
; RV64MV-NEXT: lui a2, 41121
-; RV64MV-NEXT: addiw a2, a2, -1527
+; RV64MV-NEXT: addi a2, a2, -1527
; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64MV-NEXT: vmv.s.x v10, a2
; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
index a38ae17f19df3..ac67b9005b3d0 100644
--- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
@@ -755,7 +755,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV64IM-NEXT: subw a1, a1, a3
; RV64IM-NEXT: mulhu a3, a4, a5
; RV64IM-NEXT: lui a5, 1
-; RV64IM-NEXT: addiw a5, a5, 1327
+; RV64IM-NEXT: addi a5, a5, 1327
; RV64IM-NEXT: mul a3, a3, a5
; RV64IM-NEXT: subw a4, a4, a3
; RV64IM-NEXT: sh zero, 0(a0)
diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll
index e8035dd226bf6..59aa1d9ae2893 100644
--- a/llvm/test/CodeGen/RISCV/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/vararg.ll
@@ -566,7 +566,7 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 24
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: lw a0, 8(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 7
+; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 7
; LP64-LP64F-LP64D-FPELIM-NEXT: slli a1, a0, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: srli a1, a1, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, a1, 8
@@ -593,7 +593,7 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 8
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, -24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, 7
+; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 7
; LP64-LP64F-LP64D-WITHFP-NEXT: slli a1, a0, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: srli a1, a1, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, a1, 8
@@ -888,7 +888,7 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: lw a0, 8(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 7
+; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 7
; LP64-LP64F-LP64D-FPELIM-NEXT: slli a2, a0, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: srli a2, a2, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a2, a2, 8
@@ -915,7 +915,7 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, -24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 0(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, 7
+; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 7
; LP64-LP64F-LP64D-WITHFP-NEXT: slli a2, a0, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: srli a2, a2, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a2, a2, 8
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index 754237c3456dc..85d28122537ea 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -193,7 +193,7 @@ define zeroext i1 @saddo4.i32(i32 signext %v1, ptr %res) {
; RV64-LABEL: saddo4.i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: lui a2, 4096
-; RV64-NEXT: addiw a2, a2, -1
+; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: addw a2, a0, a2
; RV64-NEXT: slt a0, a2, a0
; RV64-NEXT: sw a2, 0(a1)
@@ -211,7 +211,7 @@ define zeroext i1 @saddo4.i32(i32 signext %v1, ptr %res) {
; RV64ZBA-LABEL: saddo4.i32:
; RV64ZBA: # %bb.0: # %entry
; RV64ZBA-NEXT: lui a2, 4096
-; RV64ZBA-NEXT: addiw a2, a2, -1
+; RV64ZBA-NEXT: addi a2, a2, -1
; RV64ZBA-NEXT: addw a2, a0, a2
; RV64ZBA-NEXT: slt a0, a2, a0
; RV64ZBA-NEXT: sw a2, 0(a1)
@@ -229,7 +229,7 @@ define zeroext i1 @saddo4.i32(i32 signext %v1, ptr %res) {
; RV64ZICOND-LABEL: saddo4.i32:
; RV64ZICOND: # %bb.0: # %entry
; RV64ZICOND-NEXT: lui a2, 4096
-; RV64ZICOND-NEXT: addiw a2, a2, -1
+; RV64ZICOND-NEXT: addi a2, a2, -1
; RV64ZICOND-NEXT: addw a2, a0, a2
; RV64ZICOND-NEXT: slt a0, a2, a0
; RV64ZICOND-NEXT: sw a2, 0(a1)