diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 2c2271e486a84..7e918e34f2471 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -52,6 +52,7 @@ include "RISCVSchedSiFive7.td" include "RISCVSchedSiFiveP400.td" include "RISCVSchedSiFiveP500.td" include "RISCVSchedSiFiveP600.td" +include "RISCVSchedSpacemitX60.td" include "RISCVSchedSyntacoreSCR1.td" include "RISCVSchedSyntacoreSCR345.td" include "RISCVSchedSyntacoreSCR7.td" diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 1ad94228bcbaa..6954d1bc5f73e 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -586,7 +586,7 @@ def XIANGSHAN_KUNMINGHU : RISCVProcessorModel<"xiangshan-kunminghu", TuneShiftedZExtWFusion]>; def SPACEMIT_X60 : RISCVProcessorModel<"spacemit-x60", - NoSchedModel, + SpacemitX60Model, !listconcat(RVA22S64Features, [FeatureStdExtV, FeatureStdExtSscofpmf, diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td new file mode 100644 index 0000000000000..c21ab969d12ac --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -0,0 +1,353 @@ +//=- RISCVSchedSpacemitX60.td - Spacemit X60 Scheduling Defs -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// Scheduler model for the SpacemiT-X60 processor based on documentation of the +// C908 and experiments on real hardware (bpi-f3). 
+// +//===----------------------------------------------------------------------===// + +def SpacemitX60Model : SchedMachineModel { + let IssueWidth = 2; // dual-issue + let MicroOpBufferSize = 0; // in-order + let LoadLatency = 5; // worst case: >= 3 + let MispredictPenalty = 9; // nine-stage + + let CompleteModel = 0; + + let UnsupportedFeatures = [HasStdExtZknd, HasStdExtZkne, HasStdExtZknh, + HasStdExtZksed, HasStdExtZksh, HasStdExtZkr]; +} + +let SchedModel = SpacemitX60Model in { + +//===----------------------------------------------------------------------===// +// Define processor resources for Spacemit-X60 + +// Information gathered from the C908 user manual: +let BufferSize = 0 in { + // The LSU supports dual issue for scalar store/load instructions + def SMX60_LS : ProcResource<2>; + + // An IEU can decode and issue two instructions at the same time + def SMX60_IEUA : ProcResource<1>; + def SMX60_IEUB : ProcResource<1>; + def SMX60_IEU : ProcResGroup<[SMX60_IEUA, SMX60_IEUB]>; + + // Although the X60 does appear to support multiple issue for at least some + // floating point instructions, this model assumes single issue as + // increasing it reduces the gains we saw in performance + def SMX60_FP : ProcResource<1>; +} + +//===----------------------------------------------------------------------===// + +// Branching +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Integer arithmetic and logic +// Latency of ALU instructions is 1, but add.uw is 2 +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Integer multiplication +def : WriteRes { let Latency = 3; } + +// The latency of mul is 5, while in mulh, mulhsu, mulhu is 6 +// Worst case latency is used +def : WriteRes { let Latency = 6; } + +// Integer division/remainder +// TODO: Latency set based on C908 datasheet and hasn't been 
+let Latency = 12, ReleaseAtCycles = [12] in { + def : WriteRes; + def : WriteRes; +} +let Latency = 20, ReleaseAtCycles = [20] in { + def : WriteRes; + def : WriteRes; +} + +// Bitmanip +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +let Latency = 2 in { + def : WriteRes; + def : WriteRes; +} + +def : WriteRes; +def : WriteRes; +def : WriteRes; + +let Latency = 2 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +// Single-bit instructions +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Memory/Atomic memory +let Latency = 3 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +let Latency = 5 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +// Atomics +let Latency = 5 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +// Floating point units Half precision +let Latency = 4 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} +def : WriteRes { let Latency = 5; } + +let Latency = 12, ReleaseAtCycles = [12] in { + def : WriteRes; + def : WriteRes; +} + +// Single precision +let Latency = 4 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} +def : WriteRes { let Latency = 5; } + +let Latency = 15, ReleaseAtCycles = [15] in { + def : WriteRes; + def : WriteRes; +} + +// Double precision +let Latency = 5 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; +} +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 6; } + +let Latency = 22, ReleaseAtCycles = [22] in { + def : WriteRes; + def : WriteRes; +} + +// Conversions +let Latency = 6 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : 
WriteRes; + def : WriteRes; +} + +let Latency = 4 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +let Latency = 6 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + + def : WriteRes; + def : WriteRes; + def : WriteRes; + + def : WriteRes; + def : WriteRes; +} + +let Latency = 4 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +// Others +def : WriteRes; +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Bypass and advance +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : 
ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Bitmanip +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +// Single-bit instructions +def : ReadAdvance; +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Unsupported extensions +defm : UnsupportedSchedV; +defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZabha; +defm : UnsupportedSchedZbkb; +defm : UnsupportedSchedZbkx; +defm : UnsupportedSchedZfa; +defm : UnsupportedSchedZvk; +defm : UnsupportedSchedSFB; +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll index 75f4b977a98b0..08cab7cd359b9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll @@ -302,33 +302,33 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: .cfi_offset s4, -40 ; RV64X60-NEXT: li t0, 0 ; RV64X60-NEXT: li t1, 0 -; RV64X60-NEXT: addi t2, a7, -1 -; RV64X60-NEXT: add t4, a0, a6 -; RV64X60-NEXT: add t5, a2, a6 -; RV64X60-NEXT: add t3, a4, a6 -; RV64X60-NEXT: zext.w s0, t2 -; RV64X60-NEXT: mul s1, a1, s0 -; RV64X60-NEXT: add t4, t4, s1 -; RV64X60-NEXT: mul s1, a3, s0 -; RV64X60-NEXT: add t5, t5, s1 +; RV64X60-NEXT: addi s1, a7, -1 +; RV64X60-NEXT: zext.w s1, s1 +; RV64X60-NEXT: mul t2, a1, s1 +; RV64X60-NEXT: mul t3, a3, s1 +; RV64X60-NEXT: 
mul t4, a5, s1 +; RV64X60-NEXT: add s0, a0, a6 +; RV64X60-NEXT: add s1, a2, a6 +; RV64X60-NEXT: add t5, a4, a6 +; RV64X60-NEXT: add s0, s0, t2 ; RV64X60-NEXT: csrr t2, vlenb -; RV64X60-NEXT: mul s1, a5, s0 ; RV64X60-NEXT: add t3, t3, s1 -; RV64X60-NEXT: sltu s1, a0, t5 -; RV64X60-NEXT: sltu s0, a2, t4 -; RV64X60-NEXT: and t6, s1, s0 -; RV64X60-NEXT: li t5, 32 -; RV64X60-NEXT: sltu s1, a0, t3 -; RV64X60-NEXT: sltu s0, a4, t4 -; RV64X60-NEXT: and t3, s1, s0 -; RV64X60-NEXT: or s1, a1, a3 -; RV64X60-NEXT: slti s1, s1, 0 -; RV64X60-NEXT: or t4, t6, s1 -; RV64X60-NEXT: or s0, a1, a5 -; RV64X60-NEXT: slti s0, s0, 0 -; RV64X60-NEXT: or s0, t3, s0 +; RV64X60-NEXT: li t6, 32 +; RV64X60-NEXT: add t4, t4, t5 +; RV64X60-NEXT: sltu t3, a0, t3 +; RV64X60-NEXT: sltu s1, a2, s0 +; RV64X60-NEXT: and t3, t3, s1 +; RV64X60-NEXT: or t5, a1, a3 +; RV64X60-NEXT: sltu s1, a0, t4 +; RV64X60-NEXT: sltu s0, a4, s0 +; RV64X60-NEXT: slti t4, t5, 0 +; RV64X60-NEXT: and s0, s0, s1 +; RV64X60-NEXT: or s1, a1, a5 +; RV64X60-NEXT: or t4, t3, t4 ; RV64X60-NEXT: slli t3, t2, 1 -; RV64X60-NEXT: maxu s1, t3, t5 +; RV64X60-NEXT: slti s1, s1, 0 +; RV64X60-NEXT: or s0, s0, s1 +; RV64X60-NEXT: maxu s1, t3, t6 ; RV64X60-NEXT: or s0, t4, s0 ; RV64X60-NEXT: sltu s1, a6, s1 ; RV64X60-NEXT: or s0, s0, s1 @@ -339,8 +339,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1 ; RV64X60-NEXT: add t5, t5, a1 ; RV64X60-NEXT: add a2, a2, a3 -; RV64X60-NEXT: add a4, a4, a5 ; RV64X60-NEXT: addiw t1, t1, 1 +; RV64X60-NEXT: add a4, a4, a5 ; RV64X60-NEXT: addi t0, t0, 1 ; RV64X60-NEXT: beq t1, a7, .LBB0_11 ; RV64X60-NEXT: .LBB0_4: # %for.cond1.preheader.us @@ -367,10 +367,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: vl2r.v v8, (s2) ; RV64X60-NEXT: vl2r.v v10, (s3) ; RV64X60-NEXT: sub s1, s1, t3 -; RV64X60-NEXT: add s3, s3, t3 ; RV64X60-NEXT: vaaddu.vv v8, v8, v10 ; RV64X60-NEXT: vs2r.v 
v8, (s4) ; RV64X60-NEXT: add s4, s4, t3 +; RV64X60-NEXT: add s3, s3, t3 ; RV64X60-NEXT: add s2, s2, t3 ; RV64X60-NEXT: bnez s1, .LBB0_7 ; RV64X60-NEXT: # %bb.8: # %middle.block diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s new file mode 100644 index 0000000000000..ceab015e27203 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s @@ -0,0 +1,312 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=spacemit-x60 -iterations=1 -instruction-tables=full < %s | FileCheck %s + +# Zalrsc +lr.w t0, (t1) +lr.w.aq t1, (t2) +lr.w.rl t2, (t3) +lr.w.aqrl t3, (t4) +sc.w t6, t5, (t4) +sc.w.aq t5, t4, (t3) +sc.w.rl t4, t3, (t2) +sc.w.aqrl t3, t2, (t1) + +lr.d t0, (t1) +lr.d.aq t1, (t2) +lr.d.rl t2, (t3) +lr.d.aqrl t3, (t4) +sc.d t6, t5, (t4) +sc.d.aq t5, t4, (t3) +sc.d.rl t4, t3, (t2) +sc.d.aqrl t3, t2, (t1) + +# Zaamo +amoswap.w a4, ra, (s0) +amoadd.w a1, a2, (a3) +amoxor.w a2, a3, (a4) +amoand.w a3, a4, (a5) +amoor.w a4, a5, (a6) +amomin.w a5, a6, (a7) +amomax.w s7, s6, (s5) +amominu.w s6, s5, (s4) +amomaxu.w s5, s4, (s3) + +amoswap.w.aq a4, ra, (s0) +amoadd.w.aq a1, a2, (a3) +amoxor.w.aq a2, a3, (a4) +amoand.w.aq a3, a4, (a5) +amoor.w.aq a4, a5, (a6) +amomin.w.aq a5, a6, (a7) +amomax.w.aq s7, s6, (s5) +amominu.w.aq s6, s5, (s4) +amomaxu.w.aq s5, s4, (s3) + +amoswap.w.rl a4, ra, (s0) +amoadd.w.rl a1, a2, (a3) +amoxor.w.rl a2, a3, (a4) +amoand.w.rl a3, a4, (a5) +amoor.w.rl a4, a5, (a6) +amomin.w.rl a5, a6, (a7) +amomax.w.rl s7, s6, (s5) +amominu.w.rl s6, s5, (s4) +amomaxu.w.rl s5, s4, (s3) + +amoswap.w.aqrl a4, ra, (s0) +amoadd.w.aqrl a1, a2, (a3) +amoxor.w.aqrl a2, a3, (a4) +amoand.w.aqrl a3, a4, (a5) +amoor.w.aqrl a4, a5, (a6) +amomin.w.aqrl a5, a6, (a7) +amomax.w.aqrl s7, s6, (s5) +amominu.w.aqrl s6, s5, (s4) +amomaxu.w.aqrl s5, s4, (s3) + +amoswap.d a4, ra, (s0) +amoadd.d a1, a2, (a3) +amoxor.d a2, a3, (a4) +amoand.d 
a3, a4, (a5) +amoor.d a4, a5, (a6) +amomin.d a5, a6, (a7) +amomax.d s7, s6, (s5) +amominu.d s6, s5, (s4) +amomaxu.d s5, s4, (s3) + +amoswap.d.aq a4, ra, (s0) +amoadd.d.aq a1, a2, (a3) +amoxor.d.aq a2, a3, (a4) +amoand.d.aq a3, a4, (a5) +amoor.d.aq a4, a5, (a6) +amomin.d.aq a5, a6, (a7) +amomax.d.aq s7, s6, (s5) +amominu.d.aq s6, s5, (s4) +amomaxu.d.aq s5, s4, (s3) + +amoswap.d.rl a4, ra, (s0) +amoadd.d.rl a1, a2, (a3) +amoxor.d.rl a2, a3, (a4) +amoand.d.rl a3, a4, (a5) +amoor.d.rl a4, a5, (a6) +amomin.d.rl a5, a6, (a7) +amomax.d.rl s7, s6, (s5) +amominu.d.rl s6, s5, (s4) +amomaxu.d.rl s5, s4, (s3) + +amoswap.d.aqrl a4, ra, (s0) +amoadd.d.aqrl a1, a2, (a3) +amoxor.d.aqrl a2, a3, (a4) +amoand.d.aqrl a3, a4, (a5) +amoor.d.aqrl a4, a5, (a6) +amomin.d.aqrl a5, a6, (a7) +amomax.d.aqrl s7, s6, (s5) +amominu.d.aqrl s6, s5, (s4) +amomaxu.d.aqrl s5, s4, (s3) + +# CHECK: Resources: +# CHECK-NEXT: [0] - SMX60_FP:1 +# CHECK-NEXT: [1] - SMX60_IEU:2 SMX60_IEUA, SMX60_IEUB +# CHECK-NEXT: [2] - SMX60_IEUA:1 +# CHECK-NEXT: [3] - SMX60_IEUB:1 +# CHECK-NEXT: [4] - SMX60_LS:2 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) +# CHECK-NEXT: [7]: Bypass Latency +# CHECK-NEXT: [8]: Resources ( | [] | [, | [] | [, | [] | [,