From 7251c19632d5f8f5adb03e21c01903c4fd1dd59a Mon Sep 17 00:00:00 2001 From: TIHan Date: Fri, 8 Dec 2023 16:54:59 -0800 Subject: [PATCH 01/37] Initial work --- src/coreclr/jit/codegenarm64.cpp | 12 ++- src/coreclr/jit/codegencommon.cpp | 2 +- src/coreclr/jit/emitarm64.cpp | 155 ++++++++++++++++++++++++++++++ src/coreclr/jit/emitarm64.h | 8 ++ 4 files changed, 175 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index a1133331cf9c97..2837d8b3ccca91 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5436,7 +5436,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) #define ALL_ARM64_EMITTER_UNIT_TESTS // #define ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // #define ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -// #define ALL_ARM64_EMITTER_UNIT_TESTS_SVE +#define ALL_ARM64_EMITTER_UNIT_TESTS_SVE // #define ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED #if defined(DEBUG) @@ -10654,6 +10654,16 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_fsqrt, EA_SCALABLE, REG_V6, REG_P6, REG_V6, INS_OPTS_SCALABLE_S); /* FSQRT ., /M, . 
*/ + // IF_SVE_IL_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1d, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + INS_OPTS_SCALABLE_D); // LDNF1D {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + INS_OPTS_SCALABLE_D); // LDNF1SW {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1d, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 5, + INS_OPTS_SCALABLE_D); // LDNF1D {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sw, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 5, + INS_OPTS_SCALABLE_D); // LDNF1SW {.D }, /Z, [{, #, MUL VL}] + #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE #ifdef ALL_ARM64_EMITTER_UNIT_TESTS diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 7a96748a50a2c6..95c965178aae6a 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2096,7 +2096,7 @@ void CodeGen::genEmitUnwindDebugGCandEH() #endif // LATE_DISASM #ifdef DEBUG - if (JitConfig.JitRawHexCode().contains(compiler->info.compMethodHnd, compiler->info.compClassHnd, + if (compiler->opts.altJit && JitConfig.JitRawHexCode().contains(compiler->info.compMethodHnd, compiler->info.compClassHnd, &compiler->info.compMethodInfo->args)) { BYTE* addr = (BYTE*)*codePtr + compiler->GetEmitter()->writeableOffset; diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index aa81f83634ccf2..53b8c3b0c71da6 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1193,6 +1193,47 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); break; + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + 
// immediate) + case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) + case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus + // immediate) + case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus + // immediate) + case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // immediate) + case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) + case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // immediate) + case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // immediate) + case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case 
IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isValidSimm4(emitGetInsSC(id))); // iiii + assert(isScalableVectorSize(elemsize)); + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -9367,6 +9408,16 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_DV_3AI; break; + case INS_sve_ldnf1sw: + case INS_sve_ldnf1d: + assert(insOptsScalable(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IL_3A; + break; + default: unreached(); break; @@ -12609,6 +12660,17 @@ void emitter::emitIns_Call(EmitCallType callType, return 0; } +/***************************************************************************** + * + * Returns the encoding for a signed 4-bit immediate placed in the 'iiii' field at bit locations '19-16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeSimm4_19_to_16(ssize_t imm) +{ + assert(isValidSimm4(imm)); + return ((code_t)imm & 0xF) << 16; // keep only the low 4 bits so a negative imm cannot set bits 31-20 +} + BYTE* emitter::emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id) { instruction ins = id->idIns(); @@ -14640,6 +14702,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn code |= insEncodeSveElemsize_tszh_22_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx // x + dst += emitOutput_Instr(dst, code); break; case IF_SVE_GK_2A: // ................ 
......mmmmmddddd -- SVE2 crypto destructive binary operations @@ -14655,6 +14718,47 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) + case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus + // immediate) + case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus + // immediate) + case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // immediate) + case IF_SVE_IS_3A: // ............iiii 
...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) + case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // immediate) + case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // immediate) + case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_R_9_to_5(id->idReg3()); // ggg + code |= insEncodeReg_P_12_to_10(id->idReg2()); // nnnnn + code |= insEncodeSimm4_19_to_16(imm); // iiii + dst += emitOutput_Instr(dst, code); + break; + default: assert(!"Unexpected format"); break; @@ -17028,6 +17132,52 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg1(), id->idInsOpt(), false); // ddddd break; + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_G: // ............iiii 
...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + // { .D }, /Z, [{, #, MUL VL}] + case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) + case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus + // immediate) + case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus + // immediate) + case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // immediate) + case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) + case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // immediate) + case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // immediate) + case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) + imm = emitGetInsSC(id); + emitDispSveRegList(id->idReg1(), 1, id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, true); // ggg + printf("["); + emitDispReg(id->idReg3(), optGetSveElemsize(id->idInsOpt()), imm != 0); // nnnnn + if (imm != 0) + { + emitDispImm(emitGetInsSC(id), true); // iiii + printf("mul vl"); 
+ } + printf("]"); + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -19515,6 +19665,11 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index d70ea28472cac9..b68f4d4572486e 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -472,6 +472,8 @@ static code_t insEncodeSveElemsize(insOpts opt); // This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); +static code_t insEncodeSimm4_19_to_16(ssize_t imm); + // Returns true if 'reg' represents an integer register. static bool isIntegerRegister(regNumber reg) { @@ -484,6 +486,12 @@ static bool isStackRegister(regNumber reg) return (reg == REG_ZR) || (reg == REG_FP); } // ZR (R31) encodes the SP register +// Returns true if 'value' is a legal signed immediate 4 bit encoding (such as for LDNF1SW). +static bool isValidSimm4(ssize_t value) +{ + return (-8 <= value) && (value <= 7); +}; + // Returns true if 'value' is a legal unsigned immediate 5 bit encoding (such as for CCMP). 
static bool isValidUimm5(ssize_t value) { From bf05a314b22f4d03bf301ce4f52648d9c1bec545 Mon Sep 17 00:00:00 2001 From: TIHan Date: Fri, 8 Dec 2023 17:04:47 -0800 Subject: [PATCH 02/37] Added another format --- src/coreclr/jit/codegenarm64.cpp | 6 ++++++ src/coreclr/jit/emitarm64.cpp | 16 ++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 2837d8b3ccca91..1050de5b4eb5da 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10654,6 +10654,12 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_fsqrt, EA_SCALABLE, REG_V6, REG_P6, REG_V6, INS_OPTS_SCALABLE_S); /* FSQRT ., /M, . */ + // IF_SVE_IH_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + INS_OPTS_SCALABLE_D); // LD1D {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 5, + INS_OPTS_SCALABLE_D); // LD1D {.D }, /Z, [{, #, MUL VL}] + // IF_SVE_IL_3A theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1d, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, INS_OPTS_SCALABLE_D); // LDNF1D {.D }, /Z, [{, #, MUL VL}] diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 53b8c3b0c71da6..ff269f5c4b2c90 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9408,6 +9408,16 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_DV_3AI; break; + case INS_sve_ld1d: + assert(insOptsScalable(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IH_3A; + break; + + case INS_sve_ldnf1sw: case INS_sve_ldnf1d: assert(insOptsScalable(opt)); @@ -19665,6 +19675,12 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE 
contiguous load (quadwords, scalar plus + // immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) result.insThroughput = PERFSCORE_THROUGHPUT_3C; result.insLatency = PERFSCORE_LATENCY_6C; From 050ef3b5af45b512fbac0f303dc8318cd1c66d92 Mon Sep 17 00:00:00 2001 From: TIHan Date: Fri, 8 Dec 2023 17:09:42 -0800 Subject: [PATCH 03/37] Fix-up comment --- src/coreclr/jit/emitarm64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index ff269f5c4b2c90..7660764c4d5806 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -14763,8 +14763,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt - code |= insEncodeReg_R_9_to_5(id->idReg3()); // ggg - code |= insEncodeReg_P_12_to_10(id->idReg2()); // nnnnn + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg code |= insEncodeSimm4_19_to_16(imm); // iiii dst += emitOutput_Instr(dst, code); break; From 8775968dd607058d67f0b38451e60b6c3477450b Mon Sep 17 00:00:00 2001 From: TIHan Date: Fri, 8 Dec 2023 21:22:43 -0800 Subject: [PATCH 04/37] Added another format --- src/coreclr/jit/codegenarm64.cpp | 6 ++++-- src/coreclr/jit/emitarm64.cpp | 35 ++++++++++++++++++++++++++++---- src/coreclr/jit/emitarm64.h | 2 +- src/coreclr/jit/instr.h | 1 + 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 1050de5b4eb5da..689e1fd7dd2fb3 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10655,10 +10655,12 @@ void CodeGen::genArm64EmitterUnitTests() 
INS_OPTS_SCALABLE_S); /* FSQRT ., /M, . */ // IF_SVE_IH_3A - theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V5, REG_P3, REG_R4, 0, INS_OPTS_SCALABLE_D); // LD1D {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IH_3A_A theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 5, - INS_OPTS_SCALABLE_D); // LD1D {.D }, /Z, [{, #, MUL VL}] + INS_OPTS_SCALABLE_Q); // LD1D {.Q }, /Z, [{, #, MUL VL}] // IF_SVE_IL_3A theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1d, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 7660764c4d5806..1cb193292c7de5 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1226,7 +1226,16 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); +#ifdef DEBUG + if (id->idInsFmt() == IF_SVE_IH_3A_A) + { + assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); + } + else + { + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + } +#endif // DEBUG assert(isVectorRegister(id->idReg1())); // ttttt assert(isPredicateRegister(id->idReg2())); // ggg assert(isGeneralRegister(id->idReg3())); // nnnnn @@ -5342,6 +5351,9 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt) case INS_OPTS_SCALABLE_D_WITH_SCALAR: return EA_8BYTE; + case INS_OPTS_SCALABLE_Q: + return EA_16BYTE; + default: assert(!"Invalid insOpt for vector register"); return EA_UNKNOWN; @@ -9414,10 +9426,16 @@ void emitter::emitIns_R_R_R_I(instruction ins, assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); - fmt = IF_SVE_IH_3A; + if (opt == INS_OPTS_SCALABLE_Q) 
+ { + fmt = IF_SVE_IH_3A_A; + } + else + { + fmt = IF_SVE_IH_3A; + } break; - case INS_sve_ldnf1sw: case INS_sve_ldnf1d: assert(insOptsScalable(opt)); @@ -15436,6 +15454,9 @@ void emitter::emitDispArrangement(insOpts opt) case INS_OPTS_SCALABLE_D_WITH_SCALAR: str = "d"; break; + case INS_OPTS_SCALABLE_Q: + str = "q"; + break; default: assert(!"Invalid insOpt for vector register"); @@ -17179,7 +17200,7 @@ void emitter::emitDispInsHelp( emitDispSveRegList(id->idReg1(), 1, id->idInsOpt(), true); // ttttt emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, true); // ggg printf("["); - emitDispReg(id->idReg3(), optGetSveElemsize(id->idInsOpt()), imm != 0); // nnnnn + emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn if (imm != 0) { emitDispImm(emitGetInsSC(id), true); // iiii @@ -19681,6 +19702,12 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_9C; break; + case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) result.insThroughput = PERFSCORE_THROUGHPUT_3C; result.insLatency = PERFSCORE_LATENCY_6C; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index b68f4d4572486e..6df7efda88cbf3 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -894,7 +894,7 @@ inline static bool insOptsScalableSimple(insOpts opt) { // `opt` is any of the standard scalable types. 
return ((opt == INS_OPTS_SCALABLE_B) || (opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || - (opt == INS_OPTS_SCALABLE_D)); + (opt == INS_OPTS_SCALABLE_D) || (opt == INS_OPTS_SCALABLE_Q)); } inline static bool insOptsScalableWords(insOpts opt) diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 08f28dfe74270b..9d72a9d1068c80 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -274,6 +274,7 @@ enum insOpts : unsigned INS_OPTS_SCALABLE_H, INS_OPTS_SCALABLE_S, INS_OPTS_SCALABLE_D, + INS_OPTS_SCALABLE_Q, INS_OPTS_SCALABLE_WIDE_B, INS_OPTS_SCALABLE_WIDE_H, From dc0ab4d9076c2e71724e201448772e731bdb8875 Mon Sep 17 00:00:00 2001 From: TIHan Date: Sat, 9 Dec 2023 13:43:49 -0800 Subject: [PATCH 05/37] Added more formats --- src/coreclr/jit/codegenarm64.cpp | 24 ++++++++++++ src/coreclr/jit/emitarm64.cpp | 65 ++++++++++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 689e1fd7dd2fb3..7a66bb93e34a49 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10662,6 +10662,30 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 5, INS_OPTS_SCALABLE_Q); // LD1D {.Q }, /Z, [{, #, MUL VL}] + // IF_SVE_IH_3A_F + theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 3, + INS_OPTS_SCALABLE_D); // LD1W {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IJ_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sw, EA_SCALABLE, REG_V0, REG_P5, REG_R3, 4, + INS_OPTS_SCALABLE_D); // LD1SW {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IJ_3A_D + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V3, REG_P0, REG_R2, 6, + INS_OPTS_SCALABLE_D); // LD1SB {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IJ_3A_E + theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V5, REG_P1, REG_R3, 7, + INS_OPTS_SCALABLE_D); // LD1B {.D }, /Z, [{, #, 
MUL VL}] + + // IF_SVE_IJ_3A_F + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V7, REG_P3, REG_R5, 2, + INS_OPTS_SCALABLE_D); // LD1SH {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IJ_3A_G + theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V2, REG_P1, REG_R6, 1, + INS_OPTS_SCALABLE_D); // LD1H {.D }, /Z, [{, #, MUL VL}] + // IF_SVE_IL_3A theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1d, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, INS_OPTS_SCALABLE_D); // LDNF1D {.D }, /Z, [{, #, MUL VL}] diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 1cb193292c7de5..763a996da14149 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9436,6 +9436,60 @@ void emitter::emitIns_R_R_R_I(instruction ins, } break; + case INS_sve_ld1w: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IH_3A_F; + break; + + case INS_sve_ld1sw: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A; + break; + + case INS_sve_ld1sb: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_D; + break; + + case INS_sve_ld1b: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_E; + break; + + case INS_sve_ld1sh: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_F; + break; + + case INS_sve_ld1h: + assert(opt == INS_OPTS_SCALABLE_D); + 
assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_G; + break; + case INS_sve_ldnf1sw: case INS_sve_ldnf1d: assert(insOptsScalable(opt)); @@ -19698,12 +19752,15 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus // immediate) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; - break; - case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus // immediate) + case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency = PERFSCORE_LATENCY_9C; break; From 2318532f0ed45751756566cfcaea292c23bcaaf1 Mon Sep 17 00:00:00 2001 From: TIHan Date: Sat, 9 Dec 2023 14:08:34 -0800 Subject: [PATCH 06/37] Added more formats --- src/coreclr/jit/codegenarm64.cpp | 18 +++++++++++++- src/coreclr/jit/emitarm64.cpp | 40 ++++++++++++++++++++++++++++++-- src/coreclr/jit/emitarm64.h | 4 ++-- 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 7a66bb93e34a49..efdb232ce0c7fd 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ 
b/src/coreclr/jit/codegenarm64.cpp @@ -10694,7 +10694,23 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1d, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 5, INS_OPTS_SCALABLE_D); // LDNF1D {.D }, /Z, [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sw, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 5, - INS_OPTS_SCALABLE_D); // LDNF1SW {.D }, /Z, [{, #, MUL VL}] + INS_OPTS_SCALABLE_D); // LDNF1SW {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IL_3A_A + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sh, EA_SCALABLE, REG_V0, REG_P1, REG_R5, 5, + INS_OPTS_SCALABLE_D); // LDNF1SH {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1w, EA_SCALABLE, REG_V0, REG_P2, REG_R4, 5, + INS_OPTS_SCALABLE_D); // LDNF1W {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IL_3A_B + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1h, EA_SCALABLE, REG_V1, REG_P3, REG_R2, 5, + INS_OPTS_SCALABLE_D); // LDNF1H {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sb, EA_SCALABLE, REG_V0, REG_P4, REG_R1, 5, + INS_OPTS_SCALABLE_D); // LDNF1SB {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IL_3A_C + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, 5, + INS_OPTS_SCALABLE_D); // LDNF1B {.D }, /Z, [{, #, MUL VL}] #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 763a996da14149..949b1573faa054 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1233,6 +1233,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) } else { + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); } #endif // DEBUG @@ -9500,6 +9501,35 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_IL_3A; break; + case INS_sve_ldnf1sh: + case INS_sve_ldnf1w: + assert(insOptsScalable(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IL_3A_A; + break; + + case 
INS_sve_ldnf1h: + case INS_sve_ldnf1sb: + assert(insOptsScalable(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IL_3A_B; + break; + + case INS_sve_ldnf1b: + assert(insOptsScalable(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IL_3A_C; + break; + default: unreached(); break; @@ -19750,8 +19780,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; - case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus - // immediate) + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus // immediate) case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus @@ -19766,6 +19796,12 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) + case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) result.insThroughput = PERFSCORE_THROUGHPUT_3C; result.insLatency = PERFSCORE_LATENCY_6C; break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 6df7efda88cbf3..d454b451ad77ff 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -887,14 
+887,14 @@ inline static bool insOptsScalable(insOpts opt) { // Opt is any of the scalable types. return ((insOptsScalableSimple(opt)) || (insOptsScalableWide(opt)) || (insOptsScalableWithSimdScalar(opt)) || - (insOptsScalableWithScalar(opt)) || (insOptsScalableWithSimdVector(opt))); + (insOptsScalableWithScalar(opt)) || (insOptsScalableWithSimdVector(opt)) || (opt == INS_OPTS_SCALABLE_Q)); } inline static bool insOptsScalableSimple(insOpts opt) { // `opt` is any of the standard scalable types. return ((opt == INS_OPTS_SCALABLE_B) || (opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || - (opt == INS_OPTS_SCALABLE_D) || (opt == INS_OPTS_SCALABLE_Q)); + (opt == INS_OPTS_SCALABLE_D)); } inline static bool insOptsScalableWords(insOpts opt) From 3ebbb3d4b02ad7f7efb18ac17db46763ff4791d5 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 13 Dec 2023 11:04:57 -0800 Subject: [PATCH 07/37] Properly encoding ld1w --- src/coreclr/jit/codegenarm64.cpp | 4 ++ src/coreclr/jit/emitarm64.cpp | 66 ++++++++++++++++++++++++++------ src/coreclr/jit/emitarm64.h | 10 +++++ src/coreclr/jit/instrsarm64sve.h | 2 +- 4 files changed, 69 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index efdb232ce0c7fd..7979aabbfa44cc 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10663,8 +10663,12 @@ void CodeGen::genArm64EmitterUnitTests() INS_OPTS_SCALABLE_Q); // LD1D {.Q }, /Z, [{, #, MUL VL}] // IF_SVE_IH_3A_F + theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 3, + INS_OPTS_SCALABLE_S); // LD1W {.S }, /Z, [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 3, INS_OPTS_SCALABLE_D); // LD1W {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 3, + INS_OPTS_SCALABLE_Q); // LD1W {.Q }, /Z, [{, #, MUL VL}] // IF_SVE_IJ_3A theEmitter->emitIns_R_R_R_I(INS_sve_ld1sw, 
EA_SCALABLE, REG_V0, REG_P5, REG_R3, 4, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 949b1573faa054..4e0915e13016ae 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1226,17 +1226,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) elemsize = id->idOpSize(); -#ifdef DEBUG - if (id->idInsFmt() == IF_SVE_IH_3A_A) - { - assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); - } - else - { - - assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); - } -#endif // DEBUG + assert(insOptsScalable(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ttttt assert(isPredicateRegister(id->idReg2())); // ggg assert(isGeneralRegister(id->idReg3())); // nnnnn @@ -9433,12 +9423,13 @@ void emitter::emitIns_R_R_R_I(instruction ins, } else { + assert(opt == INS_OPTS_SCALABLE_D); fmt = IF_SVE_IH_3A; } break; case INS_sve_ld1w: - assert(opt == INS_OPTS_SCALABLE_D); + assert(insOptsScalableWordsOrQuadwords(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); @@ -12772,6 +12763,51 @@ void emitter::emitIns_Call(EmitCallType callType, return 0; } +/*static*/ bool emitter::canEncodeSveElemsize_dtype(insFormat fmt) +{ + switch (fmt) + { + case IF_SVE_IH_3A_F: + return true; + + default: + return false; + } +} + +/***************************************************************************** + * + * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction + * This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. 
+ */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtype(insFormat fmt, emitAttr size, code_t code) +{ + assert(canEncodeSveElemsize_dtype(fmt)); + switch (fmt) + { + case IF_SVE_IH_3A_F: + switch (size) + { + case EA_4BYTE: + return code & ~(1 << 21); // Set bit '21' to 0. + + case EA_8BYTE: + return code; // By default, the instruction already encodes 64-bit. + + case EA_16BYTE: + return (code & ~((1 << 22) | (1 << 21) | (1 << 15))) | (1 << 20); // Set bits '22', '21' and '15' to 0. Set bit '20' to 1. + + default: + assert(!"Invalid size for encoding dtype."); + } + + default: + assert(!"Invalid format for encoding dtype."); + } + return code; +} + /***************************************************************************** * * TODO @@ -14868,6 +14904,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg code |= insEncodeSimm4_19_to_16(imm); // iiii + + if (canEncodeSveElemsize_dtype(fmt)) + { + code = insEncodeSveElemsize_dtype(fmt, optGetSveElemsize(id->idInsOpt()), code); + } + dst += emitOutput_Instr(dst, code); break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index d454b451ad77ff..d9f6dcf59addf2 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -472,6 +472,10 @@ static code_t insEncodeSveElemsize(insOpts opt); // This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); +static bool canEncodeSveElemsize_dtype(insFormat fmt); + +static code_t insEncodeSveElemsize_dtype(insFormat fmt, emitAttr size, code_t code); + static code_t insEncodeSimm4_19_to_16(ssize_t imm); // Returns true if 'reg' represents an integer register. 
@@ -903,6 +907,12 @@ inline static bool insOptsScalableWords(insOpts opt) return ((opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); } +inline static bool insOptsScalableWordsOrQuadwords(insOpts opt) +{ + // `opt` is any of the standard word, quadword and above scalable types. + return (insOptsScalableWords(opt) || (opt == INS_OPTS_SCALABLE_Q)); +} + inline static bool insOptsScalableAtLeastHalf(insOpts opt) { // `opt` is any of the standard half and above scalable types. diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index d10b075ad09467..e78399518ef4d5 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -132,7 +132,7 @@ INST9(ld1h, "ld1h", 0, IF_SV // enum name info SVE_HW_4A SVE_HW_4A_A SVE_HW_4A_B SVE_HW_4A_C SVE_HW_4B SVE_HW_4B_D SVE_HX_3A_E SVE_IH_3A_F SVE_II_4A_H -INST9(ld1w, "ld1w", 0, IF_SVE_9C, 0x85204000, 0xC5204000, 0xC5004000, 0x85004000, 0xC560C000, 0xC540C000, 0x8520C000, 0xA5002000, 0xA5000000 ) +INST9(ld1w, "ld1w", 0, IF_SVE_9C, 0x85204000, 0xC5204000, 0xC5004000, 0x85004000, 0xC560C000, 0xC540C000, 0x8520C000, 0xA560A000, 0xA5000000 ) // LD1W {.S }, /Z, [, .S, #2] SVE_HW_4A 100001010h1mmmmm 010gggnnnnnttttt 8520 4000 // LD1W {.D }, /Z, [, .D, #2] SVE_HW_4A_A 110001010h1mmmmm 010gggnnnnnttttt C520 4000 // LD1W {.D }, /Z, [, .D, ] SVE_HW_4A_B 110001010h0mmmmm 010gggnnnnnttttt C500 4000 From 0ef3323dada1a95e99c68620a1caac702330308e Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 13 Dec 2023 11:19:35 -0800 Subject: [PATCH 08/37] Encoding ld1sb properly --- src/coreclr/jit/codegenarm64.cpp | 4 ++++ src/coreclr/jit/emitarm64.cpp | 19 ++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 7979aabbfa44cc..7268a10dee64c8 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10675,6 +10675,10 @@ void CodeGen::genArm64EmitterUnitTests() 
INS_OPTS_SCALABLE_D); // LD1SW {.D }, /Z, [{, #, MUL VL}] // IF_SVE_IJ_3A_D + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V3, REG_P0, REG_R2, 6, + INS_OPTS_SCALABLE_H); // LD1SB {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V3, REG_P0, REG_R2, 6, + INS_OPTS_SCALABLE_S); // LD1SB {.S }, /Z, [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V3, REG_P0, REG_R2, 6, INS_OPTS_SCALABLE_D); // LD1SB {.D }, /Z, [{, #, MUL VL}] diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 4e0915e13016ae..5b56ac082350b3 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9447,7 +9447,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, break; case INS_sve_ld1sb: - assert(opt == INS_OPTS_SCALABLE_D); + assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); @@ -12768,6 +12768,7 @@ void emitter::emitIns_Call(EmitCallType callType, switch (fmt) { case IF_SVE_IH_3A_F: + case IF_SVE_IJ_3A_D: return true; default: @@ -12802,6 +12803,22 @@ void emitter::emitIns_Call(EmitCallType callType, assert(!"Invalid size for encoding dtype."); } + case IF_SVE_IJ_3A_D: + switch (size) + { + case EA_2BYTE: + return code | (1 << 22); // Set bit '22' to 1. + + case EA_4BYTE: + return code | (1 << 21); // Set bit '21' to 1. + + case EA_8BYTE: + return code; // By default, the instruction already encodes 64-bit. 
+ + default: + assert(!"Invalid size for encoding dtype."); + } + default: assert(!"Invalid format for encoding dtype."); } From 523a544000fecd24361f38dc2121dd9a2f8516e1 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 13 Dec 2023 11:41:39 -0800 Subject: [PATCH 09/37] Properly encoding ld1b --- src/coreclr/jit/codegenarm64.cpp | 6 ++++++ src/coreclr/jit/emitarm64.cpp | 19 ++++++++++++++++++- src/coreclr/jit/instrsarm64sve.h | 6 +++--- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 7268a10dee64c8..5fe8a23d089375 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10683,6 +10683,12 @@ void CodeGen::genArm64EmitterUnitTests() INS_OPTS_SCALABLE_D); // LD1SB {.D }, /Z, [{, #, MUL VL}] // IF_SVE_IJ_3A_E + theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V5, REG_P1, REG_R3, 7, + INS_OPTS_SCALABLE_B); // LD1B {.B }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V5, REG_P1, REG_R3, 7, + INS_OPTS_SCALABLE_H); // LD1B {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V5, REG_P1, REG_R3, 7, + INS_OPTS_SCALABLE_S); // LD1B {.S }, /Z, [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V5, REG_P1, REG_R3, 7, INS_OPTS_SCALABLE_D); // LD1B {.D }, /Z, [{, #, MUL VL}] diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 5b56ac082350b3..79743adc9127e5 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9456,7 +9456,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, break; case INS_sve_ld1b: - assert(opt == INS_OPTS_SCALABLE_D); + assert(insOptsScalableSimple(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); @@ -12769,6 +12769,7 @@ void emitter::emitIns_Call(EmitCallType callType, { case IF_SVE_IH_3A_F: case IF_SVE_IJ_3A_D: + case 
IF_SVE_IJ_3A_E: return true; default: @@ -12819,6 +12820,22 @@ void emitter::emitIns_Call(EmitCallType callType, assert(!"Invalid size for encoding dtype."); } + case IF_SVE_IJ_3A_E: + switch (size) + { + case EA_1BYTE: + return code & ~((1 << 22) | (1 << 21)); // Set bit '22' and '21' to 0. + + case EA_2BYTE: + return code & ~(1 << 22); // Set bit '22' to 0. + + case EA_4BYTE: + return code & ~(1 << 21); // Set bit '21' to 0. + + case EA_8BYTE: + return code; // By default, the instruction already encodes 64-bit. + } + default: assert(!"Invalid format for encoding dtype."); } diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index e78399518ef4d5..01ecba92d33417 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -140,7 +140,7 @@ INST9(ld1w, "ld1w", 0, IF_SV // LD1W {.D }, /Z, [, .D, LSL #2] SVE_HW_4B 11000101011mmmmm 110gggnnnnnttttt C560 C000 // LD1W {.D }, /Z, [, .D] SVE_HW_4B_D 11000101010mmmmm 110gggnnnnnttttt C540 C000 // LD1W {.D }, /Z, [.D{, #}] SVE_HX_3A_E 10000101001iiiii 110gggnnnnnttttt 8520 C000 - // LD1W {.D }, /Z, [{, #, MUL VL}] SVE_IH_3A_F 101001010000iiii 001gggnnnnnttttt A500 2000 + // LD1W {.D }, /Z, [{, #, MUL VL}] SVE_IH_3A_F 101001010110iiii 101gggnnnnnttttt A560 A000 // LD1W {.D }, /Z, [, , LSL #2] SVE_II_4A_H 10100101000mmmmm 000gggnnnnnttttt A500 0000 @@ -271,12 +271,12 @@ INST6(ld1sb, "ld1sb", 0, IF_SV // enum name info SVE_HW_4A SVE_HW_4A_A SVE_HW_4B SVE_HX_3A_B SVE_IJ_3A_E SVE_IK_4A_H -INST6(ld1b, "ld1b", 0, IF_SVE_6D, 0xC4004000, 0x84004000, 0xC440C000, 0x8420C000, 0xA400A000, 0xA4004000 ) +INST6(ld1b, "ld1b", 0, IF_SVE_6D, 0xC4004000, 0x84004000, 0xC440C000, 0x8420C000, 0xA460A000, 0xA4004000 ) // LD1B {.D }, /Z, [, .D, ] SVE_HW_4A 110001000h0mmmmm 010gggnnnnnttttt C400 4000 // LD1B {.S }, /Z, [, .S, ] SVE_HW_4A_A 100001000h0mmmmm 010gggnnnnnttttt 8400 4000 // LD1B {.D }, /Z, [, .D] SVE_HW_4B 11000100010mmmmm 110gggnnnnnttttt C440 C000 // LD1B {.D }, /Z, [.D{, #}] 
SVE_HX_3A_B 10000100001iiiii 110gggnnnnnttttt 8420 C000 - // LD1B {.D }, /Z, [{, #, MUL VL}] SVE_IJ_3A_E 101001000000iiii 101gggnnnnnttttt A400 A000 + // LD1B {.D }, /Z, [{, #, MUL VL}] SVE_IJ_3A_E 101001000110iiii 101gggnnnnnttttt A460 A000 // LD1B {.D }, /Z, [, ] SVE_IK_4A_H 10100100000mmmmm 010gggnnnnnttttt A400 4000 From 08ca147f3496d1aeaee7aaaf672d2ef3152475eb Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 13 Dec 2023 11:49:32 -0800 Subject: [PATCH 10/37] Properly encoding ld1sh --- src/coreclr/jit/codegenarm64.cpp | 2 ++ src/coreclr/jit/emitarm64.cpp | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 5fe8a23d089375..1335da349d0130 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10693,6 +10693,8 @@ void CodeGen::genArm64EmitterUnitTests() INS_OPTS_SCALABLE_D); // LD1B {.D }, /Z, [{, #, MUL VL}] // IF_SVE_IJ_3A_F + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V7, REG_P3, REG_R5, 2, + INS_OPTS_SCALABLE_S); // LD1SH {.S }, /Z, [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V7, REG_P3, REG_R5, 2, INS_OPTS_SCALABLE_D); // LD1SH {.D }, /Z, [{, #, MUL VL}] diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 79743adc9127e5..72651b06ce324c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9465,7 +9465,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, break; case INS_sve_ld1sh: - assert(opt == INS_OPTS_SCALABLE_D); + assert(insOptsScalableWords(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); @@ -12770,6 +12770,7 @@ void emitter::emitIns_Call(EmitCallType callType, case IF_SVE_IH_3A_F: case IF_SVE_IJ_3A_D: case IF_SVE_IJ_3A_E: + case IF_SVE_IJ_3A_F: return true; default: @@ -12834,6 +12835,22 @@ void emitter::emitIns_Call(EmitCallType callType, case 
EA_8BYTE: return code; // By default, the instruction already encodes 64-bit. + + default: + assert(!"Invalid size for encoding dtype."); + } + + case IF_SVE_IJ_3A_F: + switch (size) + { + case EA_4BYTE: + return code | (1 << 21); // Set bit '21' to 1. + + case EA_8BYTE: + return code; // By default, the instruction already encodes 64-bit. + + default: + assert(!"Invalid size for encoding dtype."); } default: From e8aaf876cc98ae39a80fde37153fc5f723d3a836 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 13 Dec 2023 12:08:25 -0800 Subject: [PATCH 11/37] Properly encoding ld1h --- src/coreclr/jit/codegenarm64.cpp | 4 ++++ src/coreclr/jit/emitarm64.cpp | 27 +++++++++++++++++++++++++-- src/coreclr/jit/instrsarm64sve.h | 4 ++-- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 1335da349d0130..bd83b51a479084 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10699,6 +10699,10 @@ void CodeGen::genArm64EmitterUnitTests() INS_OPTS_SCALABLE_D); // LD1SH {.D }, /Z, [{, #, MUL VL}] // IF_SVE_IJ_3A_G + theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V2, REG_P1, REG_R6, 1, + INS_OPTS_SCALABLE_H); // LD1H {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V2, REG_P1, REG_R6, 1, + INS_OPTS_SCALABLE_S); // LD1H {.S }, /Z, [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V2, REG_P1, REG_R6, 1, INS_OPTS_SCALABLE_D); // LD1H {.D }, /Z, [{, #, MUL VL}] diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 72651b06ce324c..c35aec3198bfac 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9474,7 +9474,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, break; case INS_sve_ld1h: - assert(opt == INS_OPTS_SCALABLE_D); + assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); 
assert(isGeneralRegister(reg3)); @@ -12763,6 +12763,11 @@ void emitter::emitIns_Call(EmitCallType callType, return 0; } +/***************************************************************************** + * + * Returns true if the specified format can encode the 'dtype' field. + */ + /*static*/ bool emitter::canEncodeSveElemsize_dtype(insFormat fmt) { switch (fmt) @@ -12771,6 +12776,7 @@ void emitter::emitIns_Call(EmitCallType callType, case IF_SVE_IJ_3A_D: case IF_SVE_IJ_3A_E: case IF_SVE_IJ_3A_F: + case IF_SVE_IJ_3A_G: return true; default: @@ -12781,7 +12787,7 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction - * This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. + * based on the format for the 'dtype' field. */ /*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtype(insFormat fmt, emitAttr size, code_t code) @@ -12799,6 +12805,7 @@ void emitter::emitIns_Call(EmitCallType callType, return code; // By default, the instruction already encodes 64-bit. case EA_16BYTE: + // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '0' to get the proper encoding for Q. return (code & ~((1 << 22) | (1 << 21) | (1 << 15))) | (1 << 20); // Set bits '22', '21' and '15' to 0. Set bit '20' to 1. default: @@ -12853,6 +12860,22 @@ void emitter::emitIns_Call(EmitCallType callType, assert(!"Invalid size for encoding dtype."); } + case IF_SVE_IJ_3A_G: + switch (size) + { + case EA_2BYTE: + return code & ~(1 << 22); // Set bit '22' to 0. + + case EA_4BYTE: + return code & ~(1 << 21); // Set bit '21' to 0. + + case EA_8BYTE: + return code; // By default, the instruction already encodes 64-bit. 
+ + default: + assert(!"Invalid size for encoding dtype."); + } + default: assert(!"Invalid format for encoding dtype."); } diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index 01ecba92d33417..b2640aa65e967b 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -119,7 +119,7 @@ INST9(ld1sh, "ld1sh", 0, IF_SV // enum name info SVE_HW_4A SVE_HW_4A_A SVE_HW_4A_B SVE_HW_4A_C SVE_HW_4B SVE_HW_4B_D SVE_HX_3A_E SVE_IJ_3A_G SVE_IK_4A_I -INST9(ld1h, "ld1h", 0, IF_SVE_9B, 0x84A04000, 0xC4A04000, 0xC4804000, 0x84804000, 0xC4E0C000, 0xC4C0C000, 0x84A0C000, 0xA480A000, 0xA4804000 ) +INST9(ld1h, "ld1h", 0, IF_SVE_9B, 0x84A04000, 0xC4A04000, 0xC4804000, 0x84804000, 0xC4E0C000, 0xC4C0C000, 0x84A0C000, 0xA4E0A000, 0xA4804000 ) // LD1H {.S }, /Z, [, .S, #1] SVE_HW_4A 100001001h1mmmmm 010gggnnnnnttttt 84A0 4000 // LD1H {.D }, /Z, [, .D, #1] SVE_HW_4A_A 110001001h1mmmmm 010gggnnnnnttttt C4A0 4000 // LD1H {.D }, /Z, [, .D, ] SVE_HW_4A_B 110001001h0mmmmm 010gggnnnnnttttt C480 4000 @@ -127,7 +127,7 @@ INST9(ld1h, "ld1h", 0, IF_SV // LD1H {.D }, /Z, [, .D, LSL #1] SVE_HW_4B 11000100111mmmmm 110gggnnnnnttttt C4E0 C000 // LD1H {.D }, /Z, [, .D] SVE_HW_4B_D 11000100110mmmmm 110gggnnnnnttttt C4C0 C000 // LD1H {.D }, /Z, [.D{, #}] SVE_HX_3A_E 10000100101iiiii 110gggnnnnnttttt 84A0 C000 - // LD1H {.D }, /Z, [{, #, MUL VL}] SVE_IJ_3A_G 101001001000iiii 101gggnnnnnttttt A480 A000 + // LD1H {.D }, /Z, [{, #, MUL VL}] SVE_IJ_3A_G 101001001110iiii 101gggnnnnnttttt A4E0 A000 // LD1H {.D }, /Z, [, , LSL #1] SVE_IK_4A_I 10100100100mmmmm 010gggnnnnnttttt A480 4000 From 9300110a2bf5fe5cb0fa2f7df6be021b8596da22 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 13 Dec 2023 12:10:57 -0800 Subject: [PATCH 12/37] Fixing up comments --- src/coreclr/jit/emitarm64.cpp | 2 +- src/coreclr/jit/emitarm64.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 
c35aec3198bfac..6751baa780906f 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -12786,7 +12786,7 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction + * Returns the encoding to select the 1/2/4/8/16 byte elemsize for an Arm64 Sve vector instruction * based on the format for the 'dtype' field. */ diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index d9f6dcf59addf2..1ef35458100911 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -472,8 +472,11 @@ static code_t insEncodeSveElemsize(insOpts opt); // This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); +// Returns true if the specified format can encode the 'dtype' field. static bool canEncodeSveElemsize_dtype(insFormat fmt); +// Returns the encoding to select the 1/2/4/8/16 byte elemsize for an Arm64 Sve vector instruction +// based on the format for the 'dtype' field. 
static code_t insEncodeSveElemsize_dtype(insFormat fmt, emitAttr size, code_t code); static code_t insEncodeSimm4_19_to_16(ssize_t imm); From 8233187ef6ac418e5298fe25e8bc1868af972741 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 13 Dec 2023 12:26:20 -0800 Subject: [PATCH 13/37] Simplifying insEncodeSveElemsize_dtype --- src/coreclr/jit/emitarm64.cpp | 99 ++++++++++++++--------------------- 1 file changed, 40 insertions(+), 59 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 6751baa780906f..950f2f2dce8292 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -12793,92 +12793,73 @@ void emitter::emitIns_Call(EmitCallType callType, /*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtype(insFormat fmt, emitAttr size, code_t code) { assert(canEncodeSveElemsize_dtype(fmt)); - switch (fmt) + + switch (size) { - case IF_SVE_IH_3A_F: - switch (size) + case EA_1BYTE: + switch (fmt) { - case EA_4BYTE: - return code & ~(1 << 21); // Set bit '21' to 0. - - case EA_8BYTE: - return code; // By default, the instruction already encodes 64-bit. - - case EA_16BYTE: - // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '0' to get the proper encoding for Q. - return (code & ~((1 << 22) | (1 << 21) | (1 << 15))) | (1 << 20); // Set bits '22', '21' and '15' to 0. Set bit '20' to 1. + case IF_SVE_IJ_3A_E: + return code & ~((1 << 22) | (1 << 21)); // Set bit '22' and '21' to 0. default: - assert(!"Invalid size for encoding dtype."); + assert(!"Invalid format for encoding dtype."); } + return code; - case IF_SVE_IJ_3A_D: - switch (size) + case EA_2BYTE: + switch (fmt) { - case EA_2BYTE: - return code | (1 << 22); // Set bit '22' to 1. - - case EA_4BYTE: - return code | (1 << 21); // Set bit '21' to 1. + case IF_SVE_IJ_3A_E: + case IF_SVE_IJ_3A_G: + return code & ~(1 << 22); // Set bit '22' to 0. - case EA_8BYTE: - return code; // By default, the instruction already encodes 64-bit. 
+ case IF_SVE_IJ_3A_D: + return code | (1 << 22); // Set bit '22' to 1. default: - assert(!"Invalid size for encoding dtype."); + assert(!"Invalid format for encoding dtype."); } + return code; - case IF_SVE_IJ_3A_E: - switch (size) + case EA_4BYTE: + switch (fmt) { - case EA_1BYTE: - return code & ~((1 << 22) | (1 << 21)); // Set bit '22' and '21' to 0. - - case EA_2BYTE: - return code & ~(1 << 22); // Set bit '22' to 0. - - case EA_4BYTE: + case IF_SVE_IH_3A_F: + case IF_SVE_IJ_3A_E: + case IF_SVE_IJ_3A_G: return code & ~(1 << 21); // Set bit '21' to 0. - case EA_8BYTE: - return code; // By default, the instruction already encodes 64-bit. - - default: - assert(!"Invalid size for encoding dtype."); - } - - case IF_SVE_IJ_3A_F: - switch (size) - { - case EA_4BYTE: + case IF_SVE_IJ_3A_D: + case IF_SVE_IJ_3A_F: return code | (1 << 21); // Set bit '21' to 1. - case EA_8BYTE: - return code; // By default, the instruction already encodes 64-bit. - default: - assert(!"Invalid size for encoding dtype."); + assert(!"Invalid format for encoding dtype."); } + return code; - case IF_SVE_IJ_3A_G: - switch (size) - { - case EA_2BYTE: - return code & ~(1 << 22); // Set bit '22' to 0. - - case EA_4BYTE: - return code & ~(1 << 21); // Set bit '21' to 0. + case EA_8BYTE: + return code; // By default, the instruction already encodes 64-bit. - case EA_8BYTE: - return code; // By default, the instruction already encodes 64-bit. + case EA_16BYTE: + switch (fmt) + { + case IF_SVE_IH_3A_F: + // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '0' to get the + // proper encoding for Q. + return (code & ~((1 << 22) | (1 << 21) | (1 << 15))) | + (1 << 20); // Set bits '22', '21' and '15' to 0. Set bit '20' to 1. 
default: - assert(!"Invalid size for encoding dtype."); + assert(!"Invalid format for encoding dtype."); } + return code; default: - assert(!"Invalid format for encoding dtype."); + assert(!"Invalid size for encoding dtype."); } + return code; } From 224a8961281bdaf3910d90a60697d7b0d4fc56b9 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 13 Dec 2023 13:06:20 -0800 Subject: [PATCH 14/37] Instruction determines how to encode the dtype --- src/coreclr/jit/codegenarm64.cpp | 4 ++ src/coreclr/jit/emitarm64.cpp | 70 +++++++++++++++++--------------- src/coreclr/jit/emitarm64.h | 8 ++-- src/coreclr/jit/instrsarm64sve.h | 4 +- 4 files changed, 47 insertions(+), 39 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index bd83b51a479084..e776502734946a 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10717,6 +10717,10 @@ void CodeGen::genArm64EmitterUnitTests() INS_OPTS_SCALABLE_D); // LDNF1SW {.D }, /Z, [{, #, MUL VL}] // IF_SVE_IL_3A_A + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sh, EA_SCALABLE, REG_V0, REG_P1, REG_R5, 5, + INS_OPTS_SCALABLE_S); // LDNF1SH {.S }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1w, EA_SCALABLE, REG_V0, REG_P2, REG_R4, 5, + INS_OPTS_SCALABLE_S); // LDNF1W {.S }, /Z, [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sh, EA_SCALABLE, REG_V0, REG_P1, REG_R5, 5, INS_OPTS_SCALABLE_D); // LDNF1SH {.D }, /Z, [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1w, EA_SCALABLE, REG_V0, REG_P2, REG_R4, 5, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 950f2f2dce8292..571dfa3dedd503 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9484,7 +9484,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ldnf1sw: case INS_sve_ldnf1d: - assert(insOptsScalable(opt)); + assert(opt == INS_OPTS_SCALABLE_D); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); 
assert(isGeneralRegister(reg3)); @@ -9494,7 +9494,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ldnf1sh: case INS_sve_ldnf1w: - assert(insOptsScalable(opt)); + assert(insOptsScalableWords(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); @@ -12765,18 +12765,20 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * Returns true if the specified format can encode the 'dtype' field. + * Returns true if the specified instruction can encode the 'dtype' field. */ -/*static*/ bool emitter::canEncodeSveElemsize_dtype(insFormat fmt) +/*static*/ bool emitter::canEncodeSveElemsize_dtype(instruction ins) { - switch (fmt) + switch (ins) { - case IF_SVE_IH_3A_F: - case IF_SVE_IJ_3A_D: - case IF_SVE_IJ_3A_E: - case IF_SVE_IJ_3A_F: - case IF_SVE_IJ_3A_G: + case INS_sve_ld1w: + case INS_sve_ld1sb: + case INS_sve_ld1b: + case INS_sve_ld1sh: + case INS_sve_ld1h: + case INS_sve_ldnf1sh: + case INS_sve_ldnf1w: return true; default: @@ -12787,55 +12789,57 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * * Returns the encoding to select the 1/2/4/8/16 byte elemsize for an Arm64 Sve vector instruction - * based on the format for the 'dtype' field. + * for the 'dtype' field. */ -/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtype(insFormat fmt, emitAttr size, code_t code) +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t code) { - assert(canEncodeSveElemsize_dtype(fmt)); + assert(canEncodeSveElemsize_dtype(ins)); switch (size) { case EA_1BYTE: - switch (fmt) + switch (ins) { - case IF_SVE_IJ_3A_E: + case INS_sve_ld1b: return code & ~((1 << 22) | (1 << 21)); // Set bit '22' and '21' to 0. 
default: - assert(!"Invalid format for encoding dtype."); + assert(!"Invalid instruction for encoding dtype."); } return code; case EA_2BYTE: - switch (fmt) + switch (ins) { - case IF_SVE_IJ_3A_E: - case IF_SVE_IJ_3A_G: + case INS_sve_ld1b: + case INS_sve_ld1h: return code & ~(1 << 22); // Set bit '22' to 0. - case IF_SVE_IJ_3A_D: + case INS_sve_ld1sb: return code | (1 << 22); // Set bit '22' to 1. default: - assert(!"Invalid format for encoding dtype."); + assert(!"Invalid instruction for encoding dtype."); } return code; case EA_4BYTE: - switch (fmt) + switch (ins) { - case IF_SVE_IH_3A_F: - case IF_SVE_IJ_3A_E: - case IF_SVE_IJ_3A_G: + case INS_sve_ld1w: + case INS_sve_ld1b: + case INS_sve_ld1h: + case INS_sve_ldnf1w: return code & ~(1 << 21); // Set bit '21' to 0. - case IF_SVE_IJ_3A_D: - case IF_SVE_IJ_3A_F: + case INS_sve_ld1sb: + case INS_sve_ld1sh: + case INS_sve_ldnf1sh: return code | (1 << 21); // Set bit '21' to 1. default: - assert(!"Invalid format for encoding dtype."); + assert(!"Invalid instruction for encoding dtype."); } return code; @@ -12843,16 +12847,16 @@ void emitter::emitIns_Call(EmitCallType callType, return code; // By default, the instruction already encodes 64-bit. case EA_16BYTE: - switch (fmt) + switch (ins) { - case IF_SVE_IH_3A_F: + case INS_sve_ld1w: // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '0' to get the // proper encoding for Q. return (code & ~((1 << 22) | (1 << 21) | (1 << 15))) | (1 << 20); // Set bits '22', '21' and '15' to 0. Set bit '20' to 1. 
default: - assert(!"Invalid format for encoding dtype."); + assert(!"Invalid instruction for encoding dtype."); } return code; @@ -14960,9 +14964,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg code |= insEncodeSimm4_19_to_16(imm); // iiii - if (canEncodeSveElemsize_dtype(fmt)) + if (canEncodeSveElemsize_dtype(ins)) { - code = insEncodeSveElemsize_dtype(fmt, optGetSveElemsize(id->idInsOpt()), code); + code = insEncodeSveElemsize_dtype(ins, optGetSveElemsize(id->idInsOpt()), code); } dst += emitOutput_Instr(dst, code); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 1ef35458100911..a4bfab49ac35de 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -472,12 +472,12 @@ static code_t insEncodeSveElemsize(insOpts opt); // This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); -// Returns true if the specified format can encode the 'dtype' field. -static bool canEncodeSveElemsize_dtype(insFormat fmt); +// Returns true if the specified instruction can encode the 'dtype' field. +static bool canEncodeSveElemsize_dtype(instruction ins); // Returns the encoding to select the 1/2/4/8/16 byte elemsize for an Arm64 Sve vector instruction -// based on the format for the 'dtype' field. -static code_t insEncodeSveElemsize_dtype(insFormat fmt, emitAttr size, code_t code); +// for the 'dtype' field. 
+static code_t insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t code); static code_t insEncodeSimm4_19_to_16(ssize_t imm); diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index b2640aa65e967b..6108ac2385bc74 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -2839,8 +2839,8 @@ INST1(ldnf1sb, "ldnf1sb", 0, IF_SV INST1(ldnf1sh, "ldnf1sh", 0, IF_SVE_IL_3A_A, 0xA510A000 ) // LDNF1SH {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_A 101001010001iiii 101gggnnnnnttttt A510 A000 -INST1(ldnf1w, "ldnf1w", 0, IF_SVE_IL_3A_A, 0xA550A000 ) - // LDNF1W {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_A 101001010101iiii 101gggnnnnnttttt A550 A000 +INST1(ldnf1w, "ldnf1w", 0, IF_SVE_IL_3A_A, 0xA570A000 ) + // LDNF1W {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_A 101001010111iiii 101gggnnnnnttttt A570 A000 // enum name info SVE_IW_4A From 1a4a5456aff47365ba788693178e2ba284168293 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 13 Dec 2023 13:34:27 -0800 Subject: [PATCH 15/37] More encodings --- src/coreclr/jit/codegenarm64.cpp | 14 ++++++++++++++ src/coreclr/jit/emitarm64.cpp | 14 ++++++++++++-- src/coreclr/jit/instrsarm64sve.h | 8 ++++---- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index e776502734946a..902a230529b968 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10727,12 +10727,26 @@ void CodeGen::genArm64EmitterUnitTests() INS_OPTS_SCALABLE_D); // LDNF1W {.D }, /Z, [{, #, MUL VL}] // IF_SVE_IL_3A_B + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1h, EA_SCALABLE, REG_V1, REG_P3, REG_R2, 5, + INS_OPTS_SCALABLE_H); // LDNF1H {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sb, EA_SCALABLE, REG_V0, REG_P4, REG_R1, 5, + INS_OPTS_SCALABLE_H); // LDNF1SB {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1h, EA_SCALABLE, REG_V1, REG_P3, REG_R2, 5, + INS_OPTS_SCALABLE_S); 
// LDNF1H {.S }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sb, EA_SCALABLE, REG_V0, REG_P4, REG_R1, 5, + INS_OPTS_SCALABLE_S); // LDNF1SB {.S }, /Z, [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1h, EA_SCALABLE, REG_V1, REG_P3, REG_R2, 5, INS_OPTS_SCALABLE_D); // LDNF1H {.D }, /Z, [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sb, EA_SCALABLE, REG_V0, REG_P4, REG_R1, 5, INS_OPTS_SCALABLE_D); // LDNF1SB {.D }, /Z, [{, #, MUL VL}] // IF_SVE_IL_3A_C + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, 5, + INS_OPTS_SCALABLE_B); // LDNF1B {.B }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, 5, + INS_OPTS_SCALABLE_H); // LDNF1B {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, 5, + INS_OPTS_SCALABLE_S); // LDNF1B {.S }, /Z, [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, 5, INS_OPTS_SCALABLE_D); // LDNF1B {.D }, /Z, [{, #, MUL VL}] diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 571dfa3dedd503..1b463893b45521 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9504,7 +9504,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ldnf1h: case INS_sve_ldnf1sb: - assert(insOptsScalable(opt)); + assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); @@ -9513,7 +9513,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, break; case INS_sve_ldnf1b: - assert(insOptsScalable(opt)); + assert(insOptsScalableSimple(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); @@ -12779,6 +12779,9 @@ void emitter::emitIns_Call(EmitCallType callType, case INS_sve_ld1h: case INS_sve_ldnf1sh: case INS_sve_ldnf1w: + case INS_sve_ldnf1h: + case 
INS_sve_ldnf1sb: + case INS_sve_ldnf1b: return true; default: @@ -12802,6 +12805,7 @@ void emitter::emitIns_Call(EmitCallType callType, switch (ins) { case INS_sve_ld1b: + case INS_sve_ldnf1b: return code & ~((1 << 22) | (1 << 21)); // Set bit '22' and '21' to 0. default: @@ -12814,9 +12818,12 @@ void emitter::emitIns_Call(EmitCallType callType, { case INS_sve_ld1b: case INS_sve_ld1h: + case INS_sve_ldnf1h: + case INS_sve_ldnf1b: return code & ~(1 << 22); // Set bit '22' to 0. case INS_sve_ld1sb: + case INS_sve_ldnf1sb: return code | (1 << 22); // Set bit '22' to 1. default: @@ -12831,11 +12838,14 @@ void emitter::emitIns_Call(EmitCallType callType, case INS_sve_ld1b: case INS_sve_ld1h: case INS_sve_ldnf1w: + case INS_sve_ldnf1h: + case INS_sve_ldnf1b: return code & ~(1 << 21); // Set bit '21' to 0. case INS_sve_ld1sb: case INS_sve_ld1sh: case INS_sve_ldnf1sh: + case INS_sve_ldnf1sb: return code | (1 << 21); // Set bit '21' to 1. default: diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index 6108ac2385bc74..d270b5f54709e6 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -2815,8 +2815,8 @@ INST1(ld1rw, "ld1rw", 0, IF_SV // enum name info SVE_IL_3A_C -INST1(ldnf1b, "ldnf1b", 0, IF_SVE_IL_3A_C, 0xA410A000 ) - // LDNF1B {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_C 101001000001iiii 101gggnnnnnttttt A410 A000 +INST1(ldnf1b, "ldnf1b", 0, IF_SVE_IL_3A_C, 0xA470A000 ) + // LDNF1B {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_C 101001000111iiii 101gggnnnnnttttt A470 A000 // enum name info SVE_IL_3A @@ -2828,8 +2828,8 @@ INST1(ldnf1sw, "ldnf1sw", 0, IF_SV // enum name info SVE_IL_3A_B -INST1(ldnf1h, "ldnf1h", 0, IF_SVE_IL_3A_B, 0xA490A000 ) - // LDNF1H {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_B 101001001001iiii 101gggnnnnnttttt A490 A000 +INST1(ldnf1h, "ldnf1h", 0, IF_SVE_IL_3A_B, 0xA4F0A000 ) + // LDNF1H {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_B 101001001111iiii 101gggnnnnnttttt A4F0 A000 INST1(ldnf1sb, "ldnf1sb", 0, 
IF_SVE_IL_3A_B, 0xA590A000 ) // LDNF1SB {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_B 101001011001iiii 101gggnnnnnttttt A590 A000 From ab48edef8313aaea5e477b0128ba4043a2e3de57 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 13 Dec 2023 15:35:37 -0800 Subject: [PATCH 16/37] More encodings --- src/coreclr/jit/codegenarm64.cpp | 36 ++++- src/coreclr/jit/emitarm64.cpp | 244 ++++++++++++++++++++++++++++++- src/coreclr/jit/emitarm64.h | 16 ++ 3 files changed, 287 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 902a230529b968..83b19c7e79af03 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10741,15 +10741,43 @@ void CodeGen::genArm64EmitterUnitTests() INS_OPTS_SCALABLE_D); // LDNF1SB {.D }, /Z, [{, #, MUL VL}] // IF_SVE_IL_3A_C - theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, 5, + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, -4, INS_OPTS_SCALABLE_B); // LDNF1B {.B }, /Z, [{, #, MUL VL}] - theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, 5, + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, -2, INS_OPTS_SCALABLE_H); // LDNF1B {.H }, /Z, [{, #, MUL VL}] - theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, 5, + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, 2, INS_OPTS_SCALABLE_S); // LDNF1B {.S }, /Z, [{, #, MUL VL}] - theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, 5, + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, 1, INS_OPTS_SCALABLE_D); // LDNF1B {.D }, /Z, [{, #, MUL VL}] + // IF_SVE_IM_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ldnt1b, EA_SCALABLE, REG_V0, REG_P1, REG_R2, -5, + INS_OPTS_SCALABLE_B); // LDNT1B {.B }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnt1d, EA_SCALABLE, REG_V3, REG_P4, 
REG_R5, -1, + INS_OPTS_SCALABLE_D); // LDNT1D {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnt1h, EA_SCALABLE, REG_V6, REG_P7, REG_R8, 0, + INS_OPTS_SCALABLE_H); // LDNT1H {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnt1w, EA_SCALABLE, REG_V1, REG_P2, REG_R3, -8, + INS_OPTS_SCALABLE_S); // LDNT1W {.S }, /Z, [{, #, MUL VL}] + + // IF_SVE_IO_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rob, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 0, + INS_OPTS_SCALABLE_B); // LD1ROB {.B }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rod, EA_SCALABLE, REG_V4, REG_P5, REG_R6, -32, + INS_OPTS_SCALABLE_D); // LD1ROD {.D }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1roh, EA_SCALABLE, REG_V8, REG_P3, REG_R1, -256, + INS_OPTS_SCALABLE_H); // LD1ROH {.H }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1row, EA_SCALABLE, REG_V3, REG_P4, REG_R0, 224, + INS_OPTS_SCALABLE_S); // LD1ROW {.S }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqb, EA_SCALABLE, REG_V6, REG_P7, REG_R8, 64, + INS_OPTS_SCALABLE_B); // LD1RQB {.B }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqd, EA_SCALABLE, REG_V9, REG_P0, REG_R1, -128, + INS_OPTS_SCALABLE_D); // LD1RQD {.D }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqh, EA_SCALABLE, REG_V4, REG_P5, REG_R6, 112, + INS_OPTS_SCALABLE_H); // LD1RQH {.H }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqw, EA_SCALABLE, REG_V3, REG_P2, REG_R1, -16, + INS_OPTS_SCALABLE_S); // LD1RQW {.S }, /Z, [{, #}] + #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE #ifdef ALL_ARM64_EMITTER_UNIT_TESTS diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 1b463893b45521..07414f76542eae 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1230,7 +1230,30 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isVectorRegister(id->idReg1())); // ttttt assert(isPredicateRegister(id->idReg2())); // ggg assert(isGeneralRegister(id->idReg3())); 
// nnnnn - assert(isValidSimm4(emitGetInsSC(id))); // iiii + +#ifdef DEBUG + switch (id->idIns()) + { + case INS_sve_ld1rqb: + case INS_sve_ld1rqd: + case INS_sve_ld1rqh: + case INS_sve_ld1rqw: + assert(isValidSimm4_MultipleOf16(emitGetInsSC(id))); // iiii + break; + + case INS_sve_ld1rob: + case INS_sve_ld1rod: + case INS_sve_ld1roh: + case INS_sve_ld1row: + assert(isValidSimm4_MultipleOf32(emitGetInsSC(id))); // iiii + break; + + default: + assert(isValidSimm4(emitGetInsSC(id))); // iiii + break; + } +#endif // DEBUG + assert(isScalableVectorSize(elemsize)); break; @@ -9521,6 +9544,110 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_IL_3A_C; break; + case INS_sve_ldnt1b: + case INS_sve_ldnt1h: + case INS_sve_ldnt1w: + case INS_sve_ldnt1d: + assert(insOptsScalableSimple(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + +#ifdef DEBUG + switch (ins) + { + case INS_sve_ldnt1b: + assert(opt == INS_OPTS_SCALABLE_B); + break; + + case INS_sve_ldnt1h: + assert(opt == INS_OPTS_SCALABLE_H); + break; + + case INS_sve_ldnt1w: + assert(opt == INS_OPTS_SCALABLE_S); + break; + + case INS_sve_ldnt1d: + assert(opt == INS_OPTS_SCALABLE_D); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + + fmt = IF_SVE_IM_3A; + break; + + case INS_sve_ld1rqb: + case INS_sve_ld1rob: + case INS_sve_ld1rqh: + case INS_sve_ld1roh: + case INS_sve_ld1rqw: + case INS_sve_ld1row: + case INS_sve_ld1rqd: + case INS_sve_ld1rod: + assert(insOptsScalableSimple(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + +#ifdef DEBUG + switch (ins) + { + case INS_sve_ld1rqb: + case INS_sve_ld1rqd: + case INS_sve_ld1rqh: + case INS_sve_ld1rqw: + assert(isValidSimm4_MultipleOf16(imm)); + break; + + case INS_sve_ld1rob: + case INS_sve_ld1rod: + case INS_sve_ld1roh: + case INS_sve_ld1row: + 
assert(isValidSimm4_MultipleOf32(imm)); + break; + + default: + assert(isValidSimm4(imm)); + break; + } + + switch (ins) + { + case INS_sve_ld1rqb: + case INS_sve_ld1rob: + assert(opt == INS_OPTS_SCALABLE_B); + break; + + case INS_sve_ld1rqh: + case INS_sve_ld1roh: + assert(opt == INS_OPTS_SCALABLE_H); + break; + + case INS_sve_ld1rqw: + case INS_sve_ld1row: + assert(opt == INS_OPTS_SCALABLE_S); + break; + + case INS_sve_ld1rqd: + case INS_sve_ld1rod: + assert(opt == INS_OPTS_SCALABLE_D); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + + fmt = IF_SVE_IO_3A; + break; + default: unreached(); break; @@ -12885,9 +13012,35 @@ void emitter::emitIns_Call(EmitCallType callType, /*static*/ emitter::code_t emitter::insEncodeSimm4_19_to_16(ssize_t imm) { assert(isValidSimm4(imm)); + if (imm < 0) + { + imm = (imm & 0xF); + } return (code_t)imm << 16; } +/***************************************************************************** + * + * TODO + */ + +/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf16_19_to_16(ssize_t imm) +{ + assert(isValidSimm4_MultipleOf16(imm)); + return insEncodeSimm4_19_to_16(imm / 16); +} + +/***************************************************************************** + * + * TODO + */ + +/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf32_19_to_16(ssize_t imm) +{ + assert(isValidSimm4_MultipleOf32(imm)); + return insEncodeSimm4_19_to_16(imm / 32); +} + BYTE* emitter::emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id) { instruction ins = id->idIns(); @@ -14972,7 +15125,27 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg - code |= insEncodeSimm4_19_to_16(imm); // iiii + + switch (ins) + { + case INS_sve_ld1rqb: + case INS_sve_ld1rqd: + case INS_sve_ld1rqh: + case 
INS_sve_ld1rqw: + code |= insEncodeSimm4_MultipleOf16_19_to_16(imm); // iiii + break; + + case INS_sve_ld1rob: + case INS_sve_ld1rod: + case INS_sve_ld1roh: + case INS_sve_ld1row: + code |= insEncodeSimm4_MultipleOf32_19_to_16(imm); // iiii + break; + + default: + code |= insEncodeSimm4_19_to_16(imm); // iiii + break; + } if (canEncodeSveElemsize_dtype(ins)) { @@ -17358,6 +17531,7 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg1(), id->idInsOpt(), false); // ddddd break; + // { .D }, /Z, [{, #, MUL VL}] case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus // immediate) case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus @@ -17369,7 +17543,6 @@ void emitter::emitDispInsHelp( case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - // { .D }, /Z, [{, #, MUL VL}] case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus // immediate) @@ -17398,8 +17571,20 @@ void emitter::emitDispInsHelp( emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn if (imm != 0) { - emitDispImm(emitGetInsSC(id), true); // iiii - printf("mul vl"); + switch (fmt) + { + case IF_SVE_IO_3A: + // This does not have to be printed as hex. + // We only do it because the capstone disassembly displays this immediate as hex. + // We could not modify capstone without affecting other cases. 
+ emitDispImm(emitGetInsSC(id), false, /* alwaysHex */ true); // iiii + break; + + default: + emitDispImm(emitGetInsSC(id), true); // iiii + printf("mul vl"); + break; + } } printf("]"); break; @@ -19917,6 +20102,55 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_6C; break; + case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus + // immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + + case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus + // immediate) + switch (ins) + { + case INS_sve_ld1rqb: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1rob: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqh: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1roh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqw: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1row: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqd: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1rod: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + default: // all other instructions 
perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index a4bfab49ac35de..52784cae798a22 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -481,6 +481,10 @@ static code_t insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t static code_t insEncodeSimm4_19_to_16(ssize_t imm); +static code_t insEncodeSimm4_MultipleOf16_19_to_16(ssize_t imm); + +static code_t insEncodeSimm4_MultipleOf32_19_to_16(ssize_t imm); + // Returns true if 'reg' represents an integer register. static bool isIntegerRegister(regNumber reg) { @@ -499,6 +503,18 @@ static bool isValidSimm4(ssize_t value) return (-8 <= value) && (value <= 7); }; +// Returns true if 'value' is a legal signed multiple of 16 immediate 4 bit encoding (such as for LD1RQB). +static bool isValidSimm4_MultipleOf16(ssize_t value) +{ + return (-128 <= value) && (value <= 112) && (value % 16 == 0); +}; + +// Returns true if 'value' is a legal signed multiple of 32 immediate 4 bit encoding (such as for LD1ROB). +static bool isValidSimm4_MultipleOf32(ssize_t value) +{ + return (-256 <= value) && (value <= 224) && (value % 32 == 0); +}; + // Returns true if 'value' is a legal unsigned immediate 5 bit encoding (such as for CCMP). 
static bool isValidUimm5(ssize_t value) { From a97db1e3bf637c55688293d40e739122e3ed8f5e Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 13 Dec 2023 17:06:52 -0800 Subject: [PATCH 17/37] Added SVE_IQ_3A encoding --- src/coreclr/jit/codegenarm64.cpp | 52 ++++++++++ src/coreclr/jit/emitarm64.cpp | 173 +++++++++++++++++++++++++++++-- src/coreclr/jit/emitarm64.h | 24 +++++ 3 files changed, 243 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 83b19c7e79af03..ca74851ac15b03 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10778,6 +10778,58 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqw, EA_SCALABLE, REG_V3, REG_P2, REG_R1, -16, INS_OPTS_SCALABLE_S); // LD1RQW {.S }, /Z, [{, #}] + // IF_SVE_IQ_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V0, REG_P1, REG_R2, -16, + INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 14, + INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3q, EA_SCALABLE, REG_V0, REG_P4, REG_R5, -24, + INS_OPTS_SCALABLE_Q); // LD3Q {.Q, .Q, .Q }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3q, EA_SCALABLE, REG_V0, REG_P4, REG_R5, 21, + INS_OPTS_SCALABLE_Q); // LD3Q {.Q, .Q, .Q }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V0, REG_P5, REG_R3, -32, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V0, REG_P5, REG_R3, 28, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V12, REG_P1, REG_R2, -16, + INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, 
EA_SCALABLE, REG_V13, REG_P1, REG_R2, 14, + INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3q, EA_SCALABLE, REG_V14, REG_P4, REG_R5, -24, + INS_OPTS_SCALABLE_Q); // LD3Q {.Q, .Q, .Q }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3q, EA_SCALABLE, REG_V15, REG_P4, REG_R5, 21, + INS_OPTS_SCALABLE_Q); // LD3Q {.Q, .Q, .Q }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V16, REG_P5, REG_R3, -32, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V27, REG_P5, REG_R3, 28, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V28, REG_P5, REG_R3, 28, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V29, REG_P5, REG_R3, 28, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V30, REG_P5, REG_R3, 28, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V31, REG_P5, REG_R3, 28, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V31, REG_P1, REG_R2, -16, + INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL + // VL}] #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE #ifdef ALL_ARM64_EMITTER_UNIT_TESTS diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 07414f76542eae..d0e1d7f2ad79dd 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1234,6 +1234,18 @@ void emitter::emitInsSanityCheck(instrDesc* id) #ifdef DEBUG switch (id->idIns()) { + case INS_sve_ld2q: + 
assert(isValidSimm4_MultipleOf2(emitGetInsSC(id))); // iiii + break; + + case INS_sve_ld3q: + assert(isValidSimm4_MultipleOf3(emitGetInsSC(id))); // iiii + break; + + case INS_sve_ld4q: + assert(isValidSimm4_MultipleOf4(emitGetInsSC(id))); // iiii + break; + case INS_sve_ld1rqb: case INS_sve_ld1rqd: case INS_sve_ld1rqh: @@ -9613,7 +9625,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, break; default: - assert(isValidSimm4(imm)); + assert(!"Invalid instruction"); break; } @@ -9648,6 +9660,38 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_IO_3A; break; + case INS_sve_ld2q: + case INS_sve_ld3q: + case INS_sve_ld4q: + assert(opt == INS_OPTS_SCALABLE_Q); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + +#ifdef DEBUG + switch (ins) + { + case INS_sve_ld2q: + assert(isValidSimm4_MultipleOf2(imm)); + break; + + case INS_sve_ld3q: + assert(isValidSimm4_MultipleOf3(imm)); + break; + + case INS_sve_ld4q: + assert(isValidSimm4_MultipleOf4(imm)); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + + fmt = IF_SVE_IQ_3A; + break; + default: unreached(); break; @@ -13019,6 +13063,39 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)imm << 16; } +/***************************************************************************** + * + * TODO + */ + +/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf2_19_to_16(ssize_t imm) +{ + assert(isValidSimm4_MultipleOf2(imm)); + return insEncodeSimm4_19_to_16(imm / 2); +} + +/***************************************************************************** + * + * TODO + */ + +/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf3_19_to_16(ssize_t imm) +{ + assert(isValidSimm4_MultipleOf3(imm)); + return insEncodeSimm4_19_to_16(imm / 3); +} + +/***************************************************************************** + * + * TODO + */ + +/*static*/ emitter::code_t 
emitter::insEncodeSimm4_MultipleOf4_19_to_16(ssize_t imm) +{ + assert(isValidSimm4_MultipleOf4(imm)); + return insEncodeSimm4_19_to_16(imm / 4); +} + /***************************************************************************** * * TODO @@ -15128,6 +15205,18 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) switch (ins) { + case INS_sve_ld2q: + code |= insEncodeSimm4_MultipleOf2_19_to_16(imm); // iiii + break; + + case INS_sve_ld3q: + code |= insEncodeSimm4_MultipleOf3_19_to_16(imm); // iiii + break; + + case INS_sve_ld4q: + code |= insEncodeSimm4_MultipleOf4_19_to_16(imm); // iiii + break; + case INS_sve_ld1rqb: case INS_sve_ld1rqd: case INS_sve_ld1rqh: @@ -15717,11 +15806,28 @@ void emitter::emitDispSveRegList(regNumber firstReg, unsigned listSize, insOpts regNumber currReg = firstReg; printf("{ "); - for (unsigned i = 0; i < listSize; i++) + if (listSize > 1) { - const bool notLastRegister = (i != listSize - 1); - emitDispSveReg(currReg, opt, notLastRegister); - currReg = (currReg == REG_V31) ? REG_V0 : REG_NEXT(currReg); + if ((listSize == 2) || (((unsigned)currReg + listSize - 1) > (unsigned)REG_V31)) + { + for (unsigned i = 0; i < listSize; i++) + { + const bool notLastRegister = (i != listSize - 1); + emitDispSveReg(currReg, opt, notLastRegister); + currReg = (currReg == REG_V31) ? 
REG_V0 : REG_NEXT(currReg); + } + } + else + { + // short-hand + emitDispSveReg(currReg, opt, false); + printf(" - "); + emitDispSveReg((regNumber)(currReg + listSize - 1), opt, false); + } + } + else if (listSize == 1) + { + emitDispSveReg(currReg, opt, false); } printf(" }"); @@ -17552,8 +17658,15 @@ void emitter::emitDispInsHelp( // immediate) case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus // immediate) + // { .B }, /Z, [{, #}] + // { .H }, /Z, [{, #}] + // { .S }, /Z, [{, #}] + // { .D }, /Z, [{, #}] case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus // immediate) + // { .Q, .Q }, /Z, [{, #, MUL VL}] + // { .Q, .Q, .Q }, /Z, [{, #, MUL VL}] + // { .Q, .Q, .Q, .Q }, /Z, [{, #, MUL VL}] case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus // immediate) case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) @@ -17565,7 +17678,24 @@ void emitter::emitDispInsHelp( case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) imm = emitGetInsSC(id); - emitDispSveRegList(id->idReg1(), 1, id->idInsOpt(), true); // ttttt + switch (ins) + { + case INS_sve_ld2q: + emitDispSveRegList(id->idReg1(), 2, id->idInsOpt(), true); // ttttt + break; + + case INS_sve_ld3q: + emitDispSveRegList(id->idReg1(), 3, id->idInsOpt(), true); // ttttt + break; + + case INS_sve_ld4q: + emitDispSveRegList(id->idReg1(), 4, id->idInsOpt(), true); // ttttt + break; + + default: + emitDispSveRegList(id->idReg1(), 1, id->idInsOpt(), true); // ttttt + break; + } emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, true); // ggg printf("["); emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn @@ -17580,6 +17710,14 @@ void 
emitter::emitDispInsHelp( emitDispImm(emitGetInsSC(id), false, /* alwaysHex */ true); // iiii break; + case IF_SVE_IQ_3A: + // This does not have to be printed as hex. + // We only do it because the capstone disassembly displays this immediate as hex. + // We could not modify capstone without affecting other cases. + emitDispImm(emitGetInsSC(id), true, /* alwaysHex */ true); // iiii + printf("mul vl"); + break; + default: emitDispImm(emitGetInsSC(id), true); // iiii printf("mul vl"); @@ -20151,6 +20289,29 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; + case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // immediate) + switch (ins) + { + case INS_sve_ld2q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld3q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld4q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 52784cae798a22..330f45fa80b1bf 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -481,6 +481,12 @@ static code_t insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t static code_t insEncodeSimm4_19_to_16(ssize_t imm); +static code_t insEncodeSimm4_MultipleOf2_19_to_16(ssize_t imm); + +static code_t insEncodeSimm4_MultipleOf3_19_to_16(ssize_t imm); + +static code_t insEncodeSimm4_MultipleOf4_19_to_16(ssize_t imm); + static code_t insEncodeSimm4_MultipleOf16_19_to_16(ssize_t 
imm); static code_t insEncodeSimm4_MultipleOf32_19_to_16(ssize_t imm); @@ -503,6 +509,24 @@ static bool isValidSimm4(ssize_t value) return (-8 <= value) && (value <= 7); }; +// Returns true if 'value' is a legal signed multiple of 2 immediate 4 bit encoding (such as for LD2Q). +static bool isValidSimm4_MultipleOf2(ssize_t value) +{ + return (-16 <= value) && (value <= 14) && (value % 2 == 0); +}; + +// Returns true if 'value' is a legal signed multiple of 3 immediate 4 bit encoding (such as for LD3Q). +static bool isValidSimm4_MultipleOf3(ssize_t value) +{ + return (-24 <= value) && (value <= 21) && (value % 3 == 0); +}; + +// Returns true if 'value' is a legal signed multiple of 4 immediate 4 bit encoding (such as for LD4Q). +static bool isValidSimm4_MultipleOf4(ssize_t value) +{ + return (-32 <= value) && (value <= 28) && (value % 4 == 0); +}; + // Returns true if 'value' is a legal signed multiple of 16 immediate 4 bit encoding (such as for LD1RQB). static bool isValidSimm4_MultipleOf16(ssize_t value) { From 6fb3c7361d225d16356da0f5bca75e875d024d25 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 14 Dec 2023 15:10:31 -0800 Subject: [PATCH 18/37] Added SVE_IS_3A and SVE_JE_3A formats --- src/coreclr/jit/codegenarm64.cpp | 48 ++++++ src/coreclr/jit/emitarm64.cpp | 270 ++++++++++++++++++++++++++++++- 2 files changed, 317 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index d1fe11d29c07d8..c67eb4860b25b6 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10920,6 +10920,54 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V31, REG_P1, REG_R2, -16, INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL // VL}] + + // IF_SVE_IS_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld2b, EA_SCALABLE, REG_V0, REG_P1, REG_R2, -16, + INS_OPTS_SCALABLE_B); // LD2B {.B, .B }, /Z, [{, #, MUL + // VL}] + 
theEmitter->emitIns_R_R_R_I(INS_sve_ld2d, EA_SCALABLE, REG_V4, REG_P5, REG_R7, 14, + INS_OPTS_SCALABLE_D); // LD2D {.D, .D }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2h, EA_SCALABLE, REG_V6, REG_P5, REG_R4, 8, + INS_OPTS_SCALABLE_H); // LD2H {.H, .H }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2w, EA_SCALABLE, REG_V0, REG_P0, REG_R1, 2, + INS_OPTS_SCALABLE_S); // LD2W {.S, .S }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3b, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 21, + INS_OPTS_SCALABLE_B); // LD3B {.B, .B, .B }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3d, EA_SCALABLE, REG_V0, REG_P0, REG_R0, -24, + INS_OPTS_SCALABLE_D); // LD3D {.D, .D, .D }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3h, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 21, + INS_OPTS_SCALABLE_H); // LD3H {.H, .H, .H }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3w, EA_SCALABLE, REG_V0, REG_P0, REG_R0, -24, + INS_OPTS_SCALABLE_S); // LD3W {.S, .S, .S }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4b, EA_SCALABLE, REG_V31, REG_P2, REG_R1, -32, + INS_OPTS_SCALABLE_B); // LD4B {.B, .B, .B, .B }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4d, EA_SCALABLE, REG_V8, REG_P0, REG_R0, 28, + INS_OPTS_SCALABLE_D); // LD4D {.D, .D, .D, .D }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4h, EA_SCALABLE, REG_V5, REG_P4, REG_R3, -32, + INS_OPTS_SCALABLE_H); // LD4H {.H, .H, .H, .H }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4w, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 28, + INS_OPTS_SCALABLE_S); // LD4W {.S, .S, .S, .S }, /Z, + // [{, #, MUL VL}] + + // IF_SVE_JE_3A + theEmitter->emitIns_R_R_R_I(INS_sve_st2q, EA_SCALABLE, REG_V0, REG_P3, REG_R0, -16, + INS_OPTS_SCALABLE_Q); // ST2Q {.Q, .Q }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st3q, EA_SCALABLE, REG_V2, REG_P3, REG_R4, 21, + 
INS_OPTS_SCALABLE_Q); // ST3Q {.Q, .Q, .Q }, , [{, #, + // MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st4q, EA_SCALABLE, REG_V7, REG_P6, REG_R5, 28, + INS_OPTS_SCALABLE_Q); // ST4Q {.Q, .Q, .Q, .Q }, , [{, + // #, MUL VL}] #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE #ifdef ALL_ARM64_EMITTER_UNIT_TESTS diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 78232038e01dd7..eaf7a863004f33 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1254,15 +1254,30 @@ void emitter::emitInsSanityCheck(instrDesc* id) #ifdef DEBUG switch (id->idIns()) { + case INS_sve_ld2b: + case INS_sve_ld2h: + case INS_sve_ld2w: + case INS_sve_ld2d: case INS_sve_ld2q: + case INS_sve_st2q: assert(isValidSimm4_MultipleOf2(emitGetInsSC(id))); // iiii break; + case INS_sve_ld3b: + case INS_sve_ld3h: + case INS_sve_ld3w: + case INS_sve_ld3d: case INS_sve_ld3q: + case INS_sve_st3q: assert(isValidSimm4_MultipleOf3(emitGetInsSC(id))); // iiii break; + case INS_sve_ld4b: + case INS_sve_ld4h: + case INS_sve_ld4w: + case INS_sve_ld4d: case INS_sve_ld4q: + case INS_sve_st4q: assert(isValidSimm4_MultipleOf4(emitGetInsSC(id))); // iiii break; @@ -9741,6 +9756,119 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_IQ_3A; break; + case INS_sve_ld2b: + case INS_sve_ld3b: + case INS_sve_ld4b: + case INS_sve_ld2h: + case INS_sve_ld3h: + case INS_sve_ld4h: + case INS_sve_ld2w: + case INS_sve_ld3w: + case INS_sve_ld4w: + case INS_sve_ld2d: + case INS_sve_ld3d: + case INS_sve_ld4d: + assert(insOptsScalableSimple(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + +#ifdef DEBUG + switch (ins) + { + case INS_sve_ld2b: + case INS_sve_ld2h: + case INS_sve_ld2w: + case INS_sve_ld2d: + assert(isValidSimm4_MultipleOf2(imm)); + break; + + case INS_sve_ld3b: + case INS_sve_ld3h: + case INS_sve_ld3w: + case INS_sve_ld3d: + assert(isValidSimm4_MultipleOf3(imm)); + break; + + case INS_sve_ld4b: 
+ case INS_sve_ld4h: + case INS_sve_ld4w: + case INS_sve_ld4d: + assert(isValidSimm4_MultipleOf4(imm)); + break; + + default: + assert(!"Invalid instruction"); + break; + } + + switch (ins) + { + case INS_sve_ld2b: + case INS_sve_ld3b: + case INS_sve_ld4b: + assert(opt == INS_OPTS_SCALABLE_B); + break; + + case INS_sve_ld2h: + case INS_sve_ld3h: + case INS_sve_ld4h: + assert(opt == INS_OPTS_SCALABLE_H); + break; + + case INS_sve_ld2w: + case INS_sve_ld3w: + case INS_sve_ld4w: + assert(opt == INS_OPTS_SCALABLE_S); + break; + + case INS_sve_ld2d: + case INS_sve_ld3d: + case INS_sve_ld4d: + assert(opt == INS_OPTS_SCALABLE_D); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + + fmt = IF_SVE_IS_3A; + break; + + case INS_sve_st2q: + case INS_sve_st3q: + case INS_sve_st4q: + assert(opt == INS_OPTS_SCALABLE_Q); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + +#ifdef DEBUG + switch (ins) + { + case INS_sve_st2q: + assert(isValidSimm4_MultipleOf2(imm)); + break; + + case INS_sve_st3q: + assert(isValidSimm4_MultipleOf3(imm)); + break; + + case INS_sve_st4q: + assert(isValidSimm4_MultipleOf4(imm)); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + + fmt = IF_SVE_JE_3A; + break; + default: unreached(); break; @@ -15302,15 +15430,30 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) switch (ins) { + case INS_sve_ld2b: + case INS_sve_ld2h: + case INS_sve_ld2w: + case INS_sve_ld2d: case INS_sve_ld2q: + case INS_sve_st2q: code |= insEncodeSimm4_MultipleOf2_19_to_16(imm); // iiii break; + case INS_sve_ld3b: + case INS_sve_ld3h: + case INS_sve_ld3w: + case INS_sve_ld3d: case INS_sve_ld3q: + case INS_sve_st3q: code |= insEncodeSimm4_MultipleOf3_19_to_16(imm); // iiii break; + case INS_sve_ld4b: + case INS_sve_ld4h: + case INS_sve_ld4w: + case INS_sve_ld4d: case INS_sve_ld4q: + case INS_sve_st4q: code |= 
insEncodeSimm4_MultipleOf4_19_to_16(imm); // iiii break; @@ -17758,6 +17901,7 @@ void emitter::emitDispInsHelp( break; // { .D }, /Z, [{, #, MUL VL}] + // Some of these formats may allow changing the element size instead of using 'D' for all instructions. case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus // immediate) case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus @@ -17789,7 +17933,22 @@ void emitter::emitDispInsHelp( // { .Q, .Q, .Q, .Q }, /Z, [{, #, MUL VL}] case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus // immediate) + // { .B, .B }, /Z, [{, #, MUL VL}] + // { .H, .H }, /Z, [{, #, MUL VL}] + // { .S, .S }, /Z, [{, #, MUL VL}] + // { .D, .D }, /Z, [{, #, MUL VL}] + // { .B, .B, .B }, /Z, [{, #, MUL VL}] + // { .H, .H, .H }, /Z, [{, #, MUL VL}] + // { .S, .S, .S }, /Z, [{, #, MUL VL}] + // { .D, .D, .D }, /Z, [{, #, MUL VL}] + // { .B, .B, .B, .B }, /Z, [{, #, MUL VL}] + // { .H, .H, .H, .H }, /Z, [{, #, MUL VL}] + // { .S, .S, .S, .S }, /Z, [{, #, MUL VL}] + // { .D, .D, .D, .D }, /Z, [{, #, MUL VL}] case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) + // { .Q, .Q }, , [{, #, MUL VL}] + // { .Q, .Q, .Q }, , [{, #, MUL VL}] + // { .Q, .Q, .Q, .Q }, , [{, #, MUL VL}] case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus // immediate) case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus @@ -17800,15 +17959,30 @@ void emitter::emitDispInsHelp( imm = emitGetInsSC(id); switch (ins) { + case INS_sve_ld2b: + case INS_sve_ld2h: + case INS_sve_ld2w: + case INS_sve_ld2d: case INS_sve_ld2q: + case INS_sve_st2q: emitDispSveRegList(id->idReg1(), 2, id->idInsOpt(), true); // ttttt break; + case INS_sve_ld3b: + case INS_sve_ld3h: + 
case INS_sve_ld3w: + case INS_sve_ld3d: case INS_sve_ld3q: + case INS_sve_st3q: emitDispSveRegList(id->idReg1(), 3, id->idInsOpt(), true); // ttttt break; + case INS_sve_ld4b: + case INS_sve_ld4h: + case INS_sve_ld4w: + case INS_sve_ld4d: case INS_sve_ld4q: + case INS_sve_st4q: emitDispSveRegList(id->idReg1(), 4, id->idInsOpt(), true); // ttttt break; @@ -17816,7 +17990,18 @@ void emitter::emitDispInsHelp( emitDispSveRegList(id->idReg1(), 1, id->idInsOpt(), true); // ttttt break; } - emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, true); // ggg + switch (ins) + { + case INS_sve_st2q: + case INS_sve_st3q: + case INS_sve_st4q: + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + break; + + default: + emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, true); // ggg + break; + } printf("["); emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn if (imm != 0) @@ -17831,6 +18016,8 @@ void emitter::emitDispInsHelp( break; case IF_SVE_IQ_3A: + case IF_SVE_IS_3A: + case IF_SVE_JE_3A: // This does not have to be printed as hex. // We only do it because the capstone disassembly displays this immediate as hex. // We could not modify capstone without affecting other cases. 
@@ -20462,6 +20649,87 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; + case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) + switch (ins) + { + case INS_sve_ld2b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3b: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4b: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3h: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4h: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3w: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4w: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2d: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3d: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4d: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // immediate) + switch (ins) 
+ { + case INS_sve_st2q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_st3q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_st4q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); From 5e1c2f4b6b710a4f432ad1c1d4772d1e4e4a721c Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 14 Dec 2023 15:20:17 -0800 Subject: [PATCH 19/37] Added SVE_JM_3A format --- src/coreclr/jit/codegenarm64.cpp | 10 ++++++ src/coreclr/jit/emitarm64.cpp | 52 ++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index c67eb4860b25b6..0fe15fe6a0e889 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10968,6 +10968,16 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_sve_st4q, EA_SCALABLE, REG_V7, REG_P6, REG_R5, 28, INS_OPTS_SCALABLE_Q); // ST4Q {.Q, .Q, .Q, .Q }, , [{, // #, MUL VL}] + + // IF_SVE_JM_3A + theEmitter->emitIns_R_R_R_I(INS_sve_stnt1b, EA_SCALABLE, REG_V1, REG_P2, REG_R3, 4, + INS_OPTS_SCALABLE_B); // STNT1B {.B }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_stnt1d, EA_SCALABLE, REG_V8, REG_P7, REG_R6, 5, + INS_OPTS_SCALABLE_D); // STNT1D {.D }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_stnt1h, EA_SCALABLE, REG_V9, REG_P1, REG_R0, -5, + INS_OPTS_SCALABLE_H); // STNT1H {.H }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_stnt1w, EA_SCALABLE, REG_V0, REG_P0, REG_R2, -7, + INS_OPTS_SCALABLE_S); // STNT1W {.S }, , [{, #, MUL VL}] 
#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE #ifdef ALL_ARM64_EMITTER_UNIT_TESTS diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index eaf7a863004f33..cd65377f01ff45 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9869,6 +9869,44 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_JE_3A; break; + case INS_sve_stnt1b: + case INS_sve_stnt1h: + case INS_sve_stnt1w: + case INS_sve_stnt1d: + assert(insOptsScalableSimple(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + +#ifdef DEBUG + switch (ins) + { + case INS_sve_stnt1b: + assert(opt == INS_OPTS_SCALABLE_B); + break; + + case INS_sve_stnt1h: + assert(opt == INS_OPTS_SCALABLE_H); + break; + + case INS_sve_stnt1w: + assert(opt == INS_OPTS_SCALABLE_S); + break; + + case INS_sve_stnt1d: + assert(opt == INS_OPTS_SCALABLE_D); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + + fmt = IF_SVE_JM_3A; + break; + default: unreached(); break; @@ -17951,6 +17989,10 @@ void emitter::emitDispInsHelp( // { .Q, .Q, .Q, .Q }, , [{, #, MUL VL}] case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus // immediate) + // { .B }, , [{, #, MUL VL}] + // { .H }, , [{, #, MUL VL}] + // { .S }, , [{, #, MUL VL}] + // { .D }, , [{, #, MUL VL}] case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus // immediate) case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) @@ -17995,6 +18037,10 @@ void emitter::emitDispInsHelp( case INS_sve_st2q: case INS_sve_st3q: case INS_sve_st4q: + case INS_sve_stnt1b: + case INS_sve_stnt1h: + case INS_sve_stnt1w: + case INS_sve_stnt1d: emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg break; @@ -20730,6 +20776,12 @@ 
emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; + case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); From aea7f1108e97b6417c14651f390d79fa1e613e23 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 14 Dec 2023 15:56:08 -0800 Subject: [PATCH 20/37] Added SVE_JN_3C, SVE_JN_3C_D and SVE_JO_3A formats --- src/coreclr/jit/codegenarm64.cpp | 44 ++++++ src/coreclr/jit/emitarm64.cpp | 241 +++++++++++++++++++++++++++++++ 2 files changed, 285 insertions(+) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 0fe15fe6a0e889..2b4ae7065928d7 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10978,6 +10978,50 @@ void CodeGen::genArm64EmitterUnitTests() INS_OPTS_SCALABLE_H); // STNT1H {.H }, , [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_stnt1w, EA_SCALABLE, REG_V0, REG_P0, REG_R2, -7, INS_OPTS_SCALABLE_S); // STNT1W {.S }, , [{, #, MUL VL}] + + // IF_SVE_JN_3C + theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V1, REG_P2, REG_R3, 4, + INS_OPTS_SCALABLE_D); // ST1D {.D }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V3, REG_P4, REG_R5, 6, + INS_OPTS_SCALABLE_Q); // ST1W {.Q }, , [{, #, MUL VL}] + + // IF_SVE_JN_3C_D + theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V2, REG_P1, REG_R0, 0, + INS_OPTS_SCALABLE_Q); // ST1D {.Q }, , [{, #, MUL VL}] + + // IF_SVE_JO_3A + theEmitter->emitIns_R_R_R_I(INS_sve_st2b, EA_SCALABLE, REG_V0, REG_P1, REG_R2, -16, + INS_OPTS_SCALABLE_B); // ST2B {.B, .B }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st2d, EA_SCALABLE, REG_V5, REG_P4, REG_R3, -16, + INS_OPTS_SCALABLE_D); // ST2D {.D, .D }, , [{, #, MUL VL}] + 
theEmitter->emitIns_R_R_R_I(INS_sve_st2h, EA_SCALABLE, REG_V6, REG_P7, REG_R8, -16, + INS_OPTS_SCALABLE_H); // ST2H {.H, .H }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st2w, EA_SCALABLE, REG_V8, REG_P1, REG_R9, -16, + INS_OPTS_SCALABLE_S); // ST2W {.S, .S }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st3b, EA_SCALABLE, REG_V7, REG_P6, REG_R5, -24, + INS_OPTS_SCALABLE_B); // ST3B {.B, .B, .B }, , [{, #, + // MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st3d, EA_SCALABLE, REG_V2, REG_P3, REG_R4, -24, + INS_OPTS_SCALABLE_D); // ST3D {.D, .D, .D }, , [{, #, + // MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st3h, EA_SCALABLE, REG_V1, REG_P2, REG_R3, -24, + INS_OPTS_SCALABLE_H); // ST3H {.H, .H, .H }, , [{, #, + // MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st3w, EA_SCALABLE, REG_V1, REG_P3, REG_R8, -24, + INS_OPTS_SCALABLE_S); // ST3W {.S, .S, .S }, , [{, #, + // MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st4b, EA_SCALABLE, REG_V0, REG_P0, REG_R0, -32, + INS_OPTS_SCALABLE_B); // ST4B {.B, .B, .B, .B }, , [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st4d, EA_SCALABLE, REG_V2, REG_P0, REG_R1, -32, + INS_OPTS_SCALABLE_D); // ST4D {.D, .D, .D, .D }, , [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st4h, EA_SCALABLE, REG_V3, REG_P5, REG_R2, -32, + INS_OPTS_SCALABLE_H); // ST4H {.H, .H, .H, .H }, , [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st4w, EA_SCALABLE, REG_V0, REG_P1, REG_R5, 28, + INS_OPTS_SCALABLE_S); // ST4W {.S, .S, .S, .S }, , [{, + // #, MUL VL}] #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE #ifdef ALL_ARM64_EMITTER_UNIT_TESTS diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index cd65377f01ff45..30eef9f5cb1926 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1259,6 +1259,10 @@ void emitter::emitInsSanityCheck(instrDesc* id) case INS_sve_ld2w: case INS_sve_ld2d: case INS_sve_ld2q: + case INS_sve_st2b: + case INS_sve_st2h: + 
case INS_sve_st2w: + case INS_sve_st2d: case INS_sve_st2q: assert(isValidSimm4_MultipleOf2(emitGetInsSC(id))); // iiii break; @@ -1268,6 +1272,10 @@ void emitter::emitInsSanityCheck(instrDesc* id) case INS_sve_ld3w: case INS_sve_ld3d: case INS_sve_ld3q: + case INS_sve_st3b: + case INS_sve_st3h: + case INS_sve_st3w: + case INS_sve_st3d: case INS_sve_st3q: assert(isValidSimm4_MultipleOf3(emitGetInsSC(id))); // iiii break; @@ -1277,6 +1285,10 @@ void emitter::emitInsSanityCheck(instrDesc* id) case INS_sve_ld4w: case INS_sve_ld4d: case INS_sve_ld4q: + case INS_sve_st4b: + case INS_sve_st4h: + case INS_sve_st4w: + case INS_sve_st4d: case INS_sve_st4q: assert(isValidSimm4_MultipleOf4(emitGetInsSC(id))); // iiii break; @@ -9907,6 +9919,114 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_JM_3A; break; + case INS_sve_st1w: + case INS_sve_st1d: + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + + if (opt == INS_OPTS_SCALABLE_Q && (ins == INS_sve_st1d)) + { + fmt = IF_SVE_JN_3C_D; + } + else + { +#if DEBUG + if (ins == INS_sve_st1w) + { + assert(opt == INS_OPTS_SCALABLE_Q); + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + } +#endif // DEBUG + fmt = IF_SVE_JN_3C; + } + break; + + case INS_sve_st2b: + case INS_sve_st3b: + case INS_sve_st4b: + case INS_sve_st2h: + case INS_sve_st3h: + case INS_sve_st4h: + case INS_sve_st2w: + case INS_sve_st3w: + case INS_sve_st4w: + case INS_sve_st2d: + case INS_sve_st3d: + case INS_sve_st4d: + assert(insOptsScalableSimple(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + +#ifdef DEBUG + switch (ins) + { + case INS_sve_st2b: + case INS_sve_st2h: + case INS_sve_st2w: + case INS_sve_st2d: + assert(isValidSimm4_MultipleOf2(imm)); + break; + + case INS_sve_st3b: + case INS_sve_st3h: + case INS_sve_st3w: + case INS_sve_st3d: + assert(isValidSimm4_MultipleOf3(imm)); + break; + + 
case INS_sve_st4b: + case INS_sve_st4h: + case INS_sve_st4w: + case INS_sve_st4d: + assert(isValidSimm4_MultipleOf4(imm)); + break; + + default: + assert(!"Invalid instruction"); + break; + } + + switch (ins) + { + case INS_sve_st2b: + case INS_sve_st3b: + case INS_sve_st4b: + assert(opt == INS_OPTS_SCALABLE_B); + break; + + case INS_sve_st2h: + case INS_sve_st3h: + case INS_sve_st4h: + assert(opt == INS_OPTS_SCALABLE_H); + break; + + case INS_sve_st2w: + case INS_sve_st3w: + case INS_sve_st4w: + assert(opt == INS_OPTS_SCALABLE_S); + break; + + case INS_sve_st2d: + case INS_sve_st3d: + case INS_sve_st4d: + assert(opt == INS_OPTS_SCALABLE_D); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + + fmt = IF_SVE_JO_3A; + break; + default: unreached(); break; @@ -15473,6 +15593,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case INS_sve_ld2w: case INS_sve_ld2d: case INS_sve_ld2q: + case INS_sve_st2b: + case INS_sve_st2h: + case INS_sve_st2w: + case INS_sve_st2d: case INS_sve_st2q: code |= insEncodeSimm4_MultipleOf2_19_to_16(imm); // iiii break; @@ -15482,6 +15606,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case INS_sve_ld3w: case INS_sve_ld3d: case INS_sve_ld3q: + case INS_sve_st3b: + case INS_sve_st3h: + case INS_sve_st3w: + case INS_sve_st3d: case INS_sve_st3q: code |= insEncodeSimm4_MultipleOf3_19_to_16(imm); // iiii break; @@ -15491,6 +15619,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case INS_sve_ld4w: case INS_sve_ld4d: case INS_sve_ld4q: + case INS_sve_st4b: + case INS_sve_st4h: + case INS_sve_st4w: + case INS_sve_st4d: case INS_sve_st4q: code |= insEncodeSimm4_MultipleOf4_19_to_16(imm); // iiii break; @@ -17995,8 +18127,22 @@ void emitter::emitDispInsHelp( // { .D }, , [{, #, MUL VL}] case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus // immediate) + // { .D }, , [{, #, MUL 
VL}] + // { .Q }, , [{, #, MUL VL}] case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + // { .B, .B }, , [{, #, MUL VL}] + // { .H, .H }, , [{, #, MUL VL}] + // { .S, .S }, , [{, #, MUL VL}] + // { .D, .D }, , [{, #, MUL VL}] + // { .B, .B, .B }, , [{, #, MUL VL}] + // { .H, .H, .H }, , [{, #, MUL VL}] + // { .S, .S, .S }, , [{, #, MUL VL}] + // { .D, .D, .D }, , [{, #, MUL VL}] + // { .B, .B, .B, .B }, , [{, #, MUL VL}] + // { .H, .H, .H, .H }, , [{, #, MUL VL}] + // { .S, .S, .S, .S }, , [{, #, MUL VL}] + // { .D, .D, .D, .D }, , [{, #, MUL VL}] case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) imm = emitGetInsSC(id); switch (ins) @@ -18006,6 +18152,10 @@ void emitter::emitDispInsHelp( case INS_sve_ld2w: case INS_sve_ld2d: case INS_sve_ld2q: + case INS_sve_st2b: + case INS_sve_st2h: + case INS_sve_st2w: + case INS_sve_st2d: case INS_sve_st2q: emitDispSveRegList(id->idReg1(), 2, id->idInsOpt(), true); // ttttt break; @@ -18015,6 +18165,10 @@ void emitter::emitDispInsHelp( case INS_sve_ld3w: case INS_sve_ld3d: case INS_sve_ld3q: + case INS_sve_st3b: + case INS_sve_st3h: + case INS_sve_st3w: + case INS_sve_st3d: case INS_sve_st3q: emitDispSveRegList(id->idReg1(), 3, id->idInsOpt(), true); // ttttt break; @@ -18024,6 +18178,10 @@ void emitter::emitDispInsHelp( case INS_sve_ld4w: case INS_sve_ld4d: case INS_sve_ld4q: + case INS_sve_st4b: + case INS_sve_st4h: + case INS_sve_st4w: + case INS_sve_st4d: case INS_sve_st4q: emitDispSveRegList(id->idReg1(), 4, id->idInsOpt(), true); // ttttt break; @@ -18034,13 +18192,27 @@ void emitter::emitDispInsHelp( } switch (ins) { + case INS_sve_st2b: + case INS_sve_st2h: + case INS_sve_st2w: + case INS_sve_st2d: case INS_sve_st2q: + case INS_sve_st3b: + case INS_sve_st3h: + case INS_sve_st3w: + case INS_sve_st3d: 
case INS_sve_st3q: + case INS_sve_st4b: + case INS_sve_st4h: + case INS_sve_st4w: + case INS_sve_st4d: case INS_sve_st4q: case INS_sve_stnt1b: case INS_sve_stnt1h: case INS_sve_stnt1w: case INS_sve_stnt1d: + case INS_sve_st1d: + case INS_sve_st1w: emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg break; @@ -18064,6 +18236,7 @@ void emitter::emitDispInsHelp( case IF_SVE_IQ_3A: case IF_SVE_IS_3A: case IF_SVE_JE_3A: + case IF_SVE_JO_3A: // This does not have to be printed as hex. // We only do it because the capstone disassembly displays this immediate as hex. // We could not modify capstone without affecting other cases. @@ -20782,6 +20955,74 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) + switch (ins) + { + case INS_sve_st2b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3b: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4b: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3h: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4h: + 
result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3w: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4w: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2d: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3d: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4d: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); From 3efdcbdd6dce02678d37179ed24892fdb970bf48 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 14 Dec 2023 16:05:55 -0800 Subject: [PATCH 21/37] Cleanup --- src/coreclr/jit/codegenarm64.cpp | 2 +- src/coreclr/jit/codegencommon.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 2b4ae7065928d7..c76e2c53277c1a 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5436,7 +5436,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) #define ALL_ARM64_EMITTER_UNIT_TESTS // #define ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // #define ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#define ALL_ARM64_EMITTER_UNIT_TESTS_SVE +// #define ALL_ARM64_EMITTER_UNIT_TESTS_SVE // #define ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED #if defined(DEBUG) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 
aa4a929d739b40..bf2bb4be240580 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2100,7 +2100,7 @@ void CodeGen::genEmitUnwindDebugGCandEH() #endif // LATE_DISASM #ifdef DEBUG - if (compiler->opts.altJit && JitConfig.JitRawHexCode().contains(compiler->info.compMethodHnd, compiler->info.compClassHnd, + if (JitConfig.JitRawHexCode().contains(compiler->info.compMethodHnd, compiler->info.compClassHnd, &compiler->info.compMethodInfo->args)) { BYTE* addr = (BYTE*)*codePtr + compiler->GetEmitter()->writeableOffset; From de0fc85df2b7f1bf5b96b50ee43a73edc6560cee Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 14 Dec 2023 16:12:13 -0800 Subject: [PATCH 22/37] Added comments --- src/coreclr/jit/emitarm64.cpp | 12 ++++++------ src/coreclr/jit/emitarm64.h | 6 ++++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 30eef9f5cb1926..0d97a9e60a1621 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -13408,7 +13408,7 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * TODO + * Returns the encoding for the immediate value as 4-bits at bit locations '19-16'. */ /*static*/ emitter::code_t emitter::insEncodeSimm4_19_to_16(ssize_t imm) @@ -13423,7 +13423,7 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * TODO + * Returns the encoding for the immediate value that is a multiple of 2 as 4-bits at bit locations '19-16'. 
*/ /*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf2_19_to_16(ssize_t imm) @@ -13434,7 +13434,7 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * TODO + * Returns the encoding for the immediate value that is a multiple of 3 as 4-bits at bit locations '19-16'. */ /*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf3_19_to_16(ssize_t imm) @@ -13445,7 +13445,7 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * TODO + * Returns the encoding for the immediate value that is a multiple of 4 as 4-bits at bit locations '19-16'. */ /*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf4_19_to_16(ssize_t imm) @@ -13456,7 +13456,7 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * TODO + * Returns the encoding for the immediate value that is a multiple of 16 as 4-bits at bit locations '19-16'. */ /*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf16_19_to_16(ssize_t imm) @@ -13467,7 +13467,7 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * TODO + * Returns the encoding for the immediate value that is a multiple of 32 as 4-bits at bit locations '19-16'. */ /*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf32_19_to_16(ssize_t imm) diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 4a6081f3580ecb..fa404f78373789 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -488,16 +488,22 @@ static bool canEncodeSveElemsize_dtype(instruction ins); // for the 'dtype' field. static code_t insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t code); +// Returns the encoding for the immediate value as 4-bits at bit locations '19-16'. 
static code_t insEncodeSimm4_19_to_16(ssize_t imm); +// Returns the encoding for the immediate value that is a multiple of 2 as 4-bits at bit locations '19-16'. static code_t insEncodeSimm4_MultipleOf2_19_to_16(ssize_t imm); +// Returns the encoding for the immediate value that is a multiple of 3 as 4-bits at bit locations '19-16'. static code_t insEncodeSimm4_MultipleOf3_19_to_16(ssize_t imm); +// Returns the encoding for the immediate value that is a multiple of 4 as 4-bits at bit locations '19-16'. static code_t insEncodeSimm4_MultipleOf4_19_to_16(ssize_t imm); +// Returns the encoding for the immediate value that is a multiple of 16 as 4-bits at bit locations '19-16'. static code_t insEncodeSimm4_MultipleOf16_19_to_16(ssize_t imm); +// Returns the encoding for the immediate value that is a multiple of 32 as 4-bits at bit locations '19-16'. static code_t insEncodeSimm4_MultipleOf32_19_to_16(ssize_t imm); // Returns the encoding to select the elemsize for an Arm64 SVE vector instruction plus an immediate. From b878adf7e51d53235e13a4fffb4943b34721e603 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 3 Jan 2024 10:40:35 -0800 Subject: [PATCH 23/37] Fix merge --- src/coreclr/jit/emitarm64.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 2f2208de330d55..fd908ea4bed8b9 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -13659,6 +13659,10 @@ void emitter::emitIns_Call(EmitCallType callType, { assert(isValidSimm4_MultipleOf32(imm)); return insEncodeSimm4_19_to_16(imm / 32); +} + +/***************************************************************************** + * * Returns the encoding to select the 4/8-byte width specifier * at bit location 22 for an Arm64 Sve instruction. 
*/ @@ -18565,11 +18569,11 @@ void emitter::emitDispInsHelp( case INS_sve_stnt1d: case INS_sve_st1d: case INS_sve_st1w: - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // ggg break; default: - emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, true); // ggg + emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg break; } printf("["); From ca3f5800e29b4ba2f9e37e7eb3957af90bc15d74 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 3 Jan 2024 12:23:04 -0800 Subject: [PATCH 24/37] Formatting --- src/coreclr/jit/codegenarm64test.cpp | 6 +++--- src/coreclr/jit/emitarm64.cpp | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 7d911ffca619a2..5b2ffbe6539c96 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5418,9 +5418,9 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V2, REG_P1, REG_R6, 1, INS_OPTS_SCALABLE_H); // LD1H {.H }, /Z, [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V2, REG_P1, REG_R6, 1, - INS_OPTS_SCALABLE_S); // LD1H {.S }, /Z, [{, #, MUL VL}] + INS_OPTS_SCALABLE_S); // LD1H {.S }, /Z, [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V2, REG_P1, REG_R6, 1, - INS_OPTS_SCALABLE_D); // LD1H {.D }, /Z, [{, #, MUL VL}] + INS_OPTS_SCALABLE_D); // LD1H {.D }, /Z, [{, #, MUL VL}] // IF_SVE_IL_3A theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1d, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, @@ -5492,7 +5492,7 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqh, EA_SCALABLE, REG_V4, REG_P5, REG_R6, 112, INS_OPTS_SCALABLE_H); // LD1RQH {.H }, /Z, [{, #}] theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqw, EA_SCALABLE, REG_V3, REG_P2, REG_R1, -16, - INS_OPTS_SCALABLE_S); // 
LD1RQW {.S }, /Z, [{, #}] + INS_OPTS_SCALABLE_S); // LD1RQW {.S }, /Z, [{, #}] // IF_SVE_IQ_3A theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V0, REG_P1, REG_R2, -16, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index fd908ea4bed8b9..0aa10087a65c1c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9712,7 +9712,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ldnf1d: assert(opt == INS_OPTS_SCALABLE_D); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); fmt = IF_SVE_IL_3A; @@ -18452,8 +18452,8 @@ void emitter::emitDispInsHelp( // { .H }, /Z, [{, #}] // { .S }, /Z, [{, #}] // { .D }, /Z, [{, #}] - case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus - // immediate) + case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus + // immediate) // { .Q, .Q }, /Z, [{, #, MUL VL}] // { .Q, .Q, .Q }, /Z, [{, #, MUL VL}] // { .Q, .Q, .Q, .Q }, /Z, [{, #, MUL VL}] @@ -18485,7 +18485,7 @@ void emitter::emitDispInsHelp( // immediate) // { .D }, , [{, #, MUL VL}] // { .Q }, , [{, #, MUL VL}] - case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) // { .B, .B }, , [{, #, MUL VL}] // { .H, .H }, , [{, #, MUL VL}] From 87f0d106009d1745cc8f570a041c76a973f00e5d Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 3 Jan 2024 15:10:16 -0800 Subject: [PATCH 25/37] Cleanup --- src/coreclr/jit/emitarm64.cpp | 302 +++++++++++++++++++++++++--------- src/coreclr/jit/emitarm64.h | 6 + 2 files changed, 233 insertions(+), 75 deletions(-) diff 
--git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 0aa10087a65c1c..b774947e679496 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -13478,6 +13478,231 @@ void emitter::emitIns_Call(EmitCallType callType, return encodedSize | imm3High | imm3Low; } +/***************************************************************************** + * + * Returns the register list size for the given instruction. + */ + +/*static*/ int emitter::insGetSveRegisterListSize(instruction ins) +{ + switch (ins) + { + case INS_sve_ld2b: + case INS_sve_ld2h: + case INS_sve_ld2w: + case INS_sve_ld2d: + case INS_sve_ld2q: + case INS_sve_st2b: + case INS_sve_st2h: + case INS_sve_st2w: + case INS_sve_st2d: + case INS_sve_st2q: + return 2; + + case INS_sve_ld3b: + case INS_sve_ld3h: + case INS_sve_ld3w: + case INS_sve_ld3d: + case INS_sve_ld3q: + case INS_sve_st3b: + case INS_sve_st3h: + case INS_sve_st3w: + case INS_sve_st3d: + case INS_sve_st3q: + return 3; + + case INS_sve_ld4b: + case INS_sve_ld4h: + case INS_sve_ld4w: + case INS_sve_ld4d: + case INS_sve_ld4q: + case INS_sve_st4b: + case INS_sve_st4h: + case INS_sve_st4w: + case INS_sve_st4d: + case INS_sve_st4q: + return 4; + + default: + return 1; + } +} + +/***************************************************************************** + * + * Returns the predicate type for the given instruction's second register. 
+ */ + +/*static*/ emitter::PredicateType emitter::insGetPredicateTypeReg2(instruction ins, insFormat fmt) +{ + switch (fmt) + { + case IF_SVE_BV_2A: + case IF_SVE_HW_4A: + case IF_SVE_HW_4A_A: + case IF_SVE_HW_4A_B: + case IF_SVE_HW_4A_C: + case IF_SVE_HW_4B: + case IF_SVE_HW_4B_D: + case IF_SVE_HX_3A_E: + case IF_SVE_IJ_3A_F: + case IF_SVE_IK_4A_G: + case IF_SVE_IJ_3A_G: + case IF_SVE_IK_4A_I: + case IF_SVE_IH_3A_F: + case IF_SVE_II_4A_H: + case IF_SVE_IH_3A: + case IF_SVE_IH_3A_A: + case IF_SVE_II_4A: + case IF_SVE_II_4A_B: + case IF_SVE_IU_4A: + case IF_SVE_IU_4A_C: + case IF_SVE_IU_4B: + case IF_SVE_IU_4B_D: + case IF_SVE_IV_3A: + case IF_SVE_IG_4A_F: + case IF_SVE_IG_4A_G: + case IF_SVE_IJ_3A: + case IF_SVE_IK_4A: + case IF_SVE_IU_4A_A: + case IF_SVE_IU_4B_B: + case IF_SVE_HX_3A_B: + case IF_SVE_IG_4A_D: + case IF_SVE_IG_4A_E: + case IF_SVE_IF_4A: + case IF_SVE_IF_4A_A: + case IF_SVE_IM_3A: + case IF_SVE_IN_4A: + case IF_SVE_CX_4A: + case IF_SVE_CX_4A_A: + case IF_SVE_CY_3A: + case IF_SVE_CY_3B: + case IF_SVE_IX_4A: + case IF_SVE_HI_3A: + case IF_SVE_HT_4A: + case IF_SVE_DG_2A: + case IF_SVE_IO_3A: + case IF_SVE_IP_4A: + case IF_SVE_IQ_3A: + case IF_SVE_IR_4A: + case IF_SVE_IS_3A: + case IF_SVE_IT_4A: + case IF_SVE_DA_4A: + case IF_SVE_DB_3B: + case IF_SVE_DC_3A: + case IF_SVE_GE_4A: + case IF_SVE_GI_4A: + case IF_SVE_IC_3A_C: + case IF_SVE_IC_3A: + case IF_SVE_IC_3A_B: + case IF_SVE_IC_3A_A: + case IF_SVE_IL_3A_C: + case IF_SVE_IL_3A: + case IF_SVE_IL_3A_B: + case IF_SVE_IL_3A_A: + case IF_SVE_IW_4A: + return PREDICATE_ZERO; + + case IF_SVE_BV_2A_J: + case IF_SVE_CP_3A: + case IF_SVE_CQ_3A: + case IF_SVE_CZ_4A_K: + case IF_SVE_AM_2A: + case IF_SVE_AN_3A: + case IF_SVE_AO_3A: + case IF_SVE_HL_3A: + case IF_SVE_HM_2A: + case IF_SVE_AA_3A: + case IF_SVE_BU_2A: + case IF_SVE_BV_2B: + case IF_SVE_HS_3A: + case IF_SVE_HS_3A_H: + case IF_SVE_HS_3A_I: + case IF_SVE_HS_3A_J: + case IF_SVE_HP_3B: + case IF_SVE_HP_3B_H: + case IF_SVE_HP_3B_I: + case IF_SVE_HP_3B_J: + 
case IF_SVE_AR_4A: + case IF_SVE_BV_2A_A: + case IF_SVE_AB_3A: + case IF_SVE_ET_3A: + case IF_SVE_HU_4A: + case IF_SVE_HL_3B: + case IF_SVE_AD_3A: + case IF_SVE_AB_3B: + case IF_SVE_AE_3A: + case IF_SVE_EU_3A: + case IF_SVE_GT_4A: + case IF_SVE_AP_3A: + case IF_SVE_HO_3A: + case IF_SVE_HO_3A_B: + case IF_SVE_GQ_3A: + case IF_SVE_HU_4B: + case IF_SVE_AQ_3A: + case IF_SVE_CU_3A: + case IF_SVE_AC_3A: + case IF_SVE_ER_3A: + case IF_SVE_GR_3A: + case IF_SVE_ES_3A: + case IF_SVE_HR_3A: + case IF_SVE_EP_3A: + case IF_SVE_GP_3A: + case IF_SVE_EQ_3A: + case IF_SVE_HQ_3A: + case IF_SVE_AS_4A: + case IF_SVE_CT_3A: + case IF_SVE_HP_3A: + case IF_SVE_HV_4A: + return PREDICATE_MERGE; + + case IF_SVE_CZ_4A_L: + case IF_SVE_CF_2A: + case IF_SVE_CF_2B: + case IF_SVE_CF_2C: + case IF_SVE_CF_2D: + case IF_SVE_CI_3A: + case IF_SVE_DL_2A: + case IF_SVE_DM_2A: + case IF_SVE_DN_2A: + case IF_SVE_DO_2A: + case IF_SVE_DP_2A: + case IF_SVE_CK_2A: + case IF_SVE_DI_2A: + return PREDICATE_SIZED; + + // This is a special case as the second register could be ZERO or MERGE. + // / + // Therefore, by default return NONE due to ambiguity. + case IF_SVE_AH_3A: + case IF_SVE_DB_3A: + return PREDICATE_NONE; + + case IF_SVE_CW_4A: + switch (ins) + { + case INS_sve_sel: + return PREDICATE_NONE; + + default: + return PREDICATE_MERGE; + } + + case IF_SVE_CZ_4A: + switch (ins) + { + case INS_sve_sel: + return PREDICATE_NONE; + + default: + return PREDICATE_ZERO; + } + + default: + return PREDICATE_NONE; + } +} + /***************************************************************************** * * Returns true if the specified instruction can encode the 'dtype' field. 
@@ -18501,81 +18726,8 @@ void emitter::emitDispInsHelp( // { .D, .D, .D, .D }, , [{, #, MUL VL}] case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) imm = emitGetInsSC(id); - switch (ins) - { - case INS_sve_ld2b: - case INS_sve_ld2h: - case INS_sve_ld2w: - case INS_sve_ld2d: - case INS_sve_ld2q: - case INS_sve_st2b: - case INS_sve_st2h: - case INS_sve_st2w: - case INS_sve_st2d: - case INS_sve_st2q: - emitDispSveRegList(id->idReg1(), 2, id->idInsOpt(), true); // ttttt - break; - - case INS_sve_ld3b: - case INS_sve_ld3h: - case INS_sve_ld3w: - case INS_sve_ld3d: - case INS_sve_ld3q: - case INS_sve_st3b: - case INS_sve_st3h: - case INS_sve_st3w: - case INS_sve_st3d: - case INS_sve_st3q: - emitDispSveRegList(id->idReg1(), 3, id->idInsOpt(), true); // ttttt - break; - - case INS_sve_ld4b: - case INS_sve_ld4h: - case INS_sve_ld4w: - case INS_sve_ld4d: - case INS_sve_ld4q: - case INS_sve_st4b: - case INS_sve_st4h: - case INS_sve_st4w: - case INS_sve_st4d: - case INS_sve_st4q: - emitDispSveRegList(id->idReg1(), 4, id->idInsOpt(), true); // ttttt - break; - - default: - emitDispSveRegList(id->idReg1(), 1, id->idInsOpt(), true); // ttttt - break; - } - switch (ins) - { - case INS_sve_st2b: - case INS_sve_st2h: - case INS_sve_st2w: - case INS_sve_st2d: - case INS_sve_st2q: - case INS_sve_st3b: - case INS_sve_st3h: - case INS_sve_st3w: - case INS_sve_st3d: - case INS_sve_st3q: - case INS_sve_st4b: - case INS_sve_st4h: - case INS_sve_st4w: - case INS_sve_st4d: - case INS_sve_st4q: - case INS_sve_stnt1b: - case INS_sve_stnt1h: - case INS_sve_stnt1w: - case INS_sve_stnt1d: - case INS_sve_st1d: - case INS_sve_st1w: - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // ggg - break; - - default: - emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg - break; - } + emitDispSveRegList(id->idReg1(), insGetSveRegisterListSize(ins), id->idInsOpt(), true); // ttttt + 
emitDispPredicateReg(id->idReg2(), insGetPredicateTypeReg2(ins, fmt), id->idInsOpt(), true); // ggg printf("["); emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn if (imm != 0) diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 680de5cd8d9f48..62d18b6a54c7cf 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -482,6 +482,12 @@ static code_t insEncodeSveElemsize(emitAttr size); // This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); +// Returns the register list size for the given instruction. +static int insGetSveRegisterListSize(instruction ins); + +// Returns the predicate type for the given instruction's second register. +static PredicateType insGetPredicateTypeReg2(instruction ins, insFormat fmt); + // Returns true if the specified instruction can encode the 'dtype' field. static bool canEncodeSveElemsize_dtype(instruction ins); From 042f53055d34b1dd68f7e919d2775c046268c826 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 3 Jan 2024 16:48:30 -0800 Subject: [PATCH 26/37] More cleanup --- src/coreclr/jit/emitarm64.cpp | 181 ++++++++++++++++++++++++++++++++-- src/coreclr/jit/emitarm64.h | 11 ++- 2 files changed, 180 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index b774947e679496..fe9ec3af183dce 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -13480,13 +13480,66 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * Returns the register list size for the given instruction. + * Returns the register list size for the given SVE instruction. 
*/ -/*static*/ int emitter::insGetSveRegisterListSize(instruction ins) +/*static*/ int emitter::insGetSveReg1ListSize(instruction ins) { switch (ins) { + case INS_sve_ld1d: + case INS_sve_ld1w: + case INS_sve_ld1sw: + case INS_sve_ld1sb: + case INS_sve_ld1b: + case INS_sve_ld1sh: + case INS_sve_ld1h: + case INS_sve_ldnf1d: + case INS_sve_ldnf1sw: + case INS_sve_ldnf1sh: + case INS_sve_ldnf1w: + case INS_sve_ldnf1h: + case INS_sve_ldnf1sb: + case INS_sve_ldnf1b: + case INS_sve_ldnt1b: + case INS_sve_ldnt1d: + case INS_sve_ldnt1h: + case INS_sve_ldnt1w: + case INS_sve_ld1rob: + case INS_sve_ld1rod: + case INS_sve_ld1roh: + case INS_sve_ld1row: + case INS_sve_ld1rqb: + case INS_sve_ld1rqd: + case INS_sve_ld1rqh: + case INS_sve_ld1rqw: + case INS_sve_stnt1b: + case INS_sve_stnt1d: + case INS_sve_stnt1h: + case INS_sve_stnt1w: + case INS_sve_st1d: + case INS_sve_st1w: + case INS_sve_ldff1sh: + case INS_sve_ldff1w: + case INS_sve_ldff1h: + case INS_sve_ldff1d: + case INS_sve_ldff1sw: + case INS_sve_st1b: + case INS_sve_ldff1sb: + case INS_sve_ldff1b: + case INS_sve_ldnt1sb: + case INS_sve_ldnt1sh: + case INS_sve_ld1rd: + case INS_sve_ld1rsw: + case INS_sve_ld1rh: + case INS_sve_ld1rsb: + case INS_sve_ld1rsh: + case INS_sve_ld1rw: + case INS_sve_ld1q: + case INS_sve_ldnt1sw: + case INS_sve_st1q: + return 1; + case INS_sve_ld2b: case INS_sve_ld2h: case INS_sve_ld2w: @@ -13497,6 +13550,14 @@ void emitter::emitIns_Call(EmitCallType callType, case INS_sve_st2w: case INS_sve_st2d: case INS_sve_st2q: + case INS_sve_whilege: // SVE_DX_3A + case INS_sve_whilegt: // SVE_DX_3A + case INS_sve_whilehi: // SVE_DX_3A + case INS_sve_whilehs: // SVE_DX_3A + case INS_sve_whilele: // SVE_DX_3A + case INS_sve_whilels: // SVE_DX_3A + case INS_sve_whilelt: // SVE_DX_3A + case INS_sve_pext: // SVE_DW_2B return 2; case INS_sve_ld3b: @@ -13524,16 +13585,17 @@ void emitter::emitIns_Call(EmitCallType callType, return 4; default: + assert(!"Unexpected instruction"); return 1; } } 
/***************************************************************************** * - * Returns the predicate type for the given instruction's second register. + * Returns the predicate type for the given SVE instruction's second register. */ -/*static*/ emitter::PredicateType emitter::insGetPredicateTypeReg2(instruction ins, insFormat fmt) +/*static*/ emitter::PredicateType emitter::insGetSveReg2PredicateType(insFormat fmt) { switch (fmt) { @@ -13656,6 +13718,7 @@ void emitter::emitIns_Call(EmitCallType callType, case IF_SVE_HV_4A: return PREDICATE_MERGE; + case IF_SVE_CZ_4A_A: case IF_SVE_CZ_4A_L: case IF_SVE_CF_2A: case IF_SVE_CF_2B: @@ -13676,16 +13739,97 @@ void emitter::emitIns_Call(EmitCallType callType, // Therefore, by default return NONE due to ambiguity. case IF_SVE_AH_3A: case IF_SVE_DB_3A: + // TODO: Handle these cases. + break; + + case IF_SVE_JD_4B: + case IF_SVE_JD_4C: + case IF_SVE_JI_3A_A: + case IF_SVE_JJ_4A: + case IF_SVE_JJ_4A_B: + case IF_SVE_JJ_4A_C: + case IF_SVE_JJ_4A_D: + case IF_SVE_JJ_4B: + case IF_SVE_JJ_4B_E: + case IF_SVE_JN_3B: + case IF_SVE_JN_3C: + case IF_SVE_JD_4A: + case IF_SVE_JN_3A: + case IF_SVE_JD_4C_A: + case IF_SVE_JJ_4B_C: + case IF_SVE_JL_3A: + case IF_SVE_JN_3C_D: + case IF_SVE_HY_3A: + case IF_SVE_HY_3A_A: + case IF_SVE_HY_3B: + case IF_SVE_HZ_2A_B: + case IF_SVE_IA_2A: + case IF_SVE_IB_3A: + case IF_SVE_JK_4A: + case IF_SVE_JK_4A_B: + case IF_SVE_JK_4B: + case IF_SVE_IZ_4A: + case IF_SVE_IZ_4A_A: + case IF_SVE_JB_4A: + case IF_SVE_JM_3A: + case IF_SVE_CM_3A: + case IF_SVE_CN_3A: + case IF_SVE_CO_3A: + case IF_SVE_JA_4A: + case IF_SVE_CR_3A: + case IF_SVE_CS_3A: + case IF_SVE_CV_3A: + case IF_SVE_CV_3B: + case IF_SVE_DK_3A: + case IF_SVE_DW_2A: // [] + case IF_SVE_DW_2B: // [] + case IF_SVE_JC_4A: + case IF_SVE_JO_3A: + case IF_SVE_JE_3A: + case IF_SVE_JF_4A: + case IF_SVE_AK_3A: + case IF_SVE_HE_3A: + case IF_SVE_AF_3A: + case IF_SVE_AG_3A: + case IF_SVE_AI_3A: + case IF_SVE_AJ_3A: + case IF_SVE_AL_3A: + case 
IF_SVE_CL_3A: + case IF_SVE_DD_2A: + case IF_SVE_DF_2A: + case IF_SVE_GS_3A: + case IF_SVE_HJ_3A: + case IF_SVE_IY_4A: return PREDICATE_NONE; + default: + break; + } + + assert(!"Unexpected instruction format"); + return PREDICATE_NONE; +} + +/***************************************************************************** + * + * Returns the predicate type for the given SVE instruction's second register. + */ + +/*static*/ emitter::PredicateType emitter::insGetSveReg2PredicateType(instruction ins, insFormat fmt) +{ + switch (fmt) + { case IF_SVE_CW_4A: switch (ins) { case INS_sve_sel: return PREDICATE_NONE; + case INS_sve_mov: + return PREDICATE_ZERO; + default: - return PREDICATE_MERGE; + break; } case IF_SVE_CZ_4A: @@ -13694,11 +13838,32 @@ void emitter::emitIns_Call(EmitCallType callType, case INS_sve_sel: return PREDICATE_NONE; - default: + case INS_sve_mov: + case INS_sve_movs: + case INS_sve_and: + case INS_sve_bic: + case INS_sve_eor: + case INS_sve_orr: + case INS_sve_orn: + case INS_sve_not: + case INS_sve_ands: + case INS_sve_bics: + case INS_sve_eors: + case INS_sve_nand: + case INS_sve_nands: + case INS_sve_nor: + case INS_sve_nors: + case INS_sve_nots: + case INS_sve_orns: + case INS_sve_orrs: return PREDICATE_ZERO; + + default: + break; } default: + assert(!"Unexpected instruction and format"); return PREDICATE_NONE; } } @@ -18726,8 +18891,8 @@ void emitter::emitDispInsHelp( // { .D, .D, .D, .D }, , [{, #, MUL VL}] case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) imm = emitGetInsSC(id); - emitDispSveRegList(id->idReg1(), insGetSveRegisterListSize(ins), id->idInsOpt(), true); // ttttt - emitDispPredicateReg(id->idReg2(), insGetPredicateTypeReg2(ins, fmt), id->idInsOpt(), true); // ggg + emitDispSveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), insGetSveReg2PredicateType(fmt), id->idInsOpt(), true); // ggg printf("["); 
emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn if (imm != 0) diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 62d18b6a54c7cf..dcd2e257f66489 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -482,11 +482,14 @@ static code_t insEncodeSveElemsize(emitAttr size); // This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); -// Returns the register list size for the given instruction. -static int insGetSveRegisterListSize(instruction ins); +// Returns the first register list size for the given SVE instruction. +static int insGetSveReg1ListSize(instruction ins); -// Returns the predicate type for the given instruction's second register. -static PredicateType insGetPredicateTypeReg2(instruction ins, insFormat fmt); +// Returns the predicate type for the given SVE instruction's second register. +static PredicateType insGetSveReg2PredicateType(insFormat fmt); + +// Returns the predicate type for the given SVE instruction's second register. +static PredicateType insGetSveReg2PredicateType(instruction ins, insFormat fmt); // Returns true if the specified instruction can encode the 'dtype' field. 
static bool canEncodeSveElemsize_dtype(instruction ins); From 13e3250a2f50c0269a0eef6fb17cd676b5a8d974 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 3 Jan 2024 17:27:55 -0800 Subject: [PATCH 27/37] Fixed cases --- src/coreclr/jit/emitarm64.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index fe9ec3af183dce..2520b7ba072b92 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -13607,6 +13607,8 @@ void emitter::emitIns_Call(EmitCallType callType, case IF_SVE_HW_4B: case IF_SVE_HW_4B_D: case IF_SVE_HX_3A_E: + case IF_SVE_IJ_3A_D: + case IF_SVE_IJ_3A_E: case IF_SVE_IJ_3A_F: case IF_SVE_IK_4A_G: case IF_SVE_IJ_3A_G: From 5aa7a5aa543ec25a2e243fed2b94e3fd6d1e1034 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 3 Jan 2024 18:12:36 -0800 Subject: [PATCH 28/37] Formatting --- src/coreclr/jit/emitarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 2520b7ba072b92..dddb5eb85fccc1 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -18893,7 +18893,7 @@ void emitter::emitDispInsHelp( // { .D, .D, .D, .D }, , [{, #, MUL VL}] case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) imm = emitGetInsSC(id); - emitDispSveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispSveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt emitDispPredicateReg(id->idReg2(), insGetSveReg2PredicateType(fmt), id->idInsOpt(), true); // ggg printf("["); emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn From 39e45cc6bff5eba7ff1addf628c647efba53e13d Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 4 Jan 2024 09:50:05 -0800 Subject: [PATCH 29/37] Fix build --- src/coreclr/jit/emitarm64.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index dddb5eb85fccc1..5d7047feecdd47 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -13833,6 +13833,7 @@ void emitter::emitIns_Call(EmitCallType callType, default: break; } + break; case IF_SVE_CZ_4A: switch (ins) @@ -13863,6 +13864,7 @@ void emitter::emitIns_Call(EmitCallType callType, default: break; } + break; default: assert(!"Unexpected instruction and format"); From 13386772d117006fe51c81dacfd341e2fa504330 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 4 Jan 2024 12:00:11 -0800 Subject: [PATCH 30/37] Fix build --- src/coreclr/jit/emitarm64.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 5d7047feecdd47..4dfbe39d142b0c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -13867,9 +13867,11 @@ void emitter::emitIns_Call(EmitCallType callType, break; default: - assert(!"Unexpected instruction and format"); - return PREDICATE_NONE; + break; } + + assert(!"Unexpected instruction and format"); + return PREDICATE_NONE; } /***************************************************************************** From 895fa9bb1bae4f44498c4b0c89f3de2c2db66eec Mon Sep 17 00:00:00 2001 From: TIHan Date: Tue, 9 Jan 2024 15:05:34 -0800 Subject: [PATCH 31/37] Feedback --- src/coreclr/jit/emitarm64.cpp | 114 +++++++++++-------------------- src/coreclr/jit/emitarm64.h | 3 - src/coreclr/jit/instrsarm64sve.h | 52 +++++++------- 3 files changed, 66 insertions(+), 103 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 4dfbe39d142b0c..82b559a01c9efe 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -13812,68 +13812,6 @@ void emitter::emitIns_Call(EmitCallType callType, return PREDICATE_NONE; } -/***************************************************************************** - * - * Returns the 
predicate type for the given SVE instruction's second register. - */ - -/*static*/ emitter::PredicateType emitter::insGetSveReg2PredicateType(instruction ins, insFormat fmt) -{ - switch (fmt) - { - case IF_SVE_CW_4A: - switch (ins) - { - case INS_sve_sel: - return PREDICATE_NONE; - - case INS_sve_mov: - return PREDICATE_ZERO; - - default: - break; - } - break; - - case IF_SVE_CZ_4A: - switch (ins) - { - case INS_sve_sel: - return PREDICATE_NONE; - - case INS_sve_mov: - case INS_sve_movs: - case INS_sve_and: - case INS_sve_bic: - case INS_sve_eor: - case INS_sve_orr: - case INS_sve_orn: - case INS_sve_not: - case INS_sve_ands: - case INS_sve_bics: - case INS_sve_eors: - case INS_sve_nand: - case INS_sve_nands: - case INS_sve_nor: - case INS_sve_nors: - case INS_sve_nots: - case INS_sve_orns: - case INS_sve_orrs: - return PREDICATE_ZERO; - - default: - break; - } - break; - - default: - break; - } - - assert(!"Unexpected instruction and format"); - return PREDICATE_NONE; -} - /***************************************************************************** * * Returns true if the specified instruction can encode the 'dtype' field. @@ -13916,7 +13854,7 @@ void emitter::emitIns_Call(EmitCallType callType, { case INS_sve_ld1b: case INS_sve_ldnf1b: - return code & ~((1 << 22) | (1 << 21)); // Set bit '22' and '21' to 0. + return code; // By default, the instruction already encodes 8-bit. default: assert(!"Invalid instruction for encoding dtype."); @@ -13928,9 +13866,9 @@ void emitter::emitIns_Call(EmitCallType callType, { case INS_sve_ld1b: case INS_sve_ld1h: - case INS_sve_ldnf1h: case INS_sve_ldnf1b: - return code & ~(1 << 22); // Set bit '22' to 0. + case INS_sve_ldnf1h: + return code | (1 << 21); // Set bit '21' to 1. 
case INS_sve_ld1sb: case INS_sve_ldnf1sb: @@ -13945,17 +13883,23 @@ void emitter::emitIns_Call(EmitCallType callType, switch (ins) { case INS_sve_ld1w: + // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the + // proper encoding for S. + return (code | (1 << 15)) | (1 << 22); // Set bit '22' and '15' to 1. + + case INS_sve_ldnf1w: + return code; // By default, the instruction already encodes 32-bit. + case INS_sve_ld1b: case INS_sve_ld1h: - case INS_sve_ldnf1w: - case INS_sve_ldnf1h: case INS_sve_ldnf1b: - return code & ~(1 << 21); // Set bit '21' to 0. + case INS_sve_ldnf1h: + return code | (1 << 22); // Set bit '22' to 1. case INS_sve_ld1sb: case INS_sve_ld1sh: - case INS_sve_ldnf1sh: case INS_sve_ldnf1sb: + case INS_sve_ldnf1sh: return code | (1 << 21); // Set bit '21' to 1. default: @@ -13964,16 +13908,38 @@ void emitter::emitIns_Call(EmitCallType callType, return code; case EA_8BYTE: - return code; // By default, the instruction already encodes 64-bit. + switch (ins) + { + case INS_sve_ld1w: + // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the + // proper encoding for D. + return ((code | (1 << 15)) | (1 << 22)) | (1 << 21); // Set bit '22', '21' and '15' to 1. + + case INS_sve_ldnf1w: + return code | (1 << 21); // Set bit '21' to 1. Set bit '15' to 1. + + case INS_sve_ld1b: + case INS_sve_ld1h: + case INS_sve_ldnf1b: + case INS_sve_ldnf1h: + return (code | (1 << 22)) | (1 << 21); // Set bit '22' and '21' to 1. + + case INS_sve_ld1sb: + case INS_sve_ld1sh: + case INS_sve_ldnf1sb: + case INS_sve_ldnf1sh: + return code; // By default, the instruction already encodes 64-bit. + + default: + assert(!"Invalid instruction for encoding dtype."); + } + return code; case EA_16BYTE: switch (ins) { case INS_sve_ld1w: - // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '0' to get the - // proper encoding for Q. 
- return (code & ~((1 << 22) | (1 << 21) | (1 << 15))) | - (1 << 20); // Set bits '22', '21' and '15' to 0. Set bit '20' to 1. + return code | (1 << 20); // Set bit '20' to 1. default: assert(!"Invalid instruction for encoding dtype."); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index dcd2e257f66489..eac2df71978050 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -488,9 +488,6 @@ static int insGetSveReg1ListSize(instruction ins); // Returns the predicate type for the given SVE instruction's second register. static PredicateType insGetSveReg2PredicateType(insFormat fmt); -// Returns the predicate type for the given SVE instruction's second register. -static PredicateType insGetSveReg2PredicateType(instruction ins, insFormat fmt); - // Returns true if the specified instruction can encode the 'dtype' field. static bool canEncodeSveElemsize_dtype(instruction ins); diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index 5a676899385a0c..ad5094bd141e36 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -8,20 +8,20 @@ * nm -- textual name (for assembly dipslay) * info -- miscellaneous instruction info (load/store/compare/ASIMD right shift) * fmt -- encoding format used by this instruction -* e1 -- encoding 1 -* e2 -- encoding 2 -* e3 -- encoding 3 -* e4 -- encoding 4 -* e5 -- encoding 5 -* e6 -- encoding 6 -* e7 -- encoding 7 -* e8 -- encoding 8 -* e9 -- encoding 9 -* e10 -- encoding 10 -* e11 -- encoding 11 -* e12 -- encoding 12 -* e13 -- encoding 13 -*****************************************************************************/ + * e1 -- encoding 1 + * e2 -- encoding 2 + * e3 -- encoding 3 + * e4 -- encoding 4 + * e5 -- encoding 5 + * e6 -- encoding 6 + * e7 -- encoding 7 + * e8 -- encoding 8 + * e9 -- encoding 9 + * e10 -- encoding 10 + * e11 -- encoding 11 + * e12 -- encoding 12 + * e13 -- encoding 13 + 
*****************************************************************************/ #if !defined(TARGET_ARM64) #error Unexpected target type #endif @@ -119,7 +119,7 @@ INST9(ld1sh, "ld1sh", 0, IF_SV // enum name info SVE_HW_4A SVE_HW_4A_A SVE_HW_4A_B SVE_HW_4A_C SVE_HW_4B SVE_HW_4B_D SVE_HX_3A_E SVE_IJ_3A_G SVE_IK_4A_I -INST9(ld1h, "ld1h", 0, IF_SVE_9B, 0x84A04000, 0xC4A04000, 0xC4804000, 0x84804000, 0xC4E0C000, 0xC4C0C000, 0x84A0C000, 0xA4E0A000, 0xA4804000 ) +INST9(ld1h, "ld1h", 0, IF_SVE_9B, 0x84A04000, 0xC4A04000, 0xC4804000, 0x84804000, 0xC4E0C000, 0xC4C0C000, 0x84A0C000, 0xA480A000, 0xA4804000 ) // LD1H {.S }, /Z, [, .S, #1] SVE_HW_4A 100001001h1mmmmm 010gggnnnnnttttt 84A0 4000 // LD1H {.D }, /Z, [, .D, #1] SVE_HW_4A_A 110001001h1mmmmm 010gggnnnnnttttt C4A0 4000 // LD1H {.D }, /Z, [, .D, ] SVE_HW_4A_B 110001001h0mmmmm 010gggnnnnnttttt C480 4000 @@ -127,12 +127,12 @@ INST9(ld1h, "ld1h", 0, IF_SV // LD1H {.D }, /Z, [, .D, LSL #1] SVE_HW_4B 11000100111mmmmm 110gggnnnnnttttt C4E0 C000 // LD1H {.D }, /Z, [, .D] SVE_HW_4B_D 11000100110mmmmm 110gggnnnnnttttt C4C0 C000 // LD1H {.D }, /Z, [.D{, #}] SVE_HX_3A_E 10000100101iiiii 110gggnnnnnttttt 84A0 C000 - // LD1H {.D }, /Z, [{, #, MUL VL}] SVE_IJ_3A_G 101001001110iiii 101gggnnnnnttttt A4E0 A000 + // LD1H {.X }, /Z, [{, #, MUL VL}] SVE_IJ_3A_G 101001001000iiii 101gggnnnnnttttt A480 A000 // LD1H {.D }, /Z, [, , LSL #1] SVE_IK_4A_I 10100100100mmmmm 010gggnnnnnttttt A480 4000 // enum name info SVE_HW_4A SVE_HW_4A_A SVE_HW_4A_B SVE_HW_4A_C SVE_HW_4B SVE_HW_4B_D SVE_HX_3A_E SVE_IH_3A_F SVE_II_4A_H -INST9(ld1w, "ld1w", 0, IF_SVE_9C, 0x85204000, 0xC5204000, 0xC5004000, 0x85004000, 0xC560C000, 0xC540C000, 0x8520C000, 0xA560A000, 0xA5000000 ) +INST9(ld1w, "ld1w", 0, IF_SVE_9C, 0x85204000, 0xC5204000, 0xC5004000, 0x85004000, 0xC560C000, 0xC540C000, 0x8520C000, 0xA5002000, 0xA5000000 ) // LD1W {.S }, /Z, [, .S, #2] SVE_HW_4A 100001010h1mmmmm 010gggnnnnnttttt 8520 4000 // LD1W {.D }, /Z, [, .D, #2] SVE_HW_4A_A 110001010h1mmmmm 
010gggnnnnnttttt C520 4000 // LD1W {.D }, /Z, [, .D, ] SVE_HW_4A_B 110001010h0mmmmm 010gggnnnnnttttt C500 4000 @@ -140,7 +140,7 @@ INST9(ld1w, "ld1w", 0, IF_SV // LD1W {.D }, /Z, [, .D, LSL #2] SVE_HW_4B 11000101011mmmmm 110gggnnnnnttttt C560 C000 // LD1W {.D }, /Z, [, .D] SVE_HW_4B_D 11000101010mmmmm 110gggnnnnnttttt C540 C000 // LD1W {.D }, /Z, [.D{, #}] SVE_HX_3A_E 10000101001iiiii 110gggnnnnnttttt 8520 C000 - // LD1W {.D }, /Z, [{, #, MUL VL}] SVE_IH_3A_F 101001010110iiii 101gggnnnnnttttt A560 A000 + // LD1W {.X }, /Z, [{, #, MUL VL}] SVE_IH_3A_F 101001010000iiii 001gggnnnnnttttt A500 2000 // LD1W {.D }, /Z, [, , LSL #2] SVE_II_4A_H 10100101000mmmmm 000gggnnnnnttttt A500 0000 @@ -271,12 +271,12 @@ INST6(ld1sb, "ld1sb", 0, IF_SV // enum name info SVE_HW_4A SVE_HW_4A_A SVE_HW_4B SVE_HX_3A_B SVE_IJ_3A_E SVE_IK_4A_H -INST6(ld1b, "ld1b", 0, IF_SVE_6D, 0xC4004000, 0x84004000, 0xC440C000, 0x8420C000, 0xA460A000, 0xA4004000 ) +INST6(ld1b, "ld1b", 0, IF_SVE_6D, 0xC4004000, 0x84004000, 0xC440C000, 0x8420C000, 0xA400A000, 0xA4004000 ) // LD1B {.D }, /Z, [, .D, ] SVE_HW_4A 110001000h0mmmmm 010gggnnnnnttttt C400 4000 // LD1B {.S }, /Z, [, .S, ] SVE_HW_4A_A 100001000h0mmmmm 010gggnnnnnttttt 8400 4000 // LD1B {.D }, /Z, [, .D] SVE_HW_4B 11000100010mmmmm 110gggnnnnnttttt C440 C000 // LD1B {.D }, /Z, [.D{, #}] SVE_HX_3A_B 10000100001iiiii 110gggnnnnnttttt 8420 C000 - // LD1B {.D }, /Z, [{, #, MUL VL}] SVE_IJ_3A_E 101001000110iiii 101gggnnnnnttttt A460 A000 + // LD1B {.B }, /Z, [{, #, MUL VL}] SVE_IJ_3A_E 101001000000iiii 101gggnnnnnttttt A400 A000 // LD1B {.D }, /Z, [, ] SVE_IK_4A_H 10100100000mmmmm 010gggnnnnnttttt A400 4000 @@ -2815,8 +2815,8 @@ INST1(ld1rw, "ld1rw", 0, IF_SV // enum name info SVE_IL_3A_C -INST1(ldnf1b, "ldnf1b", 0, IF_SVE_IL_3A_C, 0xA470A000 ) - // LDNF1B {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_C 101001000111iiii 101gggnnnnnttttt A470 A000 +INST1(ldnf1b, "ldnf1b", 0, IF_SVE_IL_3A_C, 0xA410A000 ) + // LDNF1B {.B }, /Z, [{, #, MUL VL}] SVE_IL_3A_C 
101001000001iiii 101gggnnnnnttttt A410 A000 // enum name info SVE_IL_3A @@ -2828,8 +2828,8 @@ INST1(ldnf1sw, "ldnf1sw", 0, IF_SV // enum name info SVE_IL_3A_B -INST1(ldnf1h, "ldnf1h", 0, IF_SVE_IL_3A_B, 0xA4F0A000 ) - // LDNF1H {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_B 101001001111iiii 101gggnnnnnttttt A4F0 A000 +INST1(ldnf1h, "ldnf1h", 0, IF_SVE_IL_3A_B, 0xA490A000 ) + // LDNF1H {.X }, /Z, [{, #, MUL VL}] SVE_IL_3A_B 101001001001iiii 101gggnnnnnttttt A490 A000 INST1(ldnf1sb, "ldnf1sb", 0, IF_SVE_IL_3A_B, 0xA590A000 ) // LDNF1SB {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_B 101001011001iiii 101gggnnnnnttttt A590 A000 @@ -2839,8 +2839,8 @@ INST1(ldnf1sb, "ldnf1sb", 0, IF_SV INST1(ldnf1sh, "ldnf1sh", 0, IF_SVE_IL_3A_A, 0xA510A000 ) // LDNF1SH {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_A 101001010001iiii 101gggnnnnnttttt A510 A000 -INST1(ldnf1w, "ldnf1w", 0, IF_SVE_IL_3A_A, 0xA570A000 ) - // LDNF1W {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_A 101001010111iiii 101gggnnnnnttttt A570 A000 +INST1(ldnf1w, "ldnf1w", 0, IF_SVE_IL_3A_A, 0xA550A000 ) + // LDNF1W {.S }, /Z, [{, #, MUL VL}] SVE_IL_3A_A 101001010101iiii 101gggnnnnnttttt A550 A000 // enum name info SVE_IW_4A From 2db38a79f12593a7345acfd73afddf6d5c081fc2 Mon Sep 17 00:00:00 2001 From: TIHan Date: Tue, 9 Jan 2024 15:14:28 -0800 Subject: [PATCH 32/37] Feedback --- src/coreclr/jit/codegenarm64test.cpp | 5 ++++- src/coreclr/jit/emitarm64.cpp | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 5b2ffbe6539c96..c01653e0b6deee 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5491,7 +5491,7 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_D); // LD1RQD {.D }, /Z, [{, #}] theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqh, EA_SCALABLE, REG_V4, REG_P5, REG_R6, 112, INS_OPTS_SCALABLE_H); // LD1RQH {.H }, /Z, [{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqw, EA_SCALABLE, REG_V3, REG_P2, 
REG_R1, -16, + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqw, EA_SCALABLE, REG_V31, REG_P2, REG_R1, -16, INS_OPTS_SCALABLE_S); // LD1RQW {.S }, /Z, [{, #}] // IF_SVE_IQ_3A @@ -5648,6 +5648,9 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_st4w, EA_SCALABLE, REG_V0, REG_P1, REG_R5, 28, INS_OPTS_SCALABLE_S); // ST4W {.S, .S, .S, .S }, , [{, // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st4w, EA_SCALABLE, REG_V31, REG_P1, REG_R5, 28, + INS_OPTS_SCALABLE_S); // ST4W {.S, .S, .S, .S }, , [{, + // #, MUL VL}] } #endif // defined(TARGET_ARM64) && defined(DEBUG) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 82b559a01c9efe..fec476051514e8 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -16871,7 +16871,7 @@ void emitter::emitDispSveRegList(regNumber firstReg, unsigned listSize, insOpts } else { - // short-hand + // short-hand. example: { z0.s - z2.s } emitDispSveReg(currReg, opt, false); printf(" - "); emitDispSveReg((regNumber)(currReg + listSize - 1), opt, false); From 44b3b6b3927b759761d16d341c8c5d65f001f1be Mon Sep 17 00:00:00 2001 From: TIHan Date: Tue, 9 Jan 2024 15:17:45 -0800 Subject: [PATCH 33/37] Feedback --- src/coreclr/jit/emitarm64.cpp | 9 ++++----- src/coreclr/jit/emitarm64.h | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index fec476051514e8..f6de355c046dfc 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1308,6 +1308,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isVectorRegister(id->idReg1())); // ttttt assert(isPredicateRegister(id->idReg2())); // ggg assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isScalableVectorSize(elemsize)); #ifdef DEBUG switch (id->idIns()) @@ -1370,8 +1371,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; } #endif // DEBUG - - assert(isScalableVectorSize(elemsize)); break; 
default: @@ -13592,10 +13591,10 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * Returns the predicate type for the given SVE instruction's second register. + * Returns the predicate type for the given SVE format. */ -/*static*/ emitter::PredicateType emitter::insGetSveReg2PredicateType(insFormat fmt) +/*static*/ emitter::PredicateType emitter::insGetPredicateType(insFormat fmt) { switch (fmt) { @@ -18864,7 +18863,7 @@ void emitter::emitDispInsHelp( case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) imm = emitGetInsSC(id); emitDispSveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt - emitDispPredicateReg(id->idReg2(), insGetSveReg2PredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg printf("["); emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn if (imm != 0) diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index eac2df71978050..f60cabd05393e8 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -485,8 +485,8 @@ static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); // Returns the first register list size for the given SVE instruction. static int insGetSveReg1ListSize(instruction ins); -// Returns the predicate type for the given SVE instruction's second register. -static PredicateType insGetSveReg2PredicateType(insFormat fmt); +// Returns the predicate type for the given SVE format. +static PredicateType insGetPredicateType(insFormat fmt); // Returns true if the specified instruction can encode the 'dtype' field. 
static bool canEncodeSveElemsize_dtype(instruction ins); From 3de8a54f2f06201f7c3b29565d2f8ac29ad3a770 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 10 Jan 2024 10:48:26 -0800 Subject: [PATCH 34/37] Feedback --- src/coreclr/jit/codegenarm64test.cpp | 3 +++ src/coreclr/jit/emitarm64.cpp | 1 + 2 files changed, 4 insertions(+) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index c01653e0b6deee..d79b9718400195 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5498,6 +5498,9 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V0, REG_P1, REG_R2, -16, INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V31, REG_P1, REG_R2, -16, + INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL + // VL}] theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 14, INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL // VL}] diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f6de355c046dfc..b7c2455bb92328 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -16859,6 +16859,7 @@ void emitter::emitDispSveRegList(regNumber firstReg, unsigned listSize, insOpts printf("{ "); if (listSize > 1) { + // We do not want the short-hand for list size of 2. 
if ((listSize == 2) || (((unsigned)currReg + listSize - 1) > (unsigned)REG_V31)) { for (unsigned i = 0; i < listSize; i++) From 6ccaca7ec14421275a6786bf08dd05e217a821bf Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 10 Jan 2024 10:54:21 -0800 Subject: [PATCH 35/37] Feedback --- src/coreclr/jit/emitarm64.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index b7c2455bb92328..310bae076fa853 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -16857,10 +16857,10 @@ void emitter::emitDispSveRegList(regNumber firstReg, unsigned listSize, insOpts regNumber currReg = firstReg; printf("{ "); - if (listSize > 1) + if (listSize >= 1) { - // We do not want the short-hand for list size of 2. - if ((listSize == 2) || (((unsigned)currReg + listSize - 1) > (unsigned)REG_V31)) + // We do not want the short-hand for list size of 1 or 2. + if ((listSize <= 2) || (((unsigned)currReg + listSize - 1) > (unsigned)REG_V31)) { for (unsigned i = 0; i < listSize; i++) { @@ -16871,16 +16871,12 @@ void emitter::emitDispSveRegList(regNumber firstReg, unsigned listSize, insOpts } else { - // short-hand. example: { z0.s - z2.s } + // short-hand. 
example: { z0.s - z2.s } which is the same as { z0.s, z1.s, z2.s } emitDispSveReg(currReg, opt, false); printf(" - "); emitDispSveReg((regNumber)(currReg + listSize - 1), opt, false); } } - else if (listSize == 1) - { - emitDispSveReg(currReg, opt, false); - } printf(" }"); if (addComma) From ed21f1bb3a1baa2dd762705f419db0ae5880a02d Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 10 Jan 2024 11:18:03 -0800 Subject: [PATCH 36/37] Feedback --- src/coreclr/jit/emitarm64.cpp | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 310bae076fa853..cf6e3efb21872a 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -16856,27 +16856,26 @@ void emitter::emitDispSveRegList(regNumber firstReg, unsigned listSize, insOpts regNumber currReg = firstReg; + assert(listSize > 0); + printf("{ "); - if (listSize >= 1) + // We do not want the short-hand for list size of 1 or 2. + if ((listSize <= 2) || (((unsigned)currReg + listSize - 1) > (unsigned)REG_V31)) { - // We do not want the short-hand for list size of 1 or 2. - if ((listSize <= 2) || (((unsigned)currReg + listSize - 1) > (unsigned)REG_V31)) + for (unsigned i = 0; i < listSize; i++) { - for (unsigned i = 0; i < listSize; i++) - { - const bool notLastRegister = (i != listSize - 1); - emitDispSveReg(currReg, opt, notLastRegister); - currReg = (currReg == REG_V31) ? REG_V0 : REG_NEXT(currReg); - } - } - else - { - // short-hand. example: { z0.s - z2.s } which is the same as { z0.s, z1.s, z2.s } - emitDispSveReg(currReg, opt, false); - printf(" - "); - emitDispSveReg((regNumber)(currReg + listSize - 1), opt, false); + const bool notLastRegister = (i != listSize - 1); + emitDispSveReg(currReg, opt, notLastRegister); + currReg = (currReg == REG_V31) ? REG_V0 : REG_NEXT(currReg); } } + else + { + // short-hand. 
example: { z0.s - z2.s } which is the same as { z0.s, z1.s, z2.s } + emitDispSveReg(currReg, opt, false); + printf(" - "); + emitDispSveReg((regNumber)(currReg + listSize - 1), opt, false); + } printf(" }"); if (addComma) From e8b959878f2d3da320db3e9282d8ecd73c0ada06 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 10 Jan 2024 11:44:22 -0800 Subject: [PATCH 37/37] Rename emitDispSveRegList to emitDispSveConsecutiveRegList --- src/coreclr/jit/emitarm64.cpp | 14 +++++++------- src/coreclr/jit/emitarm64.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index cf6e3efb21872a..67c3be9be944a1 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -16848,9 +16848,9 @@ void emitter::emitDispVectorElemList( } //------------------------------------------------------------------------ -// emitDispSveRegList: Display a SVE vector register list +// emitDispSveConsecutiveRegList: Display a SVE consecutive vector register list // -void emitter::emitDispSveRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma) +void emitter::emitDispSveConsecutiveRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma) { assert(isVectorRegister(firstReg)); @@ -18706,9 +18706,9 @@ void emitter::emitDispInsHelp( // .H, { .S-.S }, # case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_S, true); // nnnn - emitDispImm(emitGetInsSC(id), false); // iiii + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveConsecutiveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_S, true); // nnnn + emitDispImm(emitGetInsSC(id), false); // iiii break; // , . 
@@ -18858,8 +18858,8 @@ void emitter::emitDispInsHelp( // { .D, .D, .D, .D }, , [{, #, MUL VL}] case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) imm = emitGetInsSC(id); - emitDispSveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg printf("["); emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn if (imm != 0) diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index f60cabd05393e8..c35c5a5f93c918 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -53,7 +53,7 @@ void emitDispVectorReg(regNumber reg, insOpts opt, bool addComma); void emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma); void emitDispVectorRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma); void emitDispVectorElemList(regNumber firstReg, unsigned listSize, emitAttr elemsize, unsigned index, bool addComma); -void emitDispSveRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma); +void emitDispSveConsecutiveRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma); void emitDispPredicateReg(regNumber reg, PredicateType ptype, insOpts opt, bool addComma); void emitDispLowPredicateReg(regNumber reg, PredicateType ptype, insOpts opt, bool addComma); void emitDispArrangement(insOpts opt);