diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 2dad168edd770a..d79b9718400195 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5369,6 +5369,291 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_H); // FRECPX ., /M, . theEmitter->emitIns_R_R_R(INS_sve_fsqrt, EA_SCALABLE, REG_V6, REG_P6, REG_V6, INS_OPTS_SCALABLE_S); // FSQRT ., /M, . + + // IF_SVE_IH_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V5, REG_P3, REG_R4, 0, + INS_OPTS_SCALABLE_D); // LD1D {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IH_3A_A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 5, + INS_OPTS_SCALABLE_Q); // LD1D {.Q }, /Z, [{, #, MUL VL}] + + // IF_SVE_IH_3A_F + theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 3, + INS_OPTS_SCALABLE_S); // LD1W {.S }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 3, + INS_OPTS_SCALABLE_D); // LD1W {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 3, + INS_OPTS_SCALABLE_Q); // LD1W {.Q }, /Z, [{, #, MUL VL}] + + // IF_SVE_IJ_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sw, EA_SCALABLE, REG_V0, REG_P5, REG_R3, 4, + INS_OPTS_SCALABLE_D); // LD1SW {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IJ_3A_D + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V3, REG_P0, REG_R2, 6, + INS_OPTS_SCALABLE_H); // LD1SB {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V3, REG_P0, REG_R2, 6, + INS_OPTS_SCALABLE_S); // LD1SB {.S }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V3, REG_P0, REG_R2, 6, + INS_OPTS_SCALABLE_D); // LD1SB {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IJ_3A_E + theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V5, REG_P1, REG_R3, 7, + INS_OPTS_SCALABLE_B); // LD1B {.B }, /Z, [{, 
#, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V5, REG_P1, REG_R3, 7, + INS_OPTS_SCALABLE_H); // LD1B {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V5, REG_P1, REG_R3, 7, + INS_OPTS_SCALABLE_S); // LD1B {.S }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V5, REG_P1, REG_R3, 7, + INS_OPTS_SCALABLE_D); // LD1B {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IJ_3A_F + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V7, REG_P3, REG_R5, 2, + INS_OPTS_SCALABLE_S); // LD1SH {.S }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V7, REG_P3, REG_R5, 2, + INS_OPTS_SCALABLE_D); // LD1SH {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IJ_3A_G + theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V2, REG_P1, REG_R6, 1, + INS_OPTS_SCALABLE_H); // LD1H {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V2, REG_P1, REG_R6, 1, + INS_OPTS_SCALABLE_S); // LD1H {.S }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V2, REG_P1, REG_R6, 1, + INS_OPTS_SCALABLE_D); // LD1H {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IL_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1d, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + INS_OPTS_SCALABLE_D); // LDNF1D {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + INS_OPTS_SCALABLE_D); // LDNF1SW {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1d, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 5, + INS_OPTS_SCALABLE_D); // LDNF1D {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sw, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 5, + INS_OPTS_SCALABLE_D); // LDNF1SW {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IL_3A_A + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sh, EA_SCALABLE, REG_V0, REG_P1, REG_R5, 5, + INS_OPTS_SCALABLE_S); // LDNF1SH {.S }, /Z, [{, #, MUL VL}] + 
theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1w, EA_SCALABLE, REG_V0, REG_P2, REG_R4, 5, + INS_OPTS_SCALABLE_S); // LDNF1W {.S }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sh, EA_SCALABLE, REG_V0, REG_P1, REG_R5, 5, + INS_OPTS_SCALABLE_D); // LDNF1SH {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1w, EA_SCALABLE, REG_V0, REG_P2, REG_R4, 5, + INS_OPTS_SCALABLE_D); // LDNF1W {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IL_3A_B + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1h, EA_SCALABLE, REG_V1, REG_P3, REG_R2, 5, + INS_OPTS_SCALABLE_H); // LDNF1H {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sb, EA_SCALABLE, REG_V0, REG_P4, REG_R1, 5, + INS_OPTS_SCALABLE_H); // LDNF1SB {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1h, EA_SCALABLE, REG_V1, REG_P3, REG_R2, 5, + INS_OPTS_SCALABLE_S); // LDNF1H {.S }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sb, EA_SCALABLE, REG_V0, REG_P4, REG_R1, 5, + INS_OPTS_SCALABLE_S); // LDNF1SB {.S }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1h, EA_SCALABLE, REG_V1, REG_P3, REG_R2, 5, + INS_OPTS_SCALABLE_D); // LDNF1H {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1sb, EA_SCALABLE, REG_V0, REG_P4, REG_R1, 5, + INS_OPTS_SCALABLE_D); // LDNF1SB {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IL_3A_C + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, -4, + INS_OPTS_SCALABLE_B); // LDNF1B {.B }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, -2, + INS_OPTS_SCALABLE_H); // LDNF1B {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, 2, + INS_OPTS_SCALABLE_S); // LDNF1B {.S }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnf1b, EA_SCALABLE, REG_V2, REG_P5, REG_R3, 1, + INS_OPTS_SCALABLE_D); // LDNF1B {.D }, /Z, [{, #, MUL VL}] + + // IF_SVE_IM_3A + 
theEmitter->emitIns_R_R_R_I(INS_sve_ldnt1b, EA_SCALABLE, REG_V0, REG_P1, REG_R2, -5, + INS_OPTS_SCALABLE_B); // LDNT1B {.B }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnt1d, EA_SCALABLE, REG_V3, REG_P4, REG_R5, -1, + INS_OPTS_SCALABLE_D); // LDNT1D {.D }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnt1h, EA_SCALABLE, REG_V6, REG_P7, REG_R8, 0, + INS_OPTS_SCALABLE_H); // LDNT1H {.H }, /Z, [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldnt1w, EA_SCALABLE, REG_V1, REG_P2, REG_R3, -8, + INS_OPTS_SCALABLE_S); // LDNT1W {.S }, /Z, [{, #, MUL VL}] + + // IF_SVE_IO_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rob, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 0, + INS_OPTS_SCALABLE_B); // LD1ROB {.B }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rod, EA_SCALABLE, REG_V4, REG_P5, REG_R6, -32, + INS_OPTS_SCALABLE_D); // LD1ROD {.D }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1roh, EA_SCALABLE, REG_V8, REG_P3, REG_R1, -256, + INS_OPTS_SCALABLE_H); // LD1ROH {.H }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1row, EA_SCALABLE, REG_V3, REG_P4, REG_R0, 224, + INS_OPTS_SCALABLE_S); // LD1ROW {.S }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqb, EA_SCALABLE, REG_V6, REG_P7, REG_R8, 64, + INS_OPTS_SCALABLE_B); // LD1RQB {.B }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqd, EA_SCALABLE, REG_V9, REG_P0, REG_R1, -128, + INS_OPTS_SCALABLE_D); // LD1RQD {.D }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqh, EA_SCALABLE, REG_V4, REG_P5, REG_R6, 112, + INS_OPTS_SCALABLE_H); // LD1RQH {.H }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rqw, EA_SCALABLE, REG_V31, REG_P2, REG_R1, -16, + INS_OPTS_SCALABLE_S); // LD1RQW {.S }, /Z, [{, #}] + + // IF_SVE_IQ_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V0, REG_P1, REG_R2, -16, + INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V31, REG_P1, REG_R2, -16, + 
INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 14, + INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3q, EA_SCALABLE, REG_V0, REG_P4, REG_R5, -24, + INS_OPTS_SCALABLE_Q); // LD3Q {.Q, .Q, .Q }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3q, EA_SCALABLE, REG_V0, REG_P4, REG_R5, 21, + INS_OPTS_SCALABLE_Q); // LD3Q {.Q, .Q, .Q }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V0, REG_P5, REG_R3, -32, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V0, REG_P5, REG_R3, 28, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V12, REG_P1, REG_R2, -16, + INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V13, REG_P1, REG_R2, 14, + INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3q, EA_SCALABLE, REG_V14, REG_P4, REG_R5, -24, + INS_OPTS_SCALABLE_Q); // LD3Q {.Q, .Q, .Q }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3q, EA_SCALABLE, REG_V15, REG_P4, REG_R5, 21, + INS_OPTS_SCALABLE_Q); // LD3Q {.Q, .Q, .Q }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V16, REG_P5, REG_R3, -32, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V27, REG_P5, REG_R3, 28, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V28, REG_P5, REG_R3, 28, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + 
theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V29, REG_P5, REG_R3, 28, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V30, REG_P5, REG_R3, 28, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4q, EA_SCALABLE, REG_V31, REG_P5, REG_R3, 28, + INS_OPTS_SCALABLE_Q); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2q, EA_SCALABLE, REG_V31, REG_P1, REG_R2, -16, + INS_OPTS_SCALABLE_Q); // LD2Q {.Q, .Q }, /Z, [{, #, MUL + // VL}] + + // IF_SVE_IS_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld2b, EA_SCALABLE, REG_V0, REG_P1, REG_R2, -16, + INS_OPTS_SCALABLE_B); // LD2B {.B, .B }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2d, EA_SCALABLE, REG_V4, REG_P5, REG_R7, 14, + INS_OPTS_SCALABLE_D); // LD2D {.D, .D }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2h, EA_SCALABLE, REG_V6, REG_P5, REG_R4, 8, + INS_OPTS_SCALABLE_H); // LD2H {.H, .H }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld2w, EA_SCALABLE, REG_V0, REG_P0, REG_R1, 2, + INS_OPTS_SCALABLE_S); // LD2W {.S, .S }, /Z, [{, #, MUL + // VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3b, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 21, + INS_OPTS_SCALABLE_B); // LD3B {.B, .B, .B }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3d, EA_SCALABLE, REG_V0, REG_P0, REG_R0, -24, + INS_OPTS_SCALABLE_D); // LD3D {.D, .D, .D }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3h, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 21, + INS_OPTS_SCALABLE_H); // LD3H {.H, .H, .H }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld3w, EA_SCALABLE, REG_V0, REG_P0, REG_R0, -24, + INS_OPTS_SCALABLE_S); // LD3W {.S, .S, .S }, /Z, [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4b, EA_SCALABLE, REG_V31, REG_P2, REG_R1, -32, + 
INS_OPTS_SCALABLE_B); // LD4B {.B, .B, .B, .B }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4d, EA_SCALABLE, REG_V8, REG_P0, REG_R0, 28, + INS_OPTS_SCALABLE_D); // LD4D {.D, .D, .D, .D }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4h, EA_SCALABLE, REG_V5, REG_P4, REG_R3, -32, + INS_OPTS_SCALABLE_H); // LD4H {.H, .H, .H, .H }, /Z, + // [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld4w, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 28, + INS_OPTS_SCALABLE_S); // LD4W {.S, .S, .S, .S }, /Z, + // [{, #, MUL VL}] + + // IF_SVE_JE_3A + theEmitter->emitIns_R_R_R_I(INS_sve_st2q, EA_SCALABLE, REG_V0, REG_P3, REG_R0, -16, + INS_OPTS_SCALABLE_Q); // ST2Q {.Q, .Q }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st3q, EA_SCALABLE, REG_V2, REG_P3, REG_R4, 21, + INS_OPTS_SCALABLE_Q); // ST3Q {.Q, .Q, .Q }, , [{, #, + // MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st4q, EA_SCALABLE, REG_V7, REG_P6, REG_R5, 28, + INS_OPTS_SCALABLE_Q); // ST4Q {.Q, .Q, .Q, .Q }, , [{, + // #, MUL VL}] + + // IF_SVE_JM_3A + theEmitter->emitIns_R_R_R_I(INS_sve_stnt1b, EA_SCALABLE, REG_V1, REG_P2, REG_R3, 4, + INS_OPTS_SCALABLE_B); // STNT1B {.B }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_stnt1d, EA_SCALABLE, REG_V8, REG_P7, REG_R6, 5, + INS_OPTS_SCALABLE_D); // STNT1D {.D }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_stnt1h, EA_SCALABLE, REG_V9, REG_P1, REG_R0, -5, + INS_OPTS_SCALABLE_H); // STNT1H {.H }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_stnt1w, EA_SCALABLE, REG_V0, REG_P0, REG_R2, -7, + INS_OPTS_SCALABLE_S); // STNT1W {.S }, , [{, #, MUL VL}] + + // IF_SVE_JN_3C + theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V1, REG_P2, REG_R3, 4, + INS_OPTS_SCALABLE_D); // ST1D {.D }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V3, REG_P4, REG_R5, 6, + INS_OPTS_SCALABLE_Q); // ST1W {.Q }, , [{, #, MUL VL}] + + // IF_SVE_JN_3C_D + 
theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V2, REG_P1, REG_R0, 0, + INS_OPTS_SCALABLE_Q); // ST1D {.Q }, , [{, #, MUL VL}] + + // IF_SVE_JO_3A + theEmitter->emitIns_R_R_R_I(INS_sve_st2b, EA_SCALABLE, REG_V0, REG_P1, REG_R2, -16, + INS_OPTS_SCALABLE_B); // ST2B {.B, .B }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st2d, EA_SCALABLE, REG_V5, REG_P4, REG_R3, -16, + INS_OPTS_SCALABLE_D); // ST2D {.D, .D }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st2h, EA_SCALABLE, REG_V6, REG_P7, REG_R8, -16, + INS_OPTS_SCALABLE_H); // ST2H {.H, .H }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st2w, EA_SCALABLE, REG_V8, REG_P1, REG_R9, -16, + INS_OPTS_SCALABLE_S); // ST2W {.S, .S }, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st3b, EA_SCALABLE, REG_V7, REG_P6, REG_R5, -24, + INS_OPTS_SCALABLE_B); // ST3B {.B, .B, .B }, , [{, #, + // MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st3d, EA_SCALABLE, REG_V2, REG_P3, REG_R4, -24, + INS_OPTS_SCALABLE_D); // ST3D {.D, .D, .D }, , [{, #, + // MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st3h, EA_SCALABLE, REG_V1, REG_P2, REG_R3, -24, + INS_OPTS_SCALABLE_H); // ST3H {.H, .H, .H }, , [{, #, + // MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st3w, EA_SCALABLE, REG_V1, REG_P3, REG_R8, -24, + INS_OPTS_SCALABLE_S); // ST3W {.S, .S, .S }, , [{, #, + // MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st4b, EA_SCALABLE, REG_V0, REG_P0, REG_R0, -32, + INS_OPTS_SCALABLE_B); // ST4B {.B, .B, .B, .B }, , [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st4d, EA_SCALABLE, REG_V2, REG_P0, REG_R1, -32, + INS_OPTS_SCALABLE_D); // ST4D {.D, .D, .D, .D }, , [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st4h, EA_SCALABLE, REG_V3, REG_P5, REG_R2, -32, + INS_OPTS_SCALABLE_H); // ST4H {.H, .H, .H, .H }, , [{, + // #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st4w, EA_SCALABLE, REG_V0, REG_P1, REG_R5, 28, + INS_OPTS_SCALABLE_S); // ST4W {.S, .S, .S, .S }, , [{, + // 
#, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st4w, EA_SCALABLE, REG_V31, REG_P1, REG_R5, 28, + INS_OPTS_SCALABLE_S); // ST4W {.S, .S, .S, .S }, , [{, + // #, MUL VL}] } #endif // defined(TARGET_ARM64) && defined(DEBUG) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index d8c99b9a7ba3e4..67c3be9be944a1 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1271,6 +1271,108 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); break; + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) + case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous 
non-temporal load (scalar plus + // immediate) + case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus + // immediate) + case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // immediate) + case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) + case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // immediate) + case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // immediate) + case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) + elemsize = id->idOpSize(); + assert(insOptsScalable(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isScalableVectorSize(elemsize)); + +#ifdef DEBUG + switch (id->idIns()) + { + case INS_sve_ld2b: + case INS_sve_ld2h: + case INS_sve_ld2w: + case INS_sve_ld2d: + case INS_sve_ld2q: + case INS_sve_st2b: + case INS_sve_st2h: + case INS_sve_st2w: + case INS_sve_st2d: + case INS_sve_st2q: + assert(isValidSimm4_MultipleOf2(emitGetInsSC(id))); // iiii + break; + + case INS_sve_ld3b: + case INS_sve_ld3h: + case INS_sve_ld3w: + case INS_sve_ld3d: + case INS_sve_ld3q: + case INS_sve_st3b: + case INS_sve_st3h: + case INS_sve_st3w: + case INS_sve_st3d: + case INS_sve_st3q: + assert(isValidSimm4_MultipleOf3(emitGetInsSC(id))); // iiii + break; + + case INS_sve_ld4b: + case INS_sve_ld4h: + case INS_sve_ld4w: + case INS_sve_ld4d: + case INS_sve_ld4q: + case 
INS_sve_st4b: + case INS_sve_st4h: + case INS_sve_st4w: + case INS_sve_st4d: + case INS_sve_st4q: + assert(isValidSimm4_MultipleOf4(emitGetInsSC(id))); // iiii + break; + + case INS_sve_ld1rqb: + case INS_sve_ld1rqd: + case INS_sve_ld1rqh: + case INS_sve_ld1rqw: + assert(isValidSimm4_MultipleOf16(emitGetInsSC(id))); // iiii + break; + + case INS_sve_ld1rob: + case INS_sve_ld1rod: + case INS_sve_ld1roh: + case INS_sve_ld1row: + assert(isValidSimm4_MultipleOf32(emitGetInsSC(id))); // iiii + break; + + default: + assert(isValidSimm4(emitGetInsSC(id))); // iiii + break; + } +#endif // DEBUG + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -5382,6 +5484,9 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt) case INS_OPTS_SCALABLE_D_WITH_PREDICATE_MERGE: return EA_8BYTE; + case INS_OPTS_SCALABLE_Q: + return EA_16BYTE; + default: assert(!"Invalid insOpt for vector register"); return EA_UNKNOWN; @@ -9531,171 +9636,676 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_DV_3AI; break; - default: - unreached(); + case INS_sve_ld1d: + assert(insOptsScalable(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + if (opt == INS_OPTS_SCALABLE_Q) + { + fmt = IF_SVE_IH_3A_A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + fmt = IF_SVE_IH_3A; + } break; - } // end switch (ins) + case INS_sve_ld1w: + assert(insOptsScalableWordsOrQuadwords(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IH_3A_F; + break; - if (isLdSt) - { - assert(!isAddSub); - assert(isGeneralRegisterOrSP(reg3)); - assert(insOptsNone(opt) || insOptsIndexed(opt)); + case INS_sve_ld1sw: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + 
assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A; + break; - if (isSIMD) - { - assert(isValidVectorLSPDatasize(size)); + case INS_sve_ld1sb: + assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert((scale >= 2) && (scale <= 4)); - } - else - { - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegisterOrZR(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - assert((scale == 2) || (scale == 3)); - } + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_D; + break; - // Load/Store Pair reserved encodings: - if (emitInsIsLoad(ins)) - { - assert(reg1 != reg2); - } - if (insOptsIndexed(opt)) - { - assert(reg1 != reg3); - assert(reg2 != reg3); - } + case INS_sve_ld1b: + assert(insOptsScalableSimple(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_E; + break; - reg3 = encodingSPtoZR(reg3); + case INS_sve_ld1sh: + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_F; + break; - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (imm == 0) - { - assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero + case INS_sve_ld1h: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_G; + break; - fmt = IF_LS_3B; - } - else - { - if ((imm & mask) == 0) - { - imm >>= scale; // The immediate is scaled by the size of the ld/st + case INS_sve_ldnf1sw: + case INS_sve_ldnf1d: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + 
assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IL_3A; + break; + + case INS_sve_ldnf1sh: + case INS_sve_ldnf1w: + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IL_3A_A; + break; + + case INS_sve_ldnf1h: + case INS_sve_ldnf1sb: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IL_3A_B; + break; + + case INS_sve_ldnf1b: + assert(insOptsScalableSimple(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IL_3A_C; + break; + + case INS_sve_ldnt1b: + case INS_sve_ldnt1h: + case INS_sve_ldnt1w: + case INS_sve_ldnt1d: + assert(insOptsScalableSimple(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); - if ((imm >= -64) && (imm <= 63)) - { - fmt = IF_LS_3C; - } - } #ifdef DEBUG - if (fmt != IF_LS_3C) + switch (ins) { - assert(!"Instruction cannot be encoded: IF_LS_3C"); - } -#endif - } - } - else if (isAddSub) - { - bool reg2IsSP = (reg2 == REG_SP); - assert(!isLdSt); - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(reg3)); + case INS_sve_ldnt1b: + assert(opt == INS_OPTS_SCALABLE_B); + break; - if (setFlags || insOptsAluShift(opt)) // Can't encode SP in reg1 with setFlags or AluShift option - { - assert(isGeneralRegisterOrZR(reg1)); - } - else - { - assert(isGeneralRegisterOrSP(reg1)); - reg1 = encodingSPtoZR(reg1); - } + case INS_sve_ldnt1h: + assert(opt == INS_OPTS_SCALABLE_H); + break; - if (insOptsAluShift(opt)) // Can't encode SP in reg2 with AluShift option - { - assert(isGeneralRegister(reg2)); - } - else - { - 
assert(isGeneralRegisterOrSP(reg2)); - reg2 = encodingSPtoZR(reg2); - } + case INS_sve_ldnt1w: + assert(opt == INS_OPTS_SCALABLE_S); + break; - if (insOptsAnyExtend(opt)) - { - assert((imm >= 0) && (imm <= 4)); + case INS_sve_ldnt1d: + assert(opt == INS_OPTS_SCALABLE_D); + break; - fmt = IF_DR_3C; - } - else if (insOptsAluShift(opt)) - { - // imm should be non-zero and in [1..63] - assert(isValidImmShift(imm, size) && (imm != 0)); - fmt = IF_DR_3B; - } - else if (imm == 0) - { - assert(insOptsNone(opt)); + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG - if (reg2IsSP) + fmt = IF_SVE_IM_3A; + break; + + case INS_sve_ld1rqb: + case INS_sve_ld1rob: + case INS_sve_ld1rqh: + case INS_sve_ld1roh: + case INS_sve_ld1rqw: + case INS_sve_ld1row: + case INS_sve_ld1rqd: + case INS_sve_ld1rod: + assert(insOptsScalableSimple(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + +#ifdef DEBUG + switch (ins) { - // To encode the SP register as reg2 we must use the IF_DR_3C encoding - // and also specify a LSL of zero (imm == 0) - opt = INS_OPTS_LSL; - fmt = IF_DR_3C; + case INS_sve_ld1rqb: + case INS_sve_ld1rqd: + case INS_sve_ld1rqh: + case INS_sve_ld1rqw: + assert(isValidSimm4_MultipleOf16(imm)); + break; + + case INS_sve_ld1rob: + case INS_sve_ld1rod: + case INS_sve_ld1roh: + case INS_sve_ld1row: + assert(isValidSimm4_MultipleOf32(imm)); + break; + + default: + assert(!"Invalid instruction"); + break; } - else + + switch (ins) { - fmt = IF_DR_3A; + case INS_sve_ld1rqb: + case INS_sve_ld1rob: + assert(opt == INS_OPTS_SCALABLE_B); + break; + + case INS_sve_ld1rqh: + case INS_sve_ld1roh: + assert(opt == INS_OPTS_SCALABLE_H); + break; + + case INS_sve_ld1rqw: + case INS_sve_ld1row: + assert(opt == INS_OPTS_SCALABLE_S); + break; + + case INS_sve_ld1rqd: + case INS_sve_ld1rod: + assert(opt == INS_OPTS_SCALABLE_D); + break; + + default: + assert(!"Invalid instruction"); + break; } - } - else - { - 
assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A"); - } - } +#endif // DEBUG - assert(fmt != IF_NONE); + fmt = IF_SVE_IO_3A; + break; - instrDesc* id = emitNewInstrCns(attr, imm); + case INS_sve_ld2q: + case INS_sve_ld3q: + case INS_sve_ld4q: + assert(opt == INS_OPTS_SCALABLE_Q); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(opt); +#ifdef DEBUG + switch (ins) + { + case INS_sve_ld2q: + assert(isValidSimm4_MultipleOf2(imm)); + break; - id->idReg1(reg1); - id->idReg2(reg2); - id->idReg3(reg3); + case INS_sve_ld3q: + assert(isValidSimm4_MultipleOf3(imm)); + break; - // Record the attribute for the second register in the pair - id->idGCrefReg2(GCT_NONE); - if (attrReg2 != EA_UNKNOWN) - { - // Record the attribute for the second register in the pair - assert((fmt == IF_LS_3B) || (fmt == IF_LS_3C)); - if (EA_IS_GCREF(attrReg2)) - { - id->idGCrefReg2(GCT_GCREF); - } - else if (EA_IS_BYREF(attrReg2)) - { - id->idGCrefReg2(GCT_BYREF); - } - } + case INS_sve_ld4q: + assert(isValidSimm4_MultipleOf4(imm)); + break; - dispIns(id); - appendToCurIG(id); -} + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG -/***************************************************************************** - * - * Add an instruction referencing three registers, with an extend option - */ + fmt = IF_SVE_IQ_3A; + break; + + case INS_sve_ld2b: + case INS_sve_ld3b: + case INS_sve_ld4b: + case INS_sve_ld2h: + case INS_sve_ld3h: + case INS_sve_ld4h: + case INS_sve_ld2w: + case INS_sve_ld3w: + case INS_sve_ld4w: + case INS_sve_ld2d: + case INS_sve_ld3d: + case INS_sve_ld4d: + assert(insOptsScalableSimple(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); -void emitter::emitIns_R_R_R_Ext(instruction ins, +#ifdef DEBUG + switch (ins) + { + case INS_sve_ld2b: + case INS_sve_ld2h: + case INS_sve_ld2w: + case 
INS_sve_ld2d: + assert(isValidSimm4_MultipleOf2(imm)); + break; + + case INS_sve_ld3b: + case INS_sve_ld3h: + case INS_sve_ld3w: + case INS_sve_ld3d: + assert(isValidSimm4_MultipleOf3(imm)); + break; + + case INS_sve_ld4b: + case INS_sve_ld4h: + case INS_sve_ld4w: + case INS_sve_ld4d: + assert(isValidSimm4_MultipleOf4(imm)); + break; + + default: + assert(!"Invalid instruction"); + break; + } + + switch (ins) + { + case INS_sve_ld2b: + case INS_sve_ld3b: + case INS_sve_ld4b: + assert(opt == INS_OPTS_SCALABLE_B); + break; + + case INS_sve_ld2h: + case INS_sve_ld3h: + case INS_sve_ld4h: + assert(opt == INS_OPTS_SCALABLE_H); + break; + + case INS_sve_ld2w: + case INS_sve_ld3w: + case INS_sve_ld4w: + assert(opt == INS_OPTS_SCALABLE_S); + break; + + case INS_sve_ld2d: + case INS_sve_ld3d: + case INS_sve_ld4d: + assert(opt == INS_OPTS_SCALABLE_D); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + + fmt = IF_SVE_IS_3A; + break; + + case INS_sve_st2q: + case INS_sve_st3q: + case INS_sve_st4q: + assert(opt == INS_OPTS_SCALABLE_Q); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + +#ifdef DEBUG + switch (ins) + { + case INS_sve_st2q: + assert(isValidSimm4_MultipleOf2(imm)); + break; + + case INS_sve_st3q: + assert(isValidSimm4_MultipleOf3(imm)); + break; + + case INS_sve_st4q: + assert(isValidSimm4_MultipleOf4(imm)); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + + fmt = IF_SVE_JE_3A; + break; + + case INS_sve_stnt1b: + case INS_sve_stnt1h: + case INS_sve_stnt1w: + case INS_sve_stnt1d: + assert(insOptsScalableSimple(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + +#ifdef DEBUG + switch (ins) + { + case INS_sve_stnt1b: + assert(opt == INS_OPTS_SCALABLE_B); + break; + + case INS_sve_stnt1h: + assert(opt == INS_OPTS_SCALABLE_H); + break; + + case 
INS_sve_stnt1w: + assert(opt == INS_OPTS_SCALABLE_S); + break; + + case INS_sve_stnt1d: + assert(opt == INS_OPTS_SCALABLE_D); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + + fmt = IF_SVE_JM_3A; + break; + + case INS_sve_st1w: + case INS_sve_st1d: + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + + if (opt == INS_OPTS_SCALABLE_Q && (ins == INS_sve_st1d)) + { + fmt = IF_SVE_JN_3C_D; + } + else + { +#ifdef DEBUG + if (ins == INS_sve_st1w) + { + assert(opt == INS_OPTS_SCALABLE_Q); + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + } +#endif // DEBUG + fmt = IF_SVE_JN_3C; + } + break; + + case INS_sve_st2b: + case INS_sve_st3b: + case INS_sve_st4b: + case INS_sve_st2h: + case INS_sve_st3h: + case INS_sve_st4h: + case INS_sve_st2w: + case INS_sve_st3w: + case INS_sve_st4w: + case INS_sve_st2d: + case INS_sve_st3d: + case INS_sve_st4d: + assert(insOptsScalableSimple(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + +#ifdef DEBUG + switch (ins) + { + case INS_sve_st2b: + case INS_sve_st2h: + case INS_sve_st2w: + case INS_sve_st2d: + assert(isValidSimm4_MultipleOf2(imm)); + break; + + case INS_sve_st3b: + case INS_sve_st3h: + case INS_sve_st3w: + case INS_sve_st3d: + assert(isValidSimm4_MultipleOf3(imm)); + break; + + case INS_sve_st4b: + case INS_sve_st4h: + case INS_sve_st4w: + case INS_sve_st4d: + assert(isValidSimm4_MultipleOf4(imm)); + break; + + default: + assert(!"Invalid instruction"); + break; + } + + switch (ins) + { + case INS_sve_st2b: + case INS_sve_st3b: + case INS_sve_st4b: + assert(opt == INS_OPTS_SCALABLE_B); + break; + + case INS_sve_st2h: + case INS_sve_st3h: + case INS_sve_st4h: + assert(opt == INS_OPTS_SCALABLE_H); + break; + + case INS_sve_st2w: + case INS_sve_st3w: + case INS_sve_st4w: + assert(opt == INS_OPTS_SCALABLE_S); + break; + + case INS_sve_st2d: + 
case INS_sve_st3d: + case INS_sve_st4d: + assert(opt == INS_OPTS_SCALABLE_D); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + + fmt = IF_SVE_JO_3A; + break; + + default: + unreached(); + break; + + } // end switch (ins) + + if (isLdSt) + { + assert(!isAddSub); + assert(isGeneralRegisterOrSP(reg3)); + assert(insOptsNone(opt) || insOptsIndexed(opt)); + + if (isSIMD) + { + assert(isValidVectorLSPDatasize(size)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert((scale >= 2) && (scale <= 4)); + } + else + { + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegisterOrZR(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + assert((scale == 2) || (scale == 3)); + } + + // Load/Store Pair reserved encodings: + if (emitInsIsLoad(ins)) + { + assert(reg1 != reg2); + } + if (insOptsIndexed(opt)) + { + assert(reg1 != reg3); + assert(reg2 != reg3); + } + + reg3 = encodingSPtoZR(reg3); + + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (imm == 0) + { + assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero + + fmt = IF_LS_3B; + } + else + { + if ((imm & mask) == 0) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st + + if ((imm >= -64) && (imm <= 63)) + { + fmt = IF_LS_3C; + } + } +#ifdef DEBUG + if (fmt != IF_LS_3C) + { + assert(!"Instruction cannot be encoded: IF_LS_3C"); + } +#endif + } + } + else if (isAddSub) + { + bool reg2IsSP = (reg2 == REG_SP); + assert(!isLdSt); + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(reg3)); + + if (setFlags || insOptsAluShift(opt)) // Can't encode SP in reg1 with setFlags or AluShift option + { + assert(isGeneralRegisterOrZR(reg1)); + } + else + { + assert(isGeneralRegisterOrSP(reg1)); + reg1 = encodingSPtoZR(reg1); + } + + if (insOptsAluShift(opt)) // Can't encode SP in reg2 with AluShift option + { + assert(isGeneralRegister(reg2)); + } + 
else + { + assert(isGeneralRegisterOrSP(reg2)); + reg2 = encodingSPtoZR(reg2); + } + + if (insOptsAnyExtend(opt)) + { + assert((imm >= 0) && (imm <= 4)); + + fmt = IF_DR_3C; + } + else if (insOptsAluShift(opt)) + { + // imm should be non-zero and in [1..63] + assert(isValidImmShift(imm, size) && (imm != 0)); + fmt = IF_DR_3B; + } + else if (imm == 0) + { + assert(insOptsNone(opt)); + + if (reg2IsSP) + { + // To encode the SP register as reg2 we must use the IF_DR_3C encoding + // and also specify a LSL of zero (imm == 0) + opt = INS_OPTS_LSL; + fmt = IF_DR_3C; + } + else + { + fmt = IF_DR_3A; + } + } + else + { + assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A"); + } + } + + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrCns(attr, imm); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + + // Record the attribute for the second register in the pair + id->idGCrefReg2(GCT_NONE); + if (attrReg2 != EA_UNKNOWN) + { + // Record the attribute for the second register in the pair + assert((fmt == IF_LS_3B) || (fmt == IF_LS_3C)); + if (EA_IS_GCREF(attrReg2)) + { + id->idGCrefReg2(GCT_GCREF); + } + else if (EA_IS_BYREF(attrReg2)) + { + id->idGCrefReg2(GCT_BYREF); + } + } + + dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing three registers, with an extend option + */ + +void emitter::emitIns_R_R_R_Ext(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, @@ -12779,92 +13389,637 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction + * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction + */ + +/*static*/ emitter::code_t 
emitter::insEncodeSveElemsize(emitAttr size) +{ + switch (size) + { + case EA_1BYTE: + return 0x00000000; + + case EA_2BYTE: + return 0x00400000; // set the bit at location 22 + + case EA_4BYTE: + return 0x00800000; // set the bit at location 23 + + case EA_8BYTE: + return 0x00C00000; // set the bit at location 23 and 22 + + default: + assert(!"Invalid insOpt for vector register"); + } + return 0; +} + +/***************************************************************************** + * + * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction + * This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. + */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size) +{ + switch (size) + { + case EA_1BYTE: + return 0x080000; // set the bit at location 19 + + case EA_2BYTE: + return 0x100000; // set the bit at location 20 + + case EA_4BYTE: + return 0x400000; // set the bit at location 22 + + default: + assert(!"Invalid size for vector register"); + } + return 0; +} + +/***************************************************************************** + * + * Returns the encoding to select the elemsize for an Arm64 SVE vector instruction plus an immediate. + * This specifically encodes the field 'tszh:tszl' at bit locations '23-22:9-8'. 
+ */ + +/*static*/ emitter::code_t emitter::insEncodeSveShift_23_to_22_9_to_0(emitAttr size, bool isRightShift, size_t imm) +{ + code_t encodedSize = 0; + + switch (size) + { + case EA_1BYTE: + encodedSize = 0x100; // set the bit at location 8 + break; + + case EA_2BYTE: + encodedSize = 0x200; // set the bit at location 9 + break; + + case EA_4BYTE: + encodedSize = 0x400000; // set the bit at location 22 + break; + + case EA_8BYTE: + encodedSize = 0x800000; // set the bit at location 23 + break; + + default: + assert(!"Invalid esize for vector register"); + } + + code_t encodedImm = insEncodeShiftImmediate(size, isRightShift, imm); + code_t imm3High = (encodedImm & 0x60) << 17; + code_t imm3Low = (encodedImm & 0x1f) << 5; + return encodedSize | imm3High | imm3Low; +} + +/***************************************************************************** + * + * Returns the register list size for the given SVE instruction. + */ + +/*static*/ int emitter::insGetSveReg1ListSize(instruction ins) +{ + switch (ins) + { + case INS_sve_ld1d: + case INS_sve_ld1w: + case INS_sve_ld1sw: + case INS_sve_ld1sb: + case INS_sve_ld1b: + case INS_sve_ld1sh: + case INS_sve_ld1h: + case INS_sve_ldnf1d: + case INS_sve_ldnf1sw: + case INS_sve_ldnf1sh: + case INS_sve_ldnf1w: + case INS_sve_ldnf1h: + case INS_sve_ldnf1sb: + case INS_sve_ldnf1b: + case INS_sve_ldnt1b: + case INS_sve_ldnt1d: + case INS_sve_ldnt1h: + case INS_sve_ldnt1w: + case INS_sve_ld1rob: + case INS_sve_ld1rod: + case INS_sve_ld1roh: + case INS_sve_ld1row: + case INS_sve_ld1rqb: + case INS_sve_ld1rqd: + case INS_sve_ld1rqh: + case INS_sve_ld1rqw: + case INS_sve_stnt1b: + case INS_sve_stnt1d: + case INS_sve_stnt1h: + case INS_sve_stnt1w: + case INS_sve_st1d: + case INS_sve_st1w: + case INS_sve_ldff1sh: + case INS_sve_ldff1w: + case INS_sve_ldff1h: + case INS_sve_ldff1d: + case INS_sve_ldff1sw: + case INS_sve_st1b: + case INS_sve_ldff1sb: + case INS_sve_ldff1b: + case INS_sve_ldnt1sb: + case INS_sve_ldnt1sh: + case 
INS_sve_ld1rd: + case INS_sve_ld1rsw: + case INS_sve_ld1rh: + case INS_sve_ld1rsb: + case INS_sve_ld1rsh: + case INS_sve_ld1rw: + case INS_sve_ld1q: + case INS_sve_ldnt1sw: + case INS_sve_st1q: + return 1; + + case INS_sve_ld2b: + case INS_sve_ld2h: + case INS_sve_ld2w: + case INS_sve_ld2d: + case INS_sve_ld2q: + case INS_sve_st2b: + case INS_sve_st2h: + case INS_sve_st2w: + case INS_sve_st2d: + case INS_sve_st2q: + case INS_sve_whilege: // SVE_DX_3A + case INS_sve_whilegt: // SVE_DX_3A + case INS_sve_whilehi: // SVE_DX_3A + case INS_sve_whilehs: // SVE_DX_3A + case INS_sve_whilele: // SVE_DX_3A + case INS_sve_whilels: // SVE_DX_3A + case INS_sve_whilelt: // SVE_DX_3A + case INS_sve_pext: // SVE_DW_2B + return 2; + + case INS_sve_ld3b: + case INS_sve_ld3h: + case INS_sve_ld3w: + case INS_sve_ld3d: + case INS_sve_ld3q: + case INS_sve_st3b: + case INS_sve_st3h: + case INS_sve_st3w: + case INS_sve_st3d: + case INS_sve_st3q: + return 3; + + case INS_sve_ld4b: + case INS_sve_ld4h: + case INS_sve_ld4w: + case INS_sve_ld4d: + case INS_sve_ld4q: + case INS_sve_st4b: + case INS_sve_st4h: + case INS_sve_st4w: + case INS_sve_st4d: + case INS_sve_st4q: + return 4; + + default: + assert(!"Unexpected instruction"); + return 1; + } +} + +/***************************************************************************** + * + * Returns the predicate type for the given SVE format. 
+ */ + +/*static*/ emitter::PredicateType emitter::insGetPredicateType(insFormat fmt) +{ + switch (fmt) + { + case IF_SVE_BV_2A: + case IF_SVE_HW_4A: + case IF_SVE_HW_4A_A: + case IF_SVE_HW_4A_B: + case IF_SVE_HW_4A_C: + case IF_SVE_HW_4B: + case IF_SVE_HW_4B_D: + case IF_SVE_HX_3A_E: + case IF_SVE_IJ_3A_D: + case IF_SVE_IJ_3A_E: + case IF_SVE_IJ_3A_F: + case IF_SVE_IK_4A_G: + case IF_SVE_IJ_3A_G: + case IF_SVE_IK_4A_I: + case IF_SVE_IH_3A_F: + case IF_SVE_II_4A_H: + case IF_SVE_IH_3A: + case IF_SVE_IH_3A_A: + case IF_SVE_II_4A: + case IF_SVE_II_4A_B: + case IF_SVE_IU_4A: + case IF_SVE_IU_4A_C: + case IF_SVE_IU_4B: + case IF_SVE_IU_4B_D: + case IF_SVE_IV_3A: + case IF_SVE_IG_4A_F: + case IF_SVE_IG_4A_G: + case IF_SVE_IJ_3A: + case IF_SVE_IK_4A: + case IF_SVE_IU_4A_A: + case IF_SVE_IU_4B_B: + case IF_SVE_HX_3A_B: + case IF_SVE_IG_4A_D: + case IF_SVE_IG_4A_E: + case IF_SVE_IF_4A: + case IF_SVE_IF_4A_A: + case IF_SVE_IM_3A: + case IF_SVE_IN_4A: + case IF_SVE_CX_4A: + case IF_SVE_CX_4A_A: + case IF_SVE_CY_3A: + case IF_SVE_CY_3B: + case IF_SVE_IX_4A: + case IF_SVE_HI_3A: + case IF_SVE_HT_4A: + case IF_SVE_DG_2A: + case IF_SVE_IO_3A: + case IF_SVE_IP_4A: + case IF_SVE_IQ_3A: + case IF_SVE_IR_4A: + case IF_SVE_IS_3A: + case IF_SVE_IT_4A: + case IF_SVE_DA_4A: + case IF_SVE_DB_3B: + case IF_SVE_DC_3A: + case IF_SVE_GE_4A: + case IF_SVE_GI_4A: + case IF_SVE_IC_3A_C: + case IF_SVE_IC_3A: + case IF_SVE_IC_3A_B: + case IF_SVE_IC_3A_A: + case IF_SVE_IL_3A_C: + case IF_SVE_IL_3A: + case IF_SVE_IL_3A_B: + case IF_SVE_IL_3A_A: + case IF_SVE_IW_4A: + return PREDICATE_ZERO; + + case IF_SVE_BV_2A_J: + case IF_SVE_CP_3A: + case IF_SVE_CQ_3A: + case IF_SVE_CZ_4A_K: + case IF_SVE_AM_2A: + case IF_SVE_AN_3A: + case IF_SVE_AO_3A: + case IF_SVE_HL_3A: + case IF_SVE_HM_2A: + case IF_SVE_AA_3A: + case IF_SVE_BU_2A: + case IF_SVE_BV_2B: + case IF_SVE_HS_3A: + case IF_SVE_HS_3A_H: + case IF_SVE_HS_3A_I: + case IF_SVE_HS_3A_J: + case IF_SVE_HP_3B: + case IF_SVE_HP_3B_H: + case IF_SVE_HP_3B_I: + 
case IF_SVE_HP_3B_J: + case IF_SVE_AR_4A: + case IF_SVE_BV_2A_A: + case IF_SVE_AB_3A: + case IF_SVE_ET_3A: + case IF_SVE_HU_4A: + case IF_SVE_HL_3B: + case IF_SVE_AD_3A: + case IF_SVE_AB_3B: + case IF_SVE_AE_3A: + case IF_SVE_EU_3A: + case IF_SVE_GT_4A: + case IF_SVE_AP_3A: + case IF_SVE_HO_3A: + case IF_SVE_HO_3A_B: + case IF_SVE_GQ_3A: + case IF_SVE_HU_4B: + case IF_SVE_AQ_3A: + case IF_SVE_CU_3A: + case IF_SVE_AC_3A: + case IF_SVE_ER_3A: + case IF_SVE_GR_3A: + case IF_SVE_ES_3A: + case IF_SVE_HR_3A: + case IF_SVE_EP_3A: + case IF_SVE_GP_3A: + case IF_SVE_EQ_3A: + case IF_SVE_HQ_3A: + case IF_SVE_AS_4A: + case IF_SVE_CT_3A: + case IF_SVE_HP_3A: + case IF_SVE_HV_4A: + return PREDICATE_MERGE; + + case IF_SVE_CZ_4A_A: + case IF_SVE_CZ_4A_L: + case IF_SVE_CF_2A: + case IF_SVE_CF_2B: + case IF_SVE_CF_2C: + case IF_SVE_CF_2D: + case IF_SVE_CI_3A: + case IF_SVE_DL_2A: + case IF_SVE_DM_2A: + case IF_SVE_DN_2A: + case IF_SVE_DO_2A: + case IF_SVE_DP_2A: + case IF_SVE_CK_2A: + case IF_SVE_DI_2A: + return PREDICATE_SIZED; + + // This is a special case as the second register could be ZERO or MERGE. + // / + // Therefore, by default return NONE due to ambiguity. + case IF_SVE_AH_3A: + case IF_SVE_DB_3A: + // TODO: Handle these cases. 
+ break; + + case IF_SVE_JD_4B: + case IF_SVE_JD_4C: + case IF_SVE_JI_3A_A: + case IF_SVE_JJ_4A: + case IF_SVE_JJ_4A_B: + case IF_SVE_JJ_4A_C: + case IF_SVE_JJ_4A_D: + case IF_SVE_JJ_4B: + case IF_SVE_JJ_4B_E: + case IF_SVE_JN_3B: + case IF_SVE_JN_3C: + case IF_SVE_JD_4A: + case IF_SVE_JN_3A: + case IF_SVE_JD_4C_A: + case IF_SVE_JJ_4B_C: + case IF_SVE_JL_3A: + case IF_SVE_JN_3C_D: + case IF_SVE_HY_3A: + case IF_SVE_HY_3A_A: + case IF_SVE_HY_3B: + case IF_SVE_HZ_2A_B: + case IF_SVE_IA_2A: + case IF_SVE_IB_3A: + case IF_SVE_JK_4A: + case IF_SVE_JK_4A_B: + case IF_SVE_JK_4B: + case IF_SVE_IZ_4A: + case IF_SVE_IZ_4A_A: + case IF_SVE_JB_4A: + case IF_SVE_JM_3A: + case IF_SVE_CM_3A: + case IF_SVE_CN_3A: + case IF_SVE_CO_3A: + case IF_SVE_JA_4A: + case IF_SVE_CR_3A: + case IF_SVE_CS_3A: + case IF_SVE_CV_3A: + case IF_SVE_CV_3B: + case IF_SVE_DK_3A: + case IF_SVE_DW_2A: // [] + case IF_SVE_DW_2B: // [] + case IF_SVE_JC_4A: + case IF_SVE_JO_3A: + case IF_SVE_JE_3A: + case IF_SVE_JF_4A: + case IF_SVE_AK_3A: + case IF_SVE_HE_3A: + case IF_SVE_AF_3A: + case IF_SVE_AG_3A: + case IF_SVE_AI_3A: + case IF_SVE_AJ_3A: + case IF_SVE_AL_3A: + case IF_SVE_CL_3A: + case IF_SVE_DD_2A: + case IF_SVE_DF_2A: + case IF_SVE_GS_3A: + case IF_SVE_HJ_3A: + case IF_SVE_IY_4A: + return PREDICATE_NONE; + + default: + break; + } + + assert(!"Unexpected instruction format"); + return PREDICATE_NONE; +} + +/***************************************************************************** + * + * Returns true if the specified instruction can encode the 'dtype' field. 
+ */ + +/*static*/ bool emitter::canEncodeSveElemsize_dtype(instruction ins) +{ + switch (ins) + { + case INS_sve_ld1w: + case INS_sve_ld1sb: + case INS_sve_ld1b: + case INS_sve_ld1sh: + case INS_sve_ld1h: + case INS_sve_ldnf1sh: + case INS_sve_ldnf1w: + case INS_sve_ldnf1h: + case INS_sve_ldnf1sb: + case INS_sve_ldnf1b: + return true; + + default: + return false; + } +} + +/***************************************************************************** + * + * Returns the encoding to select the 1/2/4/8/16 byte elemsize for an Arm64 Sve vector instruction + * for the 'dtype' field. + */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t code) +{ + assert(canEncodeSveElemsize_dtype(ins)); + switch (size) + { + case EA_1BYTE: + switch (ins) + { + case INS_sve_ld1b: + case INS_sve_ldnf1b: + return code; // By default, the instruction already encodes 8-bit. + + default: + assert(!"Invalid instruction for encoding dtype."); + } + return code; + + case EA_2BYTE: + switch (ins) + { + case INS_sve_ld1b: + case INS_sve_ld1h: + case INS_sve_ldnf1b: + case INS_sve_ldnf1h: + return code | (1 << 21); // Set bit '21' to 1. + + case INS_sve_ld1sb: + case INS_sve_ldnf1sb: + return code | (1 << 22); // Set bit '22' to 1. + + default: + assert(!"Invalid instruction for encoding dtype."); + } + return code; + + case EA_4BYTE: + switch (ins) + { + case INS_sve_ld1w: + // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the + // proper encoding for S. + return (code | (1 << 15)) | (1 << 22); // Set bit '22' and '15' to 1. + + case INS_sve_ldnf1w: + return code; // By default, the instruction already encodes 32-bit. + + case INS_sve_ld1b: + case INS_sve_ld1h: + case INS_sve_ldnf1b: + case INS_sve_ldnf1h: + return code | (1 << 22); // Set bit '22' to 1. + + case INS_sve_ld1sb: + case INS_sve_ld1sh: + case INS_sve_ldnf1sb: + case INS_sve_ldnf1sh: + return code | (1 << 21); // Set bit '21' to 1. 
+ + default: + assert(!"Invalid instruction for encoding dtype."); + } + return code; + + case EA_8BYTE: + switch (ins) + { + case INS_sve_ld1w: + // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the + // proper encoding for D. + return ((code | (1 << 15)) | (1 << 22)) | (1 << 21); // Set bit '22', '21' and '15' to 1. + + case INS_sve_ldnf1w: + return code | (1 << 21); // Set bit '21' to 1. Set bit '15' to 1. + + case INS_sve_ld1b: + case INS_sve_ld1h: + case INS_sve_ldnf1b: + case INS_sve_ldnf1h: + return (code | (1 << 22)) | (1 << 21); // Set bit '22' and '21' to 1. + + case INS_sve_ld1sb: + case INS_sve_ld1sh: + case INS_sve_ldnf1sb: + case INS_sve_ldnf1sh: + return code; // By default, the instruction already encodes 64-bit. + + default: + assert(!"Invalid instruction for encoding dtype."); + } + return code; + + case EA_16BYTE: + switch (ins) + { + case INS_sve_ld1w: + return code | (1 << 20); // Set bit '20' to 1. + + default: + assert(!"Invalid instruction for encoding dtype."); + } + return code; + + default: + assert(!"Invalid size for encoding dtype."); + } + + return code; +} + +/***************************************************************************** + * + * Returns the encoding for the immediate value as 4-bits at bit locations '19-16'. 
*/ -/*static*/ emitter::code_t emitter::insEncodeSveElemsize(emitAttr size) +/*static*/ emitter::code_t emitter::insEncodeSimm4_19_to_16(ssize_t imm) { - switch (size) + assert(isValidSimm4(imm)); + if (imm < 0) { - case EA_1BYTE: - return 0x00000000; - - case EA_2BYTE: - return 0x00400000; // set the bit at location 22 - - case EA_4BYTE: - return 0x00800000; // set the bit at location 23 - - case EA_8BYTE: - return 0x00C00000; // set the bit at location 23 and 22 - - default: - assert(!"Invalid insOpt for vector register"); + imm = (imm & 0xF); } - return 0; + return (code_t)imm << 16; } /***************************************************************************** * - * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction - * This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. + * Returns the encoding for the immediate value that is a multiple of 2 as 4-bits at bit locations '19-16'. */ -/*static*/ emitter::code_t emitter::insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size) +/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf2_19_to_16(ssize_t imm) { - switch (size) - { - case EA_1BYTE: - return 0x080000; // set the bit at location 19 - - case EA_2BYTE: - return 0x100000; // set the bit at location 20 - - case EA_4BYTE: - return 0x400000; // set the bit at location 22 - - default: - assert(!"Invalid size for vector register"); - } - return 0; + assert(isValidSimm4_MultipleOf2(imm)); + return insEncodeSimm4_19_to_16(imm / 2); } /***************************************************************************** * - * Returns the encoding to select the elemsize for an Arm64 SVE vector instruction plus an immediate. - * This specifically encodes the field 'tszh:tszl' at bit locations '23-22:9-8'. + * Returns the encoding for the immediate value that is a multiple of 3 as 4-bits at bit locations '19-16'. 
*/ -/*static*/ emitter::code_t emitter::insEncodeSveShift_23_to_22_9_to_0(emitAttr size, bool isRightShift, size_t imm) +/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf3_19_to_16(ssize_t imm) { - code_t encodedSize = 0; + assert(isValidSimm4_MultipleOf3(imm)); + return insEncodeSimm4_19_to_16(imm / 3); +} - switch (size) - { - case EA_1BYTE: - encodedSize = 0x100; // set the bit at location 8 - break; +/***************************************************************************** + * + * Returns the encoding for the immediate value that is a multiple of 4 as 4-bits at bit locations '19-16'. + */ - case EA_2BYTE: - encodedSize = 0x200; // set the bit at location 9 - break; +/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf4_19_to_16(ssize_t imm) +{ + assert(isValidSimm4_MultipleOf4(imm)); + return insEncodeSimm4_19_to_16(imm / 4); +} - case EA_4BYTE: - encodedSize = 0x400000; // set the bit at location 22 - break; +/***************************************************************************** + * + * Returns the encoding for the immediate value that is a multiple of 16 as 4-bits at bit locations '19-16'. + */ - case EA_8BYTE: - encodedSize = 0x800000; // set the bit at location 23 - break; +/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf16_19_to_16(ssize_t imm) +{ + assert(isValidSimm4_MultipleOf16(imm)); + return insEncodeSimm4_19_to_16(imm / 16); +} - default: - assert(!"Invalid esize for vector register"); - } +/***************************************************************************** + * + * Returns the encoding for the immediate value that is a multiple of 32 as 4-bits at bit locations '19-16'. 
+ */ - code_t encodedImm = insEncodeShiftImmediate(size, isRightShift, imm); - code_t imm3High = (encodedImm & 0x60) << 17; - code_t imm3Low = (encodedImm & 0x1f) << 5; - return encodedSize | imm3High | imm3Low; +/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf32_19_to_16(ssize_t imm) +{ + assert(isValidSimm4_MultipleOf32(imm)); + return insEncodeSimm4_19_to_16(imm / 32); } /***************************************************************************** @@ -15034,6 +16189,112 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) + case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case 
IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus + // immediate) + case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus + // immediate) + case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // immediate) + case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) + case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // immediate) + case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // immediate) + case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + + switch (ins) + { + case INS_sve_ld2b: + case INS_sve_ld2h: + case INS_sve_ld2w: + case INS_sve_ld2d: + case INS_sve_ld2q: + case INS_sve_st2b: + case INS_sve_st2h: + case INS_sve_st2w: + case INS_sve_st2d: + case INS_sve_st2q: + code |= insEncodeSimm4_MultipleOf2_19_to_16(imm); // iiii + break; + + case INS_sve_ld3b: + case INS_sve_ld3h: + case INS_sve_ld3w: + case INS_sve_ld3d: + case INS_sve_ld3q: + case INS_sve_st3b: + case INS_sve_st3h: + case INS_sve_st3w: + case INS_sve_st3d: + case INS_sve_st3q: + code |= insEncodeSimm4_MultipleOf3_19_to_16(imm); // iiii + break; + + case INS_sve_ld4b: + case INS_sve_ld4h: + case INS_sve_ld4w: + case INS_sve_ld4d: + case INS_sve_ld4q: + case 
INS_sve_st4b: + case INS_sve_st4h: + case INS_sve_st4w: + case INS_sve_st4d: + case INS_sve_st4q: + code |= insEncodeSimm4_MultipleOf4_19_to_16(imm); // iiii + break; + + case INS_sve_ld1rqb: + case INS_sve_ld1rqd: + case INS_sve_ld1rqh: + case INS_sve_ld1rqw: + code |= insEncodeSimm4_MultipleOf16_19_to_16(imm); // iiii + break; + + case INS_sve_ld1rob: + case INS_sve_ld1rod: + case INS_sve_ld1roh: + case INS_sve_ld1row: + code |= insEncodeSimm4_MultipleOf32_19_to_16(imm); // iiii + break; + + default: + code |= insEncodeSimm4_19_to_16(imm); // iiii + break; + } + + if (canEncodeSveElemsize_dtype(ins)) + { + code = insEncodeSveElemsize_dtype(ins, optGetSveElemsize(id->idInsOpt()), code); + } + + dst += emitOutput_Instr(dst, code); + break; + default: assert(!"Unexpected format"); break; @@ -15587,20 +16848,33 @@ void emitter::emitDispVectorElemList( } //------------------------------------------------------------------------ -// emitDispSveRegList: Display a SVE vector register list +// emitDispSveConsecutiveRegList: Display a SVE consecutive vector register list // -void emitter::emitDispSveRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma) +void emitter::emitDispSveConsecutiveRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma) { assert(isVectorRegister(firstReg)); regNumber currReg = firstReg; + assert(listSize > 0); + printf("{ "); - for (unsigned i = 0; i < listSize; i++) + // We do not want the short-hand for list size of 1 or 2. + if ((listSize <= 2) || (((unsigned)currReg + listSize - 1) > (unsigned)REG_V31)) { - const bool notLastRegister = (i != listSize - 1); - emitDispSveReg(currReg, opt, notLastRegister); - currReg = (currReg == REG_V31) ? REG_V0 : REG_NEXT(currReg); + for (unsigned i = 0; i < listSize; i++) + { + const bool notLastRegister = (i != listSize - 1); + emitDispSveReg(currReg, opt, notLastRegister); + currReg = (currReg == REG_V31) ? 
REG_V0 : REG_NEXT(currReg); + } + } + else + { + // short-hand. example: { z0.s - z2.s } which is the same as { z0.s, z1.s, z2.s } + emitDispSveReg(currReg, opt, false); + printf(" - "); + emitDispSveReg((regNumber)(currReg + listSize - 1), opt, false); } printf(" }"); @@ -15709,6 +16983,9 @@ void emitter::emitDispArrangement(insOpts opt) case INS_OPTS_SCALABLE_D_WITH_PREDICATE_MERGE: str = "d"; break; + case INS_OPTS_SCALABLE_Q: + str = "q"; + break; default: assert(!"Invalid insOpt for vector register"); @@ -17429,9 +18706,9 @@ void emitter::emitDispInsHelp( // .H, { .S-.S }, # case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_S, true); // nnnn - emitDispImm(emitGetInsSC(id), false); // iiii + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveConsecutiveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_S, true); // nnnn + emitDispImm(emitGetInsSC(id), false); // iiii break; // , . @@ -17506,6 +18783,116 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg1(), id->idInsOpt(), false); // ddddd break; + // { .D }, /Z, [{, #, MUL VL}] + // Some of these formats may allow changing the element size instead of using 'D' for all instructions. 
+ case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) + case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus + // immediate) + // { .B }, /Z, [{, #}] + // { .H }, /Z, [{, #}] + // { .S }, /Z, [{, #}] + // { .D }, /Z, [{, #}] + case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus + // immediate) + // { .Q, .Q }, /Z, [{, #, MUL VL}] + // { .Q, .Q, .Q }, /Z, [{, #, MUL VL}] + // { .Q, .Q, .Q, .Q }, /Z, [{, #, MUL VL}] + case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // immediate) + // { .B, .B }, /Z, 
[{, #, MUL VL}] + // { .H, .H }, /Z, [{, #, MUL VL}] + // { .S, .S }, /Z, [{, #, MUL VL}] + // { .D, .D }, /Z, [{, #, MUL VL}] + // { .B, .B, .B }, /Z, [{, #, MUL VL}] + // { .H, .H, .H }, /Z, [{, #, MUL VL}] + // { .S, .S, .S }, /Z, [{, #, MUL VL}] + // { .D, .D, .D }, /Z, [{, #, MUL VL}] + // { .B, .B, .B, .B }, /Z, [{, #, MUL VL}] + // { .H, .H, .H, .H }, /Z, [{, #, MUL VL}] + // { .S, .S, .S, .S }, /Z, [{, #, MUL VL}] + // { .D, .D, .D, .D }, /Z, [{, #, MUL VL}] + case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) + // { .Q, .Q }, , [{, #, MUL VL}] + // { .Q, .Q, .Q }, , [{, #, MUL VL}] + // { .Q, .Q, .Q, .Q }, , [{, #, MUL VL}] + case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // immediate) + // { .B }, , [{, #, MUL VL}] + // { .H }, , [{, #, MUL VL}] + // { .S }, , [{, #, MUL VL}] + // { .D }, , [{, #, MUL VL}] + case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // immediate) + // { .D }, , [{, #, MUL VL}] + // { .Q }, , [{, #, MUL VL}] + case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + // { .B, .B }, , [{, #, MUL VL}] + // { .H, .H }, , [{, #, MUL VL}] + // { .S, .S }, , [{, #, MUL VL}] + // { .D, .D }, , [{, #, MUL VL}] + // { .B, .B, .B }, , [{, #, MUL VL}] + // { .H, .H, .H }, , [{, #, MUL VL}] + // { .S, .S, .S }, , [{, #, MUL VL}] + // { .D, .D, .D }, , [{, #, MUL VL}] + // { .B, .B, .B, .B }, , [{, #, MUL VL}] + // { .H, .H, .H, .H }, , [{, #, MUL VL}] + // { .S, .S, .S, .S }, , [{, #, MUL VL}] + // { .D, .D, .D, .D }, , [{, #, MUL VL}] + case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) + imm = emitGetInsSC(id); + 
emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + printf("["); + emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn + if (imm != 0) + { + switch (fmt) + { + case IF_SVE_IO_3A: + // This does not have to be printed as hex. + // We only do it because the capstone disassembly displays this immediate as hex. + // We could not modify capstone without affecting other cases. + emitDispImm(emitGetInsSC(id), false, /* alwaysHex */ true); // iiii + break; + + case IF_SVE_IQ_3A: + case IF_SVE_IS_3A: + case IF_SVE_JE_3A: + case IF_SVE_JO_3A: + // This does not have to be printed as hex. + // We only do it because the capstone disassembly displays this immediate as hex. + // We could not modify capstone without affecting other cases. + emitDispImm(emitGetInsSC(id), true, /* alwaysHex */ true); // iiii + printf("mul vl"); + break; + + default: + emitDispImm(emitGetInsSC(id), true); // iiii + printf("mul vl"); + break; + } + } + printf("]"); + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -20055,6 +21442,259 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt 
-- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + + case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) + case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + + case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus + // immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + + case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus + // immediate) + switch (ins) + { + case INS_sve_ld1rqb: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1rob: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqh: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1roh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqw: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = 
PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1row: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqd: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1rod: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // immediate) + switch (ins) + { + case INS_sve_ld2q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld3q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld4q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) + switch (ins) + { + case INS_sve_ld2b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3b: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4b: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3h: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; 
+ result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4h: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3w: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4w: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2d: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3d: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4d: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // immediate) + switch (ins) + { + case INS_sve_st2q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_st3q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_st4q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case 
IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) + switch (ins) + { + case INS_sve_st2b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3b: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4b: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3h: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4h: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3w: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4w: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2d: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3d: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4d: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + 
result.insLatency = PERFSCORE_LATENCY_11C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index b7a776c6691e43..c35c5a5f93c918 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -53,7 +53,7 @@ void emitDispVectorReg(regNumber reg, insOpts opt, bool addComma); void emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma); void emitDispVectorRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma); void emitDispVectorElemList(regNumber firstReg, unsigned listSize, emitAttr elemsize, unsigned index, bool addComma); -void emitDispSveRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma); +void emitDispSveConsecutiveRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma); void emitDispPredicateReg(regNumber reg, PredicateType ptype, insOpts opt, bool addComma); void emitDispLowPredicateReg(regNumber reg, PredicateType ptype, insOpts opt, bool addComma); void emitDispArrangement(insOpts opt); @@ -482,6 +482,37 @@ static code_t insEncodeSveElemsize(emitAttr size); // This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); +// Returns the first register list size for the given SVE instruction. +static int insGetSveReg1ListSize(instruction ins); + +// Returns the predicate type for the given SVE format. +static PredicateType insGetPredicateType(insFormat fmt); + +// Returns true if the specified instruction can encode the 'dtype' field. +static bool canEncodeSveElemsize_dtype(instruction ins); + +// Returns the encoding to select the 1/2/4/8/16 byte elemsize for an Arm64 Sve vector instruction +// for the 'dtype' field. 
+static code_t insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t code); + +// Returns the encoding for the immediate value as 4-bits at bit locations '19-16'. +static code_t insEncodeSimm4_19_to_16(ssize_t imm); + +// Returns the encoding for the immediate value that is a multiple of 2 as 4-bits at bit locations '19-16'. +static code_t insEncodeSimm4_MultipleOf2_19_to_16(ssize_t imm); + +// Returns the encoding for the immediate value that is a multiple of 3 as 4-bits at bit locations '19-16'. +static code_t insEncodeSimm4_MultipleOf3_19_to_16(ssize_t imm); + +// Returns the encoding for the immediate value that is a multiple of 4 as 4-bits at bit locations '19-16'. +static code_t insEncodeSimm4_MultipleOf4_19_to_16(ssize_t imm); + +// Returns the encoding for the immediate value that is a multiple of 16 as 4-bits at bit locations '19-16'. +static code_t insEncodeSimm4_MultipleOf16_19_to_16(ssize_t imm); + +// Returns the encoding for the immediate value that is a multiple of 32 as 4-bits at bit locations '19-16'. +static code_t insEncodeSimm4_MultipleOf32_19_to_16(ssize_t imm); + // Returns the encoding to select the elemsize for an Arm64 SVE vector instruction plus an immediate. // This specifically encodes the field 'tszh:tszl' at bit locations '23-22:9-8'. static code_t insEncodeSveShift_23_to_22_9_to_0(emitAttr size, bool isRightShift, size_t imm); @@ -502,6 +533,42 @@ static bool isStackRegister(regNumber reg) return (reg == REG_ZR) || (reg == REG_FP); } // ZR (R31) encodes the SP register +// Returns true if 'value' is a legal signed immediate 4 bit encoding (such as for LDNF1SW). +static bool isValidSimm4(ssize_t value) +{ + return (-8 <= value) && (value <= 7); +}; + +// Returns true if 'value' is a legal signed multiple of 2 immediate 4 bit encoding (such as for LD2Q). 
+static bool isValidSimm4_MultipleOf2(ssize_t value) +{ + return (-16 <= value) && (value <= 14) && (value % 2 == 0); +}; + +// Returns true if 'value' is a legal signed multiple of 3 immediate 4 bit encoding (such as for LD3Q). +static bool isValidSimm4_MultipleOf3(ssize_t value) +{ + return (-24 <= value) && (value <= 21) && (value % 3 == 0); +}; + +// Returns true if 'value' is a legal signed multiple of 4 immediate 4 bit encoding (such as for LD4Q). +static bool isValidSimm4_MultipleOf4(ssize_t value) +{ + return (-32 <= value) && (value <= 28) && (value % 4 == 0); +}; + +// Returns true if 'value' is a legal signed multiple of 16 immediate 4 bit encoding (such as for LD1RQB). +static bool isValidSimm4_MultipleOf16(ssize_t value) +{ + return (-128 <= value) && (value <= 112) && (value % 16 == 0); +}; + +// Returns true if 'value' is a legal signed multiple of 32 immediate 4 bit encoding (such as for LD1ROB). +static bool isValidSimm4_MultipleOf32(ssize_t value) +{ + return (-256 <= value) && (value <= 224) && (value % 32 == 0); +}; + // Returns true if 'value' is a legal unsigned immediate 5 bit encoding (such as for CCMP). static bool isValidUimm5(ssize_t value) { @@ -897,7 +964,7 @@ inline static bool insOptsScalable(insOpts opt) { // Opt is any of the scalable types. return ((insOptsScalableSimple(opt)) || (insOptsScalableWide(opt)) || (insOptsScalableWithSimdScalar(opt)) || - (insOptsScalableWithScalar(opt)) || (insOptsScalableWithSimdVector(opt)) || + (insOptsScalableWithScalar(opt)) || (insOptsScalableWithSimdVector(opt)) || (opt == INS_OPTS_SCALABLE_Q) || insOptsScalableWithPredicateMerge(opt)); } @@ -914,6 +981,12 @@ inline static bool insOptsScalableWords(insOpts opt) return ((opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); } +inline static bool insOptsScalableWordsOrQuadwords(insOpts opt) +{ + // `opt` is any of the standard word, quadword and above scalable types. 
+ return (insOptsScalableWords(opt) || (opt == INS_OPTS_SCALABLE_Q)); +} + inline static bool insOptsScalableAtLeastHalf(insOpts opt) { // `opt` is any of the standard half and above scalable types. diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 717b9c37fcbd87..3579c3ecf58508 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -274,6 +274,7 @@ enum insOpts : unsigned INS_OPTS_SCALABLE_H, INS_OPTS_SCALABLE_S, INS_OPTS_SCALABLE_D, + INS_OPTS_SCALABLE_Q, INS_OPTS_SCALABLE_WIDE_B, INS_OPTS_SCALABLE_WIDE_H, diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index 6347df81bf524a..ad5094bd141e36 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -8,20 +8,20 @@ * nm -- textual name (for assembly dipslay) * info -- miscellaneous instruction info (load/store/compare/ASIMD right shift) * fmt -- encoding format used by this instruction -* e1 -- encoding 1 -* e2 -- encoding 2 -* e3 -- encoding 3 -* e4 -- encoding 4 -* e5 -- encoding 5 -* e6 -- encoding 6 -* e7 -- encoding 7 -* e8 -- encoding 8 -* e9 -- encoding 9 -* e10 -- encoding 10 -* e11 -- encoding 11 -* e12 -- encoding 12 -* e13 -- encoding 13 -*****************************************************************************/ + * e1 -- encoding 1 + * e2 -- encoding 2 + * e3 -- encoding 3 + * e4 -- encoding 4 + * e5 -- encoding 5 + * e6 -- encoding 6 + * e7 -- encoding 7 + * e8 -- encoding 8 + * e9 -- encoding 9 + * e10 -- encoding 10 + * e11 -- encoding 11 + * e12 -- encoding 12 + * e13 -- encoding 13 + *****************************************************************************/ #if !defined(TARGET_ARM64) #error Unexpected target type #endif @@ -127,7 +127,7 @@ INST9(ld1h, "ld1h", 0, IF_SV // LD1H {.D }, /Z, [, .D, LSL #1] SVE_HW_4B 11000100111mmmmm 110gggnnnnnttttt C4E0 C000 // LD1H {.D }, /Z, [, .D] SVE_HW_4B_D 11000100110mmmmm 110gggnnnnnttttt C4C0 C000 // LD1H {.D }, /Z, [.D{, #}] SVE_HX_3A_E 10000100101iiiii 
110gggnnnnnttttt 84A0 C000 - // LD1H {.D }, /Z, [{, #, MUL VL}] SVE_IJ_3A_G 101001001000iiii 101gggnnnnnttttt A480 A000 + // LD1H {.X }, /Z, [{, #, MUL VL}] SVE_IJ_3A_G 101001001000iiii 101gggnnnnnttttt A480 A000 // LD1H {.D }, /Z, [, , LSL #1] SVE_IK_4A_I 10100100100mmmmm 010gggnnnnnttttt A480 4000 @@ -140,7 +140,7 @@ INST9(ld1w, "ld1w", 0, IF_SV // LD1W {.D }, /Z, [, .D, LSL #2] SVE_HW_4B 11000101011mmmmm 110gggnnnnnttttt C560 C000 // LD1W {.D }, /Z, [, .D] SVE_HW_4B_D 11000101010mmmmm 110gggnnnnnttttt C540 C000 // LD1W {.D }, /Z, [.D{, #}] SVE_HX_3A_E 10000101001iiiii 110gggnnnnnttttt 8520 C000 - // LD1W {.D }, /Z, [{, #, MUL VL}] SVE_IH_3A_F 101001010000iiii 001gggnnnnnttttt A500 2000 + // LD1W {.X }, /Z, [{, #, MUL VL}] SVE_IH_3A_F 101001010000iiii 001gggnnnnnttttt A500 2000 // LD1W {.D }, /Z, [, , LSL #2] SVE_II_4A_H 10100101000mmmmm 000gggnnnnnttttt A500 0000 @@ -276,7 +276,7 @@ INST6(ld1b, "ld1b", 0, IF_SV // LD1B {.S }, /Z, [, .S, ] SVE_HW_4A_A 100001000h0mmmmm 010gggnnnnnttttt 8400 4000 // LD1B {.D }, /Z, [, .D] SVE_HW_4B 11000100010mmmmm 110gggnnnnnttttt C440 C000 // LD1B {.D }, /Z, [.D{, #}] SVE_HX_3A_B 10000100001iiiii 110gggnnnnnttttt 8420 C000 - // LD1B {.D }, /Z, [{, #, MUL VL}] SVE_IJ_3A_E 101001000000iiii 101gggnnnnnttttt A400 A000 + // LD1B {.B }, /Z, [{, #, MUL VL}] SVE_IJ_3A_E 101001000000iiii 101gggnnnnnttttt A400 A000 // LD1B {.D }, /Z, [, ] SVE_IK_4A_H 10100100000mmmmm 010gggnnnnnttttt A400 4000 @@ -2816,7 +2816,7 @@ INST1(ld1rw, "ld1rw", 0, IF_SV // enum name info SVE_IL_3A_C INST1(ldnf1b, "ldnf1b", 0, IF_SVE_IL_3A_C, 0xA410A000 ) - // LDNF1B {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_C 101001000001iiii 101gggnnnnnttttt A410 A000 + // LDNF1B {.B }, /Z, [{, #, MUL VL}] SVE_IL_3A_C 101001000001iiii 101gggnnnnnttttt A410 A000 // enum name info SVE_IL_3A @@ -2829,7 +2829,7 @@ INST1(ldnf1sw, "ldnf1sw", 0, IF_SV // enum name info SVE_IL_3A_B INST1(ldnf1h, "ldnf1h", 0, IF_SVE_IL_3A_B, 0xA490A000 ) - // LDNF1H {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_B 
101001001001iiii 101gggnnnnnttttt A490 A000 + // LDNF1H {.X }, /Z, [{, #, MUL VL}] SVE_IL_3A_B 101001001001iiii 101gggnnnnnttttt A490 A000 INST1(ldnf1sb, "ldnf1sb", 0, IF_SVE_IL_3A_B, 0xA590A000 ) // LDNF1SB {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_B 101001011001iiii 101gggnnnnnttttt A590 A000 @@ -2840,7 +2840,7 @@ INST1(ldnf1sh, "ldnf1sh", 0, IF_SV // LDNF1SH {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_A 101001010001iiii 101gggnnnnnttttt A510 A000 INST1(ldnf1w, "ldnf1w", 0, IF_SVE_IL_3A_A, 0xA550A000 ) - // LDNF1W {.D }, /Z, [{, #, MUL VL}] SVE_IL_3A_A 101001010101iiii 101gggnnnnnttttt A550 A000 + // LDNF1W {.S }, /Z, [{, #, MUL VL}] SVE_IL_3A_A 101001010101iiii 101gggnnnnnttttt A550 A000 // enum name info SVE_IW_4A