Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit cee7834

Browse files
authored
Perfscore dumping/dasm improvements (dotnet#264)
* Perf Score dumping/dasm improvements
  - Show how much each BasicBlock contributes to the overall PerfScore
  - Change the string "perf score" to "PerfScore" when reporting in the Disassembly and Dump files
  - Add PerfScore support for all currently used AVX2, SSE, etc. instructions
* Code Review feedback
* Update the INS_lea throughput and latencies; added comments for INS_movd and INS_movq and a few others
* Fix x86 break - Use baseRegisterRequiresDisplacement(baseReg)
* Fix the RIP relative block for INS_lea
* Fixed INS_lea special cases: IF_RWR_LABEL and IF_RWR_SRD
1 parent d8302ce commit cee7834

File tree

4 files changed

+574
-172
lines changed

4 files changed

+574
-172
lines changed

src/coreclr/src/jit/codegencommon.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2282,11 +2282,6 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
22822282
trackedStackPtrsContig = !compiler->opts.compDbgEnC;
22832283
#endif
22842284

2285-
#ifdef DEBUG
2286-
/* We're done generating code for this function */
2287-
compiler->compCodeGenDone = true;
2288-
#endif
2289-
22902285
compiler->EndPhase(PHASE_GENERATE_CODE);
22912286

22922287
codeSize =
@@ -2296,6 +2291,13 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
22962291

22972292
compiler->EndPhase(PHASE_EMIT_CODE);
22982293

2294+
#ifdef DEBUG
2295+
assert(compiler->compCodeGenDone == false);
2296+
2297+
/* We're done generating code for this function */
2298+
compiler->compCodeGenDone = true;
2299+
#endif
2300+
22992301
#if defined(DEBUG) || defined(LATE_DISASM)
23002302
// Add code size information into the Perf Score
23012303
// All compPerfScore calculations must be performed using doubles
@@ -2307,9 +2309,10 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
23072309
#ifdef DEBUG
23082310
if (compiler->opts.disAsm || verbose)
23092311
{
2310-
printf("; Total bytes of code %d, prolog size %d, perf score %.2f, (MethodHash=%08x) for method %s\n", codeSize,
2311-
prologSize, compiler->info.compPerfScore, compiler->info.compMethodHash(), compiler->info.compFullName);
2312-
printf("; ============================================================\n");
2312+
printf("\n; Total bytes of code %d, prolog size %d, PerfScore %.2f, (MethodHash=%08x) for method %s\n",
2313+
codeSize, prologSize, compiler->info.compPerfScore, compiler->info.compMethodHash(),
2314+
compiler->info.compFullName);
2315+
printf("; ============================================================\n\n");
23132316
printf(""); // in our logic this causes a flush
23142317
}
23152318

src/coreclr/src/jit/emit.cpp

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,13 +1105,13 @@ float emitter::insEvaluateExecutionCost(instrDesc* id)
11051105
assert(throughput > 0.0);
11061106
assert(latency >= 0.0);
11071107

1108-
if ((memAccessKind == PERFSCORE_MEMORY_WRITE) && (latency <= PERFSCORE_LATENCY_WR_GENERAL))
1108+
if (memAccessKind == PERFSCORE_MEMORY_WRITE)
11091109
{
1110-
// We assume that we won't read back from memory for any writes
1111-
// Thus we don't pay latency costs for writes.
1112-
latency = 0.0;
1110+
// We assume that we won't read back from memory for the next WR_GENERAL (3) cycles
1111+
// Thus we normally won't pay latency costs for writes.
1112+
latency = max(0.0f, latency - PERFSCORE_LATENCY_WR_GENERAL);
11131113
}
1114-
if (latency >= 1.0)
1114+
else if (latency >= 1.0) // Otherwise, if we aren't performing a memory write
11151115
{
11161116
// We assume that the processor's speculation will typically eliminate one cycle of latency
11171117
//
@@ -2376,7 +2376,8 @@ void* emitter::emitAddLabel(VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMas
23762376
#if defined(DEBUG) || defined(LATE_DISASM)
23772377
else
23782378
{
2379-
emitCurIG->igWeight = getCurrentBlockWeight();
2379+
emitCurIG->igWeight = getCurrentBlockWeight();
2380+
emitCurIG->igPerfScore = 0.0;
23802381
}
23812382
#endif
23822383

@@ -3289,7 +3290,12 @@ void emitter::emitDispIG(insGroup* ig, insGroup* igPrev, bool verbose)
32893290
}
32903291
else
32913292
{
3292-
printf("offs=%06XH, size=%04XH, bbWeight=%s", ig->igOffs, ig->igSize, refCntWtd2str(ig->igWeight));
3293+
printf("offs=%06XH, size=%04XH", ig->igOffs, ig->igSize);
3294+
3295+
if (emitComp->compCodeGenDone)
3296+
{
3297+
printf(", bbWeight=%s PerfScore %.2f", refCntWtd2str(ig->igWeight), ig->igPerfScore);
3298+
}
32933299

32943300
if (ig->igFlags & IGF_GC_VARS)
32953301
{
@@ -3415,7 +3421,9 @@ size_t emitter::emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp)
34153421
#if defined(DEBUG) || defined(LATE_DISASM)
34163422
float insExeCost = insEvaluateExecutionCost(id);
34173423
// All compPerfScore calculations must be performed using doubles
3418-
emitComp->info.compPerfScore += (double)(ig->igWeight / (double)BB_UNITY_WEIGHT) * insExeCost;
3424+
double insPerfScore = (double)(ig->igWeight / (double)BB_UNITY_WEIGHT) * insExeCost;
3425+
emitComp->info.compPerfScore += insPerfScore;
3426+
ig->igPerfScore += insPerfScore;
34193427
#endif // defined(DEBUG) || defined(LATE_DISASM)
34203428

34213429
// printf("[S=%02u]\n", emitCurStackLvl);
@@ -4841,7 +4849,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
48414849
instrDesc* id = (instrDesc*)ig->igData;
48424850

48434851
#ifdef DEBUG
4844-
48454852
/* Print the IG label, but only if it is a branch label */
48464853

48474854
if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
@@ -4853,17 +4860,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
48534860
}
48544861
else
48554862
{
4856-
printf("\nG_M%03u_IG%02u:", Compiler::s_compMethodsCount, ig->igNum);
4857-
4858-
// Display the block weight, but only when it isn't the standard BB_UNITY_WEIGHT
4859-
if (ig->igWeight != BB_UNITY_WEIGHT)
4860-
{
4861-
printf("\t\t;; bbWeight=%s", refCntWtd2str(ig->igWeight));
4862-
}
4863-
printf("\n");
4863+
printf("\nG_M%03u_IG%02u:\n", Compiler::s_compMethodsCount, ig->igNum);
48644864
}
48654865
}
4866-
48674866
#endif // DEBUG
48684867

48694868
BYTE* bp = cp;
@@ -4952,6 +4951,13 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
49524951
castto(id, BYTE*) += emitIssue1Instr(ig, id, &cp);
49534952
}
49544953

4954+
#ifdef DEBUG
4955+
if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
4956+
{
4957+
printf("\t\t\t\t\t\t;; bbWeight=%s PerfScore %.2f", refCntWtd2str(ig->igWeight), ig->igPerfScore);
4958+
}
4959+
#endif // DEBUG
4960+
49554961
emitCurIG = nullptr;
49564962

49574963
assert(ig->igSize >= cp - bp);
@@ -6846,7 +6852,8 @@ insGroup* emitter::emitAllocIG()
68466852
#endif
68476853

68486854
#if defined(DEBUG) || defined(LATE_DISASM)
6849-
ig->igWeight = getCurrentBlockWeight();
6855+
ig->igWeight = getCurrentBlockWeight();
6856+
ig->igPerfScore = 0.0;
68506857
#endif
68516858

68526859
#if EMITTER_STATS

src/coreclr/src/jit/emit.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,8 @@ struct insGroup
249249
insGroup* igSelf; // for consistency checking
250250
#endif
251251
#if defined(DEBUG) || defined(LATE_DISASM)
252-
BasicBlock::weight_t igWeight; // the block weight used for this insGroup
252+
BasicBlock::weight_t igWeight; // the block weight used for this insGroup
253+
double igPerfScore; // The PerfScore for this insGroup
253254
#endif
254255

255256
UNATIVE_OFFSET igNum; // for ordering (and display) purposes
@@ -1235,6 +1236,7 @@ class emitter
12351236
#define PERFSCORE_THROUGHPUT_10C 10.0f // slower - 10 cycles
12361237
#define PERFSCORE_THROUGHPUT_13C 13.0f // slower - 13 cycles
12371238
#define PERFSCORE_THROUGHPUT_25C 25.0f // slower - 25 cycles
1239+
#define PERFSCORE_THROUGHPUT_33C 33.0f // slower - 33 cycles
12381240
#define PERFSCORE_THROUGHPUT_52C 52.0f // slower - 52 cycles
12391241
#define PERFSCORE_THROUGHPUT_57C 57.0f // slower - 57 cycles
12401242

@@ -1253,6 +1255,8 @@ class emitter
12531255
#define PERFSCORE_LATENCY_6C 6.0f
12541256
#define PERFSCORE_LATENCY_7C 7.0f
12551257
#define PERFSCORE_LATENCY_8C 8.0f
1258+
#define PERFSCORE_LATENCY_9C 9.0f
1259+
#define PERFSCORE_LATENCY_10C 10.0f
12561260
#define PERFSCORE_LATENCY_11C 11.0f
12571261
#define PERFSCORE_LATENCY_12C 12.0f
12581262
#define PERFSCORE_LATENCY_13C 13.0f
@@ -1261,6 +1265,7 @@ class emitter
12611265
#define PERFSCORE_LATENCY_26C 26.0f
12621266
#define PERFSCORE_LATENCY_62C 62.0f
12631267
#define PERFSCORE_LATENCY_69C 69.0f
1268+
#define PERFSCORE_LATENCY_400C 400.0f // Intel microcode issue with these instructions
12641269

12651270
#define PERFSCORE_LATENCY_BRANCH_DIRECT 1.0f // cost of an unconditional branch
12661271
#define PERFSCORE_LATENCY_BRANCH_COND 2.0f // includes cost of a possible misprediction

0 commit comments

Comments
 (0)