Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ac3a367

Browse files
committed
Add MCP and skill reliability report (getagentseal#357)
2 parents cb6265e + f4c278e commit ac3a367

3 files changed

Lines changed: 405 additions & 0 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
## Unreleased
44

55
### Added (CLI)
6+
- **MCP and skill reliability report.** `codeburn optimize` now detects MCP
7+
servers and skills whose edit turns are disproportionately retry-heavy,
8+
using turn-level MCP/Skill call evidence and a shared-turn token estimate so
9+
one retry-heavy turn is not double-counted across multiple capabilities.
10+
Thanks @ozymandiashh. (#357)
611
- **Tooling breakdowns in dashboard and menubar.** New panels showing core
712
tools, MCP servers, and shell command usage per session and across periods.
813
- **File-aware retry detection with typed ToolCall.** One-shot rate now tracks

src/optimize.ts

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,15 @@ const WORTH_IT_LOW_MAX_CANDIDATES = 2
9999
const WORTH_IT_LOW_MAX_TOTAL_COST_USD = 10
100100
const WORTH_IT_HIGH_MIN_CANDIDATES = 10
101101
const WORTH_IT_HIGH_TOTAL_COST_USD = 50
102+
// Thresholds for the MCP/skill reliability report.
// A capability is skipped unless it appeared in at least this many edit turns.
const CAPABILITY_RELIABILITY_MIN_EDIT_TURNS = 5
103+
// A capability is skipped unless at least this many of its edit turns had retries.
const CAPABILITY_RELIABILITY_MIN_RETRY_TURNS = 3
104+
// Minimum retryTurns / editTurns ratio for a capability to be reported.
const CAPABILITY_RELIABILITY_MIN_RETRY_RATE = 0.50
105+
// Fraction of a retried turn's effective tokens counted as recoverable savings.
const CAPABILITY_RELIABILITY_RECOVERY_FRACTION = 0.50
106+
// Maximum number of candidates listed by name in the finding text.
const CAPABILITY_RELIABILITY_PREVIEW = 5
107+
// Impact is 'low' only when the candidate count is at or below this value...
const CAPABILITY_RELIABILITY_LOW_MAX_CANDIDATES = 1
108+
// ...and the estimated token savings are strictly below this value.
const CAPABILITY_RELIABILITY_LOW_MAX_TOKENS = 50_000
109+
// Impact is 'high' when the candidate count reaches this value...
const CAPABILITY_RELIABILITY_HIGH_MIN_CANDIDATES = 5
110+
// ...or the estimated token savings reach this value.
const CAPABILITY_RELIABILITY_HIGH_IMPACT_TOKENS = 200_000
102111

103112
// ============================================================================
104113
// Scoring constants
@@ -895,6 +904,245 @@ export function detectMcpToolCoverage(
895904
}
896905
}
897906

907+
// The two capability categories tracked by the reliability report: MCP servers and skills.
type CapabilityKind = 'mcp' | 'skill'
908+
909+
// Identifies one capability by its category and name.
type CapabilityRef = {
910+
// Whether the capability is an MCP server or a skill.
kind: CapabilityKind
911+
// MCP server name or skill name.
name: string
912+
}
913+
914+
/**
 * Running totals for one capability while scanning edit turns.
 * Carries the same `kind` and `name` fields as `CapabilityRef`.
 */
type CapabilityReliabilityAccumulator = {
  /** Whether the capability is an MCP server or a skill. */
  kind: CapabilityKind
  /** MCP server name or skill name. */
  name: string
  /** Number of edit turns in which the capability was used. */
  editTurns: number
  /** Number of those edit turns that had at least one retry. */
  retryTurns: number
  /** Number of those edit turns that had no retries. */
  oneShotTurns: number
  /** Sum of `turn.retries` over the retried edit turns. */
  retries: number
  /** Sum of effective tokens over every edit turn that used the capability. */
  tokensTouched: number
  /** Names of the projects in which the capability was seen. */
  projects: Set<string>
  /** Recoverable-token estimate per retried turn, keyed by the turn's reliability key. */
  retryTurnSavings: Map<string, number>
}
923+
924+
/**
 * A capability that passed the reliability thresholds and is reported as
 * correlated with retry-heavy edit turns.
 */
export type CapabilityReliabilityCandidate = CapabilityRef & {
  /** Number of edit turns in which the capability was used. */
  editTurns: number
  /** Number of those edit turns that had at least one retry. */
  retryTurns: number
  /** Number of those edit turns that had no retries. */
  oneShotTurns: number
  /** Sum of retries over the retried edit turns. */
  retries: number
  /** `retryTurns / editTurns`. */
  retryRate: number
  /** Sum of effective tokens over every edit turn that used the capability. */
  tokensTouched: number
  /** Sum of the per-turn recoverable-token estimates for this capability. */
  tokensSaved: number
  /** Sorted names of the projects in which the capability was seen. */
  projects: string[]
}
936+
937+
/**
 * Builds the string key used to index a capability in maps and sets.
 *
 * @param ref - Capability to identify.
 * @returns The kind and the name joined by a colon, e.g. `mcp:github`.
 */
function capabilityKey(ref: CapabilityRef): string {
  const { kind, name } = ref
  return `${kind}:${name}`
}
940+
941+
/**
 * Returns the human-readable label for a capability kind.
 *
 * @param kind - Capability category.
 * @returns `'MCP server'` for `'mcp'`, otherwise `'skill'`.
 */
function formatCapabilityKind(kind: CapabilityKind): string {
  if (kind === 'mcp') return 'MCP server'
  return 'skill'
}
944+
945+
/**
 * Extracts the server segment from a fully qualified MCP tool name.
 *
 * The name is split on `__`; it must start with an `mcp` segment and have at
 * least three segments in total.
 *
 * @param fqn - Tool name such as `mcp__server__tool`.
 * @returns The second segment, or `null` when the name does not match the
 *   expected shape or the server segment is empty.
 */
function mcpServerFromToolName(fqn: string): string | null {
  const [prefix, server, ...rest] = fqn.split('__')
  // At least one segment after the server is required (three segments in total).
  const wellFormed = prefix === 'mcp' && rest.length >= 1
  if (!wellFormed) return null
  return server ? server : null
}
950+
951+
/**
 * Collects the distinct MCP servers and skills referenced by a turn's assistant calls.
 *
 * MCP tool names that do not yield a server name are ignored, as are skill
 * names that are empty after trimming.
 *
 * @param turn - Turn whose assistant calls are inspected.
 * @returns Capabilities keyed by `capabilityKey`, so each one appears at most once per turn.
 */
function collectReliabilityCapabilities(turn: ProjectSummary['sessions'][number]['turns'][number]): Map<string, CapabilityRef> {
  const found = new Map<string, CapabilityRef>()
  const remember = (ref: CapabilityRef): void => {
    found.set(capabilityKey(ref), ref)
  }

  for (const call of turn.assistantCalls) {
    for (const toolName of call.mcpTools) {
      const server = mcpServerFromToolName(toolName)
      if (server) remember({ kind: 'mcp', name: server })
    }
    // `skills` may be absent on a call; treat that as no skills.
    for (const rawSkill of call.skills ?? []) {
      const skill = rawSkill.trim()
      if (skill) remember({ kind: 'skill', name: skill })
    }
  }

  return found
}
971+
972+
/**
 * Computes a weighted token total for a turn.
 *
 * For every assistant call it adds input tokens, output tokens, cache-creation
 * tokens scaled by `CACHE_WRITE_MULTIPLIER`, and cache-read tokens scaled by
 * `CACHE_READ_DISCOUNT`.
 *
 * @param turn - Turn whose assistant calls are summed.
 * @returns The weighted total, rounded to the nearest integer.
 */
function turnEffectiveTokenTotal(turn: ProjectSummary['sessions'][number]['turns'][number]): number {
  let total = 0
  for (const { usage } of turn.assistantCalls) {
    // Terms are added left to right, one at a time, onto the running total.
    total = total
      + usage.inputTokens
      + usage.outputTokens
      + usage.cacheCreationInputTokens * CACHE_WRITE_MULTIPLIER
      + usage.cacheReadInputTokens * CACHE_READ_DISCOUNT
  }
  return Math.round(total)
}
981+
982+
/**
 * Builds a string key identifying one turn within one session of one project.
 *
 * @param project - Owning project; `projectPath` is used when truthy, otherwise `project`.
 * @param session - Owning session; contributes `sessionId`.
 * @param turn - The turn; contributes `timestamp`.
 * @param turnIndex - Position of the turn within `session.turns`.
 * @returns The four parts joined by colons.
 */
function reliabilityTurnKey(
  project: ProjectSummary,
  session: ProjectSummary['sessions'][number],
  turn: ProjectSummary['sessions'][number]['turns'][number],
  turnIndex: number,
): string {
  const scope = project.projectPath || project.project
  const { sessionId } = session
  const { timestamp } = turn
  return `${scope}:${sessionId}:${timestamp}:${turnIndex}`
}
990+
991+
/**
 * Returns the accumulator for a capability, creating and registering a
 * zeroed one the first time the capability is seen.
 *
 * @param stats - Accumulators keyed by `capabilityKey`; mutated when a new entry is added.
 * @param ref - Capability to look up.
 * @returns The existing or newly created accumulator.
 */
function getReliabilityAccumulator(
  stats: Map<string, CapabilityReliabilityAccumulator>,
  ref: CapabilityRef,
): CapabilityReliabilityAccumulator {
  const key = capabilityKey(ref)
  const existing = stats.get(key)
  if (existing) return existing

  const created: CapabilityReliabilityAccumulator = {
    ...ref,
    editTurns: 0,
    retryTurns: 0,
    oneShotTurns: 0,
    retries: 0,
    tokensTouched: 0,
    projects: new Set(),
    retryTurnSavings: new Map(),
  }
  stats.set(key, created)
  return created
}
1012+
1013+
/**
 * Scans every edit turn and returns the MCP servers and skills whose edit
 * turns are disproportionately retried.
 *
 * A capability qualifies when it has at least
 * `CAPABILITY_RELIABILITY_MIN_EDIT_TURNS` edit turns, at least
 * `CAPABILITY_RELIABILITY_MIN_RETRY_TURNS` retried edit turns, and a retry
 * rate of at least `CAPABILITY_RELIABILITY_MIN_RETRY_RATE`.
 *
 * @param projects - Project summaries to scan.
 * @returns Candidates ordered by retry rate, then retries, then tokens saved
 *   (all descending), then kind and name (ascending).
 */
function findCapabilityReliabilityCandidates(projects: ProjectSummary[]): CapabilityReliabilityCandidate[] {
  const stats = new Map<string, CapabilityReliabilityAccumulator>()

  for (const project of projects) {
    for (const session of project.sessions) {
      for (const [turnIndex, turn] of session.turns.entries()) {
        // Only turns that edited files are considered.
        if (!turn.hasEdits) continue

        const capabilities = collectReliabilityCapabilities(turn)
        if (capabilities.size === 0) continue

        const turnTokens = turnEffectiveTokenTotal(turn)
        const turnKey = reliabilityTurnKey(project, session, turn, turnIndex)
        const retried = turn.retries > 0
        const recoverableTokens = retried
          ? Math.round(turnTokens * CAPABILITY_RELIABILITY_RECOVERY_FRACTION)
          : 0

        // The turn counts once for every capability it used.
        for (const ref of capabilities.values()) {
          const acc = getReliabilityAccumulator(stats, ref)
          acc.editTurns += 1
          acc.tokensTouched += turnTokens
          acc.projects.add(project.project)
          if (!retried) {
            acc.oneShotTurns += 1
            continue
          }
          acc.retryTurns += 1
          acc.retries += turn.retries
          acc.retryTurnSavings.set(turnKey, recoverableTokens)
        }
      }
    }
  }

  const candidates: CapabilityReliabilityCandidate[] = []
  for (const acc of stats.values()) {
    if (
      acc.editTurns < CAPABILITY_RELIABILITY_MIN_EDIT_TURNS
      || acc.retryTurns < CAPABILITY_RELIABILITY_MIN_RETRY_TURNS
    ) continue

    const retryRate = acc.retryTurns / acc.editTurns
    if (retryRate < CAPABILITY_RELIABILITY_MIN_RETRY_RATE) continue

    let tokensSaved = 0
    for (const tokens of acc.retryTurnSavings.values()) tokensSaved += tokens

    candidates.push({
      kind: acc.kind,
      name: acc.name,
      editTurns: acc.editTurns,
      retryTurns: acc.retryTurns,
      oneShotTurns: acc.oneShotTurns,
      retries: acc.retries,
      retryRate,
      tokensTouched: acc.tokensTouched,
      tokensSaved,
      projects: [...acc.projects].sort(),
    })
  }

  candidates.sort((left, right) => {
    const byRate = right.retryRate - left.retryRate
    const byRetries = right.retries - left.retries
    const bySavings = right.tokensSaved - left.tokensSaved
    // The first non-zero comparison decides; kind and name break remaining ties.
    return byRate
      || byRetries
      || bySavings
      || left.kind.localeCompare(right.kind)
      || left.name.localeCompare(right.name)
  })
  return candidates
}
1078+
1079+
/**
 * Produces the "MCP/skill reliability" finding: capabilities whose edit turns
 * are retry-heavy, with an estimate of the tokens that could be recovered.
 *
 * The token estimate is computed per turn, so a retried turn that used
 * several flagged capabilities is counted once.
 *
 * @param projects - Project summaries to scan.
 * @returns A finding describing the flagged capabilities, or `null` when
 *   no capability passes the reliability thresholds.
 */
export function detectCapabilityReliability(projects: ProjectSummary[]): WasteFinding | null {
  const candidates = findCapabilityReliabilityCandidates(projects)
  if (candidates.length === 0) return null

  const flaggedKeys = new Set(candidates.map(candidate => capabilityKey(candidate)))
  const savingsByTurn = new Map<string, number>()
  for (const project of projects) {
    for (const session of project.sessions) {
      for (const [turnIndex, turn] of session.turns.entries()) {
        if (!turn.hasEdits || turn.retries <= 0) continue
        const capabilities = collectReliabilityCapabilities(turn)
        if (capabilities.size === 0) continue

        // Only turns that used at least one flagged capability contribute savings.
        let touchesFlagged = false
        for (const key of capabilities.keys()) {
          if (flaggedKeys.has(key)) {
            touchesFlagged = true
            break
          }
        }
        if (!touchesFlagged) continue

        const turnKey = reliabilityTurnKey(project, session, turn, turnIndex)
        const recoverable = Math.round(turnEffectiveTokenTotal(turn) * CAPABILITY_RELIABILITY_RECOVERY_FRACTION)
        const previous = savingsByTurn.get(turnKey) ?? 0
        savingsByTurn.set(turnKey, Math.max(previous, recoverable))
      }
    }
  }

  let tokensSaved = 0
  for (const tokens of savingsByTurn.values()) tokensSaved += tokens

  // Only the first few candidates are spelled out; the rest are summarized as a count.
  const preview = candidates.slice(0, CAPABILITY_RELIABILITY_PREVIEW)
  const list = preview
    .map(c => {
      const percent = Math.round(c.retryRate * 100)
      const where = c.projects.length > 1
        ? ` across ${c.projects.length} projects`
        : ` in ${c.projects[0] ?? 'one project'}`
      return `${formatCapabilityKind(c.kind)} ${c.name}: ${c.retryTurns}/${c.editTurns} edit turns retried (${percent}%), ${c.retries} retries${where}`
    })
    .join('; ')
  const hiddenCount = candidates.length - preview.length
  const extra = candidates.length > preview.length ? `; +${hiddenCount} more` : ''

  const names = preview.map(c => `${formatCapabilityKind(c.kind)} ${c.name}`).join(', ')

  const isHigh = candidates.length >= CAPABILITY_RELIABILITY_HIGH_MIN_CANDIDATES
    || tokensSaved >= CAPABILITY_RELIABILITY_HIGH_IMPACT_TOKENS
  const isLow = candidates.length <= CAPABILITY_RELIABILITY_LOW_MAX_CANDIDATES
    && tokensSaved < CAPABILITY_RELIABILITY_LOW_MAX_TOKENS
  const impact: Impact = isHigh ? 'high' : isLow ? 'low' : 'medium'

  // Use a specific noun when every candidate is the same kind, a generic one otherwise.
  const kinds = new Set(candidates.map(c => c.kind))
  let noun = 'MCP/skill capability'
  let pluralNoun = 'MCP/skill capabilities'
  if (kinds.size === 1) {
    noun = kinds.has('mcp') ? 'MCP server' : 'skill'
    pluralNoun = `${noun}s`
  }
  const single = candidates.length === 1
  const verb = single ? 'correlates' : 'correlate'

  return {
    title: `${candidates.length} ${single ? noun : pluralNoun} ${verb} with retry-heavy edits`,
    explanation: `Edit turns using these capabilities are retry-heavy: ${list}${extra}. This is a correlation report, not proof of causation; compare the retry-heavy turns with one-shot turns before changing MCP scope or skill instructions.`,
    impact,
    tokensSaved,
    fix: {
      type: 'paste',
      destination: 'prompt',
      label: 'Ask Claude to audit the retry-heavy capability before changing config:',
      text: `Investigate these retry-correlated capabilities: ${names}. Compare edit turns with retries against one-shot edit turns, identify whether the MCP server or skill actually caused rework, then propose a scoped MCP config or skill-instruction change with session evidence. Do not remove a capability solely because it appears in this report.`,
    },
  }
}
1145+
8981146
export function detectUnusedMcp(
8991147
calls: ToolCall[],
9001148
projects: ProjectSummary[],
@@ -1800,6 +2048,7 @@ export async function scanAndDetect(
18002048
() => detectDuplicateReads(toolCalls, dateRange),
18012049
() => detectUnusedMcp(toolCalls, projects, projectCwds, mcpCoverage),
18022050
() => detectMcpToolCoverage(projects, mcpCoverage),
2051+
() => detectCapabilityReliability(projects),
18032052
() => detectLowWorthSessions(projects),
18042053
() => detectContextBloat(projects, lowWorthSessionIds),
18052054
() => detectSessionOutliers(projects, outlierExclusions),

0 commit comments

Comments
 (0)