 # Lists the Github workflows we want to track. Maps the Github workflow name
 # to the metric name prefix in grafana.
 # This metric name is also used as a key in the job->name map.
-GITHUB_WORKFLOW_TO_TRACK = {"CI Checks": "github_llvm_premerge_checks"}
+GITHUB_WORKFLOW_TO_TRACK = {
+    "CI Checks": "github_llvm_premerge_checks",
+    "Build and Test libc++": "github_libcxx_premerge_checks",
+}


 # Lists the Github jobs to track for a given workflow. The key is the stable
 # name (metric name) of the workflow (see GITHUB_WORKFLOW_TO_TRACK).
 GITHUB_JOB_TO_TRACK = {
     "github_llvm_premerge_checks": {
         "Build and Test Linux": "premerge_linux",
         "Build and Test Windows": "premerge_windows",
-    }
+    },
+    "github_libcxx_premerge_checks": {
+        "stage1": "premerge_libcxx_stage1",
+        "stage2": "premerge_libcxx_stage2",
+        "stage3": "premerge_libcxx_stage3",
+    },
 }
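
For orientation (a sketch, not part of the diff): the prefix from
GITHUB_WORKFLOW_TO_TRACK and the per-job suffix from GITHUB_JOB_TO_TRACK are
joined with an underscore to form the Grafana metric name, mirroring the
`metric_name = name_prefix + "_" + name_suffix` line further down in this
patch (for libc++ jobs, the suffix instead comes from the new
clean_up_libcxx_job_name helper):

# Sketch: composing a metric name from the two lookup tables above.
name_prefix = GITHUB_WORKFLOW_TO_TRACK["CI Checks"]
name_suffix = GITHUB_JOB_TO_TRACK[name_prefix]["Build and Test Linux"]
metric_name = name_prefix + "_" + name_suffix
# metric_name == "github_llvm_premerge_checks_premerge_linux"
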
 # The number of workflows to pull when sampling Github workflows.
[...]
 # by trial and error).
 GRAFANA_METRIC_MAX_AGE_MN = 120

-
 @dataclass
 class JobMetrics:
     job_name: str
     queue_time: int
     run_time: int
     status: int
+    created_at_ns: int
+    started_at_ns: int
     completed_at_ns: int
     workflow_id: int
     workflow_name: str
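
Aside (a hedged sketch, not part of the diff): the two new *_at_ns fields hold
wall-clock timestamps converted to nanoseconds the same way the existing
completed_at_ns field is, i.e.:

# Sketch: converting a datetime to the nanosecond epoch value stored in
# JobMetrics (matches the `int(x.timestamp()) * 10**9` lines in this patch).
from datetime import datetime, timezone

created_at = datetime(2025, 1, 1, tzinfo=timezone.utc)  # hypothetical value
created_at_ns = int(created_at.timestamp()) * 10**9
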
@@ -81,6 +90,159 @@ class GaugeMetric:
     time_ns: int


+@dataclass
+class AggregateMetric:
+    aggregate_name: str
+    aggregate_queue_time: int
+    aggregate_run_time: int
+    aggregate_status: int
+    completed_at_ns: int
+    workflow_id: int
+
+
+def _construct_aggregate(ag_name: str, job_list: list[JobMetrics]) -> AggregateMetric:
+    """Create a libc++ AggregateMetric from a list of libc++ JobMetrics.
+
+    How aggregates are computed:
+    queue time: Time from when the first job in the group is created until the
+      last job in the group has started.
+    run time: Time from when the first job in the group starts running until
+      the last job in the group finishes running.
+    status: logical 'and' of all the job statuses in the group.
+
+    Args:
+      ag_name: The name for this particular AggregateMetric.
+      job_list: The list of JobMetrics to be combined into the AggregateMetric.
+        The input list should contain all (and only!) the libc++ JobMetrics
+        for a particular stage and a particular workflow_id.
+
+    Returns:
+      Returns the AggregateMetric constructed from the inputs.
+    """
+
+    # Initialize the aggregate values from the first job in the list.
+    earliest_create = job_list[0].created_at_ns
+    earliest_start = job_list[0].started_at_ns
+    earliest_complete = job_list[0].completed_at_ns
+    latest_start = job_list[0].started_at_ns
+    latest_complete = job_list[0].completed_at_ns
+    ag_status = job_list[0].status
+    ag_workflow_id = job_list[0].workflow_id
+
+    # Go through the rest of the jobs for this workflow id, if any, updating stats.
+    for job in job_list[1:]:
+        # Update the status.
+        ag_status = ag_status and job.status
+        # Track the earliest & latest times.
+        if job.created_at_ns < earliest_create:
+            earliest_create = job.created_at_ns
+        if job.completed_at_ns < earliest_complete:
+            earliest_complete = job.completed_at_ns
+        if job.started_at_ns > latest_start:
+            latest_start = job.started_at_ns
+        if job.started_at_ns < earliest_start:
+            earliest_start = job.started_at_ns
+        if job.completed_at_ns > latest_complete:
+            latest_complete = job.completed_at_ns
+
+    # Compute aggregate run time (in seconds, not ns).
+    ag_run_time = (latest_complete - earliest_start) / 1000000000
+    # Compute aggregate queue time (in seconds, not ns).
+    ag_queue_time = (latest_start - earliest_create) / 1000000000
+    # Construct and return the aggregate metric.
+    return AggregateMetric(
+        ag_name, ag_queue_time, ag_run_time, ag_status, latest_complete, ag_workflow_id
+    )
+
+
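
A quick usage sketch (hypothetical values, not part of the diff) of how
_construct_aggregate combines two stage-1 jobs from the same workflow run:

# Sketch: two hypothetical stage1 jobs from workflow run 42. Field order:
# name, queue_time, run_time, status, created_at_ns, started_at_ns,
# completed_at_ns, workflow_id, workflow_name.
job_a = JobMetrics("x_stage1_a", 10, 300, 1, 1_000, 3_000, 9_000, 42, "Build and Test libc++")
job_b = JobMetrics("x_stage1_b", 20, 200, 1, 2_000, 5_000, 8_000, 42, "Build and Test libc++")
ag = _construct_aggregate("github_libcxx_premerge_checks_stage1_aggregate", [job_a, job_b])
# ag.aggregate_queue_time == (5_000 - 1_000) / 10**9   # latest start - earliest create
# ag.aggregate_run_time == (9_000 - 3_000) / 10**9     # latest complete - earliest start
# ag.aggregate_status == 1, ag.completed_at_ns == 9_000, ag.workflow_id == 42
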
158
+ def create_and_append_libcxx_aggregates (workflow_metrics : list [JobMetrics ]):
159
+ """Find libc++ JobMetric entries and create aggregate metrics for them.
160
+
161
+ Sort the libc++ JobMetric entries by workflow id, and for each workflow
162
+ id group them by stages. Call _construct_aggregate to reate an aggregate
163
+ metric for each stage for each unique workflow id. Append each aggregate
164
+ metric to the input workflow_metrics list.
165
+
166
+ Args:
167
+ workflow_metrics: A list of JobMetrics entries collected so far.
168
+ """
169
+ # Separate the jobs by workflow_id. Only look at JobMetrics entries.
170
+ aggregate_data = dict ()
171
+ for job in workflow_metrics :
172
+ # Only want to look at JobMetrics
173
+ if not isinstance (job , JobMetrics ):
174
+ continue
175
+ # Only want libc++ jobs.
176
+ if job .workflow_name != "Build and Test libc++" :
177
+ continue
178
+ if job .workflow_id not in aggregate_data .keys ():
179
+ aggregate_data [job .workflow_id ] = [job ]
180
+ else :
181
+ aggregate_data [job .workflow_id ].append (job )
182
+
183
+ # Go through each aggregate_data list (workflow id) and find all the
184
+ # needed data
185
+ for ag_workflow_id in aggregate_data :
186
+ job_list = aggregate_data [ag_workflow_id ]
187
+ stage1_jobs = list ()
188
+ stage2_jobs = list ()
189
+ stage3_jobs = list ()
190
+ # sort jobs into stage1, stage2, & stage3.
191
+ for job in job_list :
192
+ if job .job_name .find ("stage1" ) > 0 :
193
+ stage1_jobs .append (job )
194
+ elif job .job_name .find ("stage2" ) > 0 :
195
+ stage2_jobs .append (job )
196
+ elif job .job_name .find ("stage3" ) > 0 :
197
+ stage3_jobs .append (job )
198
+
199
+ if len (stage1_jobs ) > 0 :
200
+ aggregate = _construct_aggregate (
201
+ "github_libcxx_premerge_checks_stage1_aggregate" , stage1_jobs
202
+ )
203
+ workflow_metrics .append (aggregate )
204
+ if len (stage2_jobs ) > 0 :
205
+ aggregate = _construct_aggregate (
206
+ "github_libcxx_premerge_checks_stage2_aggregate" , stage2_jobs
207
+ )
208
+ workflow_metrics .append (aggregate )
209
+ if len (stage3_jobs ) > 0 :
210
+ aggregate = _construct_aggregate (
211
+ "github_libcxx_premerge_checks_stage3_aggregate" , stage3_jobs
212
+ )
213
+ workflow_metrics .append (aggregate )
214
+
215
+
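
Usage sketch (not part of the diff): the function mutates its argument in
place, so after the collection loop in github_get_metrics below has produced
its list, a single call appends all per-stage aggregates:

# Sketch: workflow_metrics already holds the JobMetrics entries.
create_and_append_libcxx_aggregates(workflow_metrics)
# workflow_metrics now additionally contains up to three AggregateMetric
# entries (stage1/stage2/stage3) per libc++ workflow run.
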
+def clean_up_libcxx_job_name(old_name: str) -> str:
+    """Convert libc++ job names to generically legal strings.
+
+    Args:
+      old_name: A string with the full name of the libc++ test that was run.
+
+    Returns:
+      Returns the input string with characters that might not be acceptable
+      in some identifier strings replaced with safer characters.
+
+    Take a name like 'stage1 (generic-cxx03, clang-22, clang++-22)'
+    and convert it to 'stage1_generic_cxx03__clang_22__clangxx_22'.
+    (Remove parentheses; replace commas, hyphens and spaces with
+    underscores; replace '+' with 'x'.)
+    """
+    # Names should have exactly one set of parentheses, so break on that. If
+    # they don't have any parentheses, then don't update them at all.
+    if old_name.find("(") == -1:
+        return old_name
+    stage, remainder = old_name.split("(")
+    stage = stage.strip()
+    if remainder[-1] == ")":
+        remainder = remainder[:-1]
+    remainder = remainder.replace("-", "_")
+    remainder = remainder.replace(",", "_")
+    remainder = remainder.replace(" ", "_")
+    remainder = remainder.replace("+", "x")
+    new_name = stage + "_" + remainder
+    return new_name
+
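
A quick sanity check (not part of the diff) of the name cleanup, using the
docstring's own example plus the no-parentheses fallback:

# Sketch: verifying clean_up_libcxx_job_name end to end.
assert (
    clean_up_libcxx_job_name("stage1 (generic-cxx03, clang-22, clang++-22)")
    == "stage1_generic_cxx03__clang_22__clangxx_22"
)
assert clean_up_libcxx_job_name("no parentheses") == "no parentheses"
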
 def github_get_metrics(
     github_repo: github.Repository, last_workflows_seen_as_completed: set[int]
 ) -> tuple[list[JobMetrics], int]:
@@ -146,6 +308,10 @@ def github_get_metrics(
         if task.name not in GITHUB_WORKFLOW_TO_TRACK:
             continue

+        libcxx_testing = False
+        if task.name == "Build and Test libc++":
+            libcxx_testing = True
+
         if task.status == "completed":
             workflow_seen_as_completed.add(task.id)
@@ -155,11 +321,19 @@ def github_get_metrics(

         name_prefix = GITHUB_WORKFLOW_TO_TRACK[task.name]
         for job in task.jobs():
+            if libcxx_testing:
+                # We're not running macos or windows libc++ tests on our
+                # infrastructure.
+                if job.name.find("macos") != -1 or job.name.find("windows") != -1:
+                    continue
             # This job is not interesting to us.
-            if job.name not in GITHUB_JOB_TO_TRACK[name_prefix]:
+            elif job.name not in GITHUB_JOB_TO_TRACK[name_prefix]:
                 continue

-            name_suffix = GITHUB_JOB_TO_TRACK[name_prefix][job.name]
+            if libcxx_testing:
+                name_suffix = clean_up_libcxx_job_name(job.name)
+            else:
+                name_suffix = GITHUB_JOB_TO_TRACK[name_prefix][job.name]
             metric_name = name_prefix + "_" + name_suffix

             if task.status != "completed":
@@ -208,21 +382,32 @@ def github_get_metrics(
                 continue

             logging.info(f"Adding a job metric for job {job.id} in workflow {task.id}")
-            # The timestamp associated with the event is expected by Grafana to be
-            # in nanoseconds.
+            # The completed_at_ns timestamp associated with the event is
+            # expected by Grafana to be in nanoseconds. Because we do math using
+            # all three times (when creating libc++ aggregates), we need them
+            # all to be in nanoseconds, even though created_at and started_at
+            # are not returned to Grafana.
+            created_at_ns = int(created_at.timestamp()) * 10**9
+            started_at_ns = int(started_at.timestamp()) * 10**9
             completed_at_ns = int(completed_at.timestamp()) * 10**9
             workflow_metrics.append(
                 JobMetrics(
                     metric_name,
                     queue_time.seconds,
                     run_time.seconds,
                     job_result,
+                    created_at_ns,
+                    started_at_ns,
                     completed_at_ns,
                     task.id,
                     task.name,
                 )
             )

+    # Finished collecting the JobMetrics for all jobs; now create the
+    # aggregates for any libc++ jobs.
+    create_and_append_libcxx_aggregates(workflow_metrics)
+
     for name, value in queued_count.items():
         workflow_metrics.append(
             GaugeMetric(f"workflow_queue_size_{name}", value, time.time_ns())
@@ -278,6 +463,11 @@ def upload_metrics(workflow_metrics, metrics_userid, api_key):
         metrics_batch.append(
             f"{name} queue_time={workflow_metric.queue_time},run_time={workflow_metric.run_time},status={workflow_metric.status} {workflow_metric.completed_at_ns}"
         )
+    elif isinstance(workflow_metric, AggregateMetric):
+        name = workflow_metric.aggregate_name.lower().replace(" ", "_")
+        metrics_batch.append(
+            f"{name} queue_time={workflow_metric.aggregate_queue_time},run_time={workflow_metric.aggregate_run_time},status={workflow_metric.aggregate_status} {workflow_metric.completed_at_ns}"
+        )
     else:
         raise ValueError(
             f"Unsupported object type {type(workflow_metric)}: {str(workflow_metric)}"