Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 3a2ebee

Browse files
feat(notifications): group workspace build failure report
Instead of sending a template admin one report per template, we now send a single grouped report.
1 parent 3f3e201 commit 3a2ebee

File tree

7 files changed

+473
-234
lines changed

7 files changed

+473
-234
lines changed

coderd/database/migrations/000316_group_build_failure_notifications.down.sql

Whitespace-only changes.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
UPDATE notification_templates
2+
SET
3+
name = 'Report: Workspace Builds Failed',
4+
title_template = 'Failed workspace builds report',
5+
body_template =
6+
E'The following templates have had build failures over the last {{.Data.report_frequency}}:
7+
{{range $template := .Data.templates}}
8+
- **{{$template.display_name}}** failed to build {{$template.failed_builds}}/{{$template.total_builds}} times
9+
{{end}}
10+
11+
**Report:**
12+
{{range $template := .Data.templates}}
13+
{{range $version := $template.versions}}
14+
**{{$template.display_name}}**@**{{$version.template_version_name}}** failed {{$version.failed_count}} time{{if gt $version.failed_count 1.0}}s{{end}}:
15+
{{range $build := $version.failed_builds}}
16+
* [{{$build.workspace_owner_username}} / {{$build.workspace_name}} / #{{$build.build_number}}]({{base_url}}/@{{$build.workspace_owner_username}}/{{$build.workspace_name}}/builds/{{$build.build_number}})
17+
{{end}}
18+
{{end}}
19+
{{end}}
20+
21+
We recommend reviewing these issues to ensure future builds are successful.'
22+
WHERE id = '34a20db2-e9cc-4a93-b0e4-8569699d7a00';

coderd/notifications/notifications_test.go

Lines changed: 75 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -978,45 +978,93 @@ func TestNotificationTemplates_Golden(t *testing.T) {
978978
UserName: "Bobby",
979979
UserEmail: "[email protected]",
980980
UserUsername: "bobby",
981-
Labels: map[string]string{
982-
"template_name": "bobby-first-template",
983-
"template_display_name": "Bobby First Template",
984-
},
981+
Labels: map[string]string{},
985982
// We need to use floats because `json.Unmarshal` decodes numbers in `map[string]any` as floats.
986983
Data: map[string]any{
987-
"failed_builds": 4.0,
988-
"total_builds": 55.0,
989984
"report_frequency": "week",
990-
"template_versions": []map[string]any{
985+
"templates": []map[string]any{
991986
{
992-
"template_version_name": "bobby-template-version-1",
993-
"failed_count": 3.0,
994-
"failed_builds": []map[string]any{
987+
"name": "bobby-first-template",
988+
"display_name": "Bobby First Template",
989+
"failed_builds": 4.0,
990+
"total_builds": 55.0,
991+
"versions": []map[string]any{
995992
{
996-
"workspace_owner_username": "mtojek",
997-
"workspace_name": "workspace-1",
998-
"build_number": 1234.0,
993+
"template_version_name": "bobby-template-version-1",
994+
"failed_count": 3.0,
995+
"failed_builds": []map[string]any{
996+
{
997+
"workspace_owner_username": "mtojek",
998+
"workspace_name": "workspace-1",
999+
"build_number": 1234.0,
1000+
},
1001+
{
1002+
"workspace_owner_username": "johndoe",
1003+
"workspace_name": "my-workspace-3",
1004+
"build_number": 5678.0,
1005+
},
1006+
{
1007+
"workspace_owner_username": "jack",
1008+
"workspace_name": "workwork",
1009+
"build_number": 774.0,
1010+
},
1011+
},
9991012
},
10001013
{
1001-
"workspace_owner_username": "johndoe",
1002-
"workspace_name": "my-workspace-3",
1003-
"build_number": 5678.0,
1004-
},
1005-
{
1006-
"workspace_owner_username": "jack",
1007-
"workspace_name": "workwork",
1008-
"build_number": 774.0,
1014+
"template_version_count": "bobby-template-version-2",
1015+
"failed_count": 1.0,
1016+
"failed_builds": []map[string]any{
1017+
{
1018+
"workspace_owner_username": "ben",
1019+
"workspace_name": "cool-workspace",
1020+
"build_number": 8888.0,
1021+
},
1022+
},
10091023
},
10101024
},
10111025
},
10121026
{
1013-
"template_version_name": "bobby-template-version-2",
1014-
"failed_count": 1.0,
1015-
"failed_builds": []map[string]any{
1027+
"name": "bobby-second-template",
1028+
"display_name": "Bobby Second Template",
1029+
"failed_builds": 5.0,
1030+
"total_builds": 50.0,
1031+
"versions": []map[string]any{
1032+
{
1033+
"template_version_name": "bobby-template-version-1",
1034+
"failed_count": 3.0,
1035+
"failed_builds": []map[string]any{
1036+
{
1037+
"workspace_owner_username": "daniellemaywood",
1038+
"workspace_name": "workspace-9",
1039+
"build_number": 9234.0,
1040+
},
1041+
{
1042+
"workspace_owner_username": "johndoe",
1043+
"workspace_name": "my-workspace-7",
1044+
"build_number": 8678.0,
1045+
},
1046+
{
1047+
"workspace_owner_username": "jack",
1048+
"workspace_name": "workworkwork",
1049+
"build_number": 374.0,
1050+
},
1051+
},
1052+
},
10161053
{
1017-
"workspace_owner_username": "ben",
1018-
"workspace_name": "cool-workspace",
1019-
"build_number": 8888.0,
1054+
"template_version_count": "bobby-template-version-2",
1055+
"failed_count": 2.0,
1056+
"failed_builds": []map[string]any{
1057+
{
1058+
"workspace_owner_username": "ben",
1059+
"workspace_name": "more-cool-workspace",
1060+
"build_number": 8878.0,
1061+
},
1062+
{
1063+
"workspace_owner_username": "ben",
1064+
"workspace_name": "less-cool-workspace",
1065+
"build_number": 8848.0,
1066+
},
1067+
},
10201068
},
10211069
},
10221070
},

coderd/notifications/reports/generator.go

Lines changed: 94 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/coder/coder/v2/coderd/database/dbauthz"
1919
"github.com/coder/coder/v2/coderd/database/dbtime"
2020
"github.com/coder/coder/v2/coderd/notifications"
21+
"github.com/coder/coder/v2/coderd/util/slice"
2122
"github.com/coder/coder/v2/codersdk"
2223
)
2324

@@ -102,6 +103,11 @@ const (
102103
failedWorkspaceBuildsReportFrequencyLabel = "week"
103104
)
104105

106+
type adminReport struct {
107+
stats database.GetWorkspaceBuildStatsByTemplatesRow
108+
failedBuilds []database.GetFailedWorkspaceBuildsByTemplateIDRow
109+
}
110+
105111
func reportFailedWorkspaceBuilds(ctx context.Context, logger slog.Logger, db database.Store, enqueuer notifications.Enqueuer, clk quartz.Clock) error {
106112
now := clk.Now()
107113
since := now.Add(-failedWorkspaceBuildsReportFrequency)
@@ -136,6 +142,8 @@ func reportFailedWorkspaceBuilds(ctx context.Context, logger slog.Logger, db dat
136142
return xerrors.Errorf("unable to fetch failed workspace builds: %w", err)
137143
}
138144

145+
reports := make(map[uuid.UUID][]adminReport)
146+
139147
for _, stats := range templateStatsRows {
140148
select {
141149
case <-ctx.Done():
@@ -165,33 +173,40 @@ func reportFailedWorkspaceBuilds(ctx context.Context, logger slog.Logger, db dat
165173
logger.Error(ctx, "unable to fetch failed workspace builds", slog.F("template_id", stats.TemplateID), slog.Error(err))
166174
continue
167175
}
168-
reportData := buildDataForReportFailedWorkspaceBuilds(stats, failedBuilds)
169176

170-
// Send reports to template admins
171-
templateDisplayName := stats.TemplateDisplayName
172-
if templateDisplayName == "" {
173-
templateDisplayName = stats.TemplateName
177+
for _, templateAdmin := range templateAdmins {
178+
adminReports := reports[templateAdmin.ID]
179+
adminReports = append(adminReports, adminReport{
180+
failedBuilds: failedBuilds,
181+
stats: stats,
182+
})
183+
184+
reports[templateAdmin.ID] = adminReports
174185
}
186+
}
175187

176-
for _, templateAdmin := range templateAdmins {
177-
select {
178-
case <-ctx.Done():
179-
logger.Debug(ctx, "context is canceled, quitting", slog.Error(ctx.Err()))
180-
break
181-
default:
182-
}
188+
for templateAdmin, reports := range reports {
189+
select {
190+
case <-ctx.Done():
191+
logger.Debug(ctx, "context is canceled, quitting", slog.Error(ctx.Err()))
192+
break
193+
default:
194+
}
183195

184-
if _, err := enqueuer.EnqueueWithData(ctx, templateAdmin.ID, notifications.TemplateWorkspaceBuildsFailedReport,
185-
map[string]string{
186-
"template_name": stats.TemplateName,
187-
"template_display_name": templateDisplayName,
188-
},
189-
reportData,
190-
"report_generator",
191-
stats.TemplateID, stats.TemplateOrganizationID,
192-
); err != nil {
193-
logger.Warn(ctx, "failed to send a report with failed workspace builds", slog.Error(err))
194-
}
196+
reportData := buildDataForReportFailedWorkspaceBuilds(reports)
197+
198+
targets := []uuid.UUID{}
199+
for _, report := range reports {
200+
targets = append(targets, report.stats.TemplateID, report.stats.TemplateOrganizationID)
201+
}
202+
203+
if _, err := enqueuer.EnqueueWithData(ctx, templateAdmin, notifications.TemplateWorkspaceBuildsFailedReport,
204+
map[string]string{},
205+
reportData,
206+
"report_generator",
207+
slice.Unique(targets)...,
208+
); err != nil {
209+
logger.Warn(ctx, "failed to send a report with failed workspace builds", slog.Error(err))
195210
}
196211
}
197212

@@ -213,54 +228,69 @@ func reportFailedWorkspaceBuilds(ctx context.Context, logger slog.Logger, db dat
213228

214229
const workspaceBuildsLimitPerTemplateVersion = 10
215230

216-
func buildDataForReportFailedWorkspaceBuilds(stats database.GetWorkspaceBuildStatsByTemplatesRow, failedBuilds []database.GetFailedWorkspaceBuildsByTemplateIDRow) map[string]any {
217-
// Build notification model for template versions and failed workspace builds.
218-
//
219-
// Failed builds are sorted by template version ascending, workspace build number descending.
220-
// Review builds, group them by template versions, and assign to builds to template versions.
221-
// The map requires `[]map[string]any{}` to be compatible with data passed to `NotificationEnqueuer`.
222-
templateVersions := []map[string]any{}
223-
for _, failedBuild := range failedBuilds {
224-
c := len(templateVersions)
225-
226-
if c == 0 || templateVersions[c-1]["template_version_name"] != failedBuild.TemplateVersionName {
227-
templateVersions = append(templateVersions, map[string]any{
228-
"template_version_name": failedBuild.TemplateVersionName,
229-
"failed_count": 1,
230-
"failed_builds": []map[string]any{
231-
{
232-
"workspace_owner_username": failedBuild.WorkspaceOwnerUsername,
233-
"workspace_name": failedBuild.WorkspaceName,
234-
"build_number": failedBuild.WorkspaceBuildNumber,
231+
func buildDataForReportFailedWorkspaceBuilds(reports []adminReport) map[string]any {
232+
templates := []map[string]any{}
233+
234+
for _, report := range reports {
235+
// Build notification model for template versions and failed workspace builds.
236+
//
237+
// Failed builds are sorted by template version ascending, workspace build number descending.
238+
// Review builds, group them by template version, and assign the builds to their template versions.
239+
// The map requires `[]map[string]any{}` to be compatible with data passed to `NotificationEnqueuer`.
240+
templateVersions := []map[string]any{}
241+
for _, failedBuild := range report.failedBuilds {
242+
c := len(templateVersions)
243+
244+
if c == 0 || templateVersions[c-1]["template_version_name"] != failedBuild.TemplateVersionName {
245+
templateVersions = append(templateVersions, map[string]any{
246+
"template_version_name": failedBuild.TemplateVersionName,
247+
"failed_count": 1,
248+
"failed_builds": []map[string]any{
249+
{
250+
"workspace_owner_username": failedBuild.WorkspaceOwnerUsername,
251+
"workspace_name": failedBuild.WorkspaceName,
252+
"build_number": failedBuild.WorkspaceBuildNumber,
253+
},
235254
},
236-
},
237-
})
238-
continue
255+
})
256+
continue
257+
}
258+
259+
tv := templateVersions[c-1]
260+
//nolint:errorlint,forcetypeassert // only this function prepares the notification model
261+
tv["failed_count"] = tv["failed_count"].(int) + 1
262+
263+
//nolint:errorlint,forcetypeassert // only this function prepares the notification model
264+
builds := tv["failed_builds"].([]map[string]any)
265+
if len(builds) < workspaceBuildsLimitPerTemplateVersion {
266+
// return N last builds to prevent long email reports
267+
builds = append(builds, map[string]any{
268+
"workspace_owner_username": failedBuild.WorkspaceOwnerUsername,
269+
"workspace_name": failedBuild.WorkspaceName,
270+
"build_number": failedBuild.WorkspaceBuildNumber,
271+
})
272+
tv["failed_builds"] = builds
273+
}
274+
templateVersions[c-1] = tv
239275
}
240276

241-
tv := templateVersions[c-1]
242-
//nolint:errorlint,forcetypeassert // only this function prepares the notification model
243-
tv["failed_count"] = tv["failed_count"].(int) + 1
244-
245-
//nolint:errorlint,forcetypeassert // only this function prepares the notification model
246-
builds := tv["failed_builds"].([]map[string]any)
247-
if len(builds) < workspaceBuildsLimitPerTemplateVersion {
248-
// return N last builds to prevent long email reports
249-
builds = append(builds, map[string]any{
250-
"workspace_owner_username": failedBuild.WorkspaceOwnerUsername,
251-
"workspace_name": failedBuild.WorkspaceName,
252-
"build_number": failedBuild.WorkspaceBuildNumber,
253-
})
254-
tv["failed_builds"] = builds
277+
templateDisplayName := report.stats.TemplateDisplayName
278+
if templateDisplayName == "" {
279+
templateDisplayName = report.stats.TemplateName
255280
}
256-
templateVersions[c-1] = tv
281+
282+
templates = append(templates, map[string]any{
283+
"failed_builds": report.stats.FailedBuilds,
284+
"total_builds": report.stats.TotalBuilds,
285+
"versions": templateVersions,
286+
"name": report.stats.TemplateName,
287+
"display_name": templateDisplayName,
288+
})
257289
}
258290

259291
return map[string]any{
260-
"failed_builds": stats.FailedBuilds,
261-
"total_builds": stats.TotalBuilds,
262-
"report_frequency": failedWorkspaceBuildsReportFrequencyLabel,
263-
"template_versions": templateVersions,
292+
"report_frequency": failedWorkspaceBuildsReportFrequencyLabel,
293+
"templates": templates,
264294
}
265295
}
266296

0 commit comments

Comments
 (0)