Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ee08e26

Browse files
authored
fix(api): append log when retry job (#5294)
1 parent 5a9f2dd commit ee08e26

File tree

10 files changed

+53
-78
lines changed

10 files changed

+53
-78
lines changed

engine/api/api.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -752,7 +752,7 @@ func (a *API) Serve(ctx context.Context) error {
752752
}, a.PanicDump())
753753
sdk.GoRoutine(ctx, "workflow.Initialize",
754754
func(ctx context.Context) {
755-
workflow.Initialize(ctx, a.DBConnectionFactory.GetDBMap, a.Cache, a.Config.URL.UI, a.Config.DefaultOS, a.Config.DefaultArch)
755+
workflow.Initialize(ctx, a.DBConnectionFactory.GetDBMap, a.Cache, a.Config.URL.UI, a.Config.DefaultOS, a.Config.DefaultArch, a.Config.Log.StepMaxSize)
756756
}, a.PanicDump())
757757
sdk.GoRoutine(ctx, "PushInElasticSearch",
758758
func(ctx context.Context) {

engine/api/worker.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ func (api *API) disableWorkerHandler() service.Handler {
158158
}
159159
}
160160

161-
if err := DisableWorker(ctx, api.mustDB(), id); err != nil {
161+
if err := DisableWorker(ctx, api.mustDB(), id, api.Config.Log.StepMaxSize); err != nil {
162162
cause := sdk.Cause(err)
163163
if cause == worker.ErrNoWorker || cause == sql.ErrNoRows {
164164
return sdk.WrapError(sdk.ErrWrongRequest, "disableWorkerHandler> worker %s does not exists", id)
@@ -190,7 +190,7 @@ func (api *API) postUnregisterWorkerHandler() service.Handler {
190190
if err != nil {
191191
return err
192192
}
193-
if err := DisableWorker(ctx, api.mustDB(), wk.ID); err != nil {
193+
if err := DisableWorker(ctx, api.mustDB(), wk.ID, api.Config.Log.StepMaxSize); err != nil {
194194
return sdk.WrapError(err, "cannot delete worker %s", wk.Name)
195195
}
196196
return nil
@@ -224,7 +224,7 @@ func (api *API) workerWaitingHandler() service.Handler {
224224
// the package workflow
225225

226226
// DisableWorker disable a worker
227-
func DisableWorker(ctx context.Context, db *gorp.DbMap, id string) error {
227+
func DisableWorker(ctx context.Context, db *gorp.DbMap, id string, maxLogSize int64) error {
228228
tx, errb := db.Begin()
229229
if errb != nil {
230230
return fmt.Errorf("DisableWorker> Cannot start tx: %v", errb)
@@ -244,7 +244,7 @@ func DisableWorker(ctx context.Context, db *gorp.DbMap, id string) error {
244244
// We need to restart this action
245245
wNodeJob, errL := workflow.LoadNodeJobRun(ctx, tx, nil, jobID.Int64)
246246
if errL == nil && wNodeJob.Retry < 3 {
247-
if err := workflow.RestartWorkflowNodeJob(context.TODO(), db, *wNodeJob); err != nil {
247+
if err := workflow.RestartWorkflowNodeJob(context.TODO(), db, *wNodeJob, maxLogSize); err != nil {
248248
log.Warning(ctx, "DisableWorker[%s]> Cannot restart workflow node run: %v", name, err)
249249
} else {
250250
log.Info(ctx, "DisableWorker[%s]> WorkflowNodeRun %d restarted after crash", name, jobID.Int64)

engine/api/workflow/execute_node_job_run.go

Lines changed: 28 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -566,19 +566,21 @@ func FreeNodeJobRun(ctx context.Context, store cache.Store, id int64) error {
566566
return sdk.WrapError(sdk.ErrJobNotBooked, "BookNodeJobRun> job %d already released", id)
567567
}
568568

569-
//AddLog adds a build log
570-
func AddLog(db gorp.SqlExecutor, job *sdk.WorkflowNodeJobRun, logs *sdk.Log, maxLogSize int64) error {
571-
if job != nil {
572-
logs.JobID = job.ID
573-
logs.NodeRunID = job.WorkflowNodeRunID
574-
}
575-
569+
// AppendLog adds a build log.
570+
func AppendLog(db gorp.SqlExecutor, jobID, nodeRunID, stepOrder int64, val string, maxLogSize int64) error {
576571
// check if log exists without loading data but with log size
577-
exists, size, err := ExistsStepLog(db, logs.JobID, logs.StepOrder)
572+
exists, size, err := ExistsStepLog(db, jobID, stepOrder)
578573
if err != nil {
579574
return sdk.WrapError(err, "cannot check if log exists")
580575
}
581576

577+
logs := &sdk.Log{
578+
JobID: jobID,
579+
NodeRunID: nodeRunID,
580+
StepOrder: stepOrder,
581+
Val: val,
582+
}
583+
582584
// ignore the log if max size already reached
583585
if maxReached := truncateLogs(maxLogSize, size, logs); maxReached {
584586
log.Debug("truncated logs")
@@ -628,7 +630,7 @@ func AddServiceLog(db gorp.SqlExecutor, job *sdk.WorkflowNodeJobRun, logs *sdk.S
628630
}
629631

630632
// RestartWorkflowNodeJob restart all workflow node job and update logs to indicate restart
631-
func RestartWorkflowNodeJob(ctx context.Context, db gorp.SqlExecutor, wNodeJob sdk.WorkflowNodeJobRun) error {
633+
func RestartWorkflowNodeJob(ctx context.Context, db gorp.SqlExecutor, wNodeJob sdk.WorkflowNodeJobRun, maxLogSize int64) error {
632634
var end func()
633635
ctx, end = observability.Span(ctx, "workflow.RestartWorkflowNodeJob")
634636
defer end()
@@ -638,43 +640,38 @@ func RestartWorkflowNodeJob(ctx context.Context, db gorp.SqlExecutor, wNodeJob s
638640
if step.Status == sdk.StatusNeverBuilt || step.Status == sdk.StatusSkipped || step.Status == sdk.StatusDisabled {
639641
continue
640642
}
641-
l, errL := LoadStepLogs(db, wNodeJob.ID, int64(step.StepOrder))
642-
if errL != nil {
643-
return sdk.WrapError(errL, "RestartWorkflowNodeJob> error while load step logs")
644-
}
645643
wNodeJob.Job.Reason = "Killed (Reason: Timeout)\n"
646644
step.Status = sdk.StatusWaiting
647645
step.Done = time.Time{}
648-
if l != nil { // log could be nil here
649-
l.Done = nil
650-
logbuf := bytes.NewBufferString(l.Val)
651-
logbuf.WriteString("\n\n\n-=-=-=-=-=- Worker timeout: job replaced in queue -=-=-=-=-=-\n\n\n")
652-
l.Val = logbuf.String()
653-
if err := updateLog(db, l); err != nil {
654-
return sdk.WrapError(errL, "RestartWorkflowNodeJob> error while update step log")
655-
}
646+
if err := AppendLog(
647+
db, wNodeJob.ID, wNodeJob.WorkflowNodeRunID, int64(step.StepOrder),
648+
"\n\n\n-=-=-=-=-=- Worker timeout: job replaced in queue -=-=-=-=-=-\n\n\n",
649+
maxLogSize,
650+
); err != nil {
651+
return err
656652
}
657653
}
658-
nodeRun, errNR := LoadAndLockNodeRunByID(ctx, db, wNodeJob.WorkflowNodeRunID)
659-
if errNR != nil {
660-
return errNR
654+
655+
nodeRun, err := LoadAndLockNodeRunByID(ctx, db, wNodeJob.WorkflowNodeRunID)
656+
if err != nil {
657+
return err
661658
}
662659

663660
//Synchronize struct but not in db
664-
sync, errS := SyncNodeRunRunJob(ctx, db, nodeRun, wNodeJob)
665-
if errS != nil {
666-
return sdk.WrapError(errS, "RestartWorkflowNodeJob> error on sync nodeJobRun")
661+
sync, err := SyncNodeRunRunJob(ctx, db, nodeRun, wNodeJob)
662+
if err != nil {
663+
return sdk.WrapError(err, "error on sync nodeJobRun")
667664
}
668665
if !sync {
669-
log.Warning(ctx, "RestartWorkflowNodeJob> sync doesn't find a nodeJobRun")
666+
log.Warning(ctx, "sync doesn't find a nodeJobRun")
670667
}
671668

672-
if errU := UpdateNodeRun(db, nodeRun); errU != nil {
673-
return sdk.WrapError(errU, "RestartWorkflowNodeJob> Cannot update node run")
669+
if err := UpdateNodeRun(db, nodeRun); err != nil {
670+
return sdk.WrapError(err, "cannot update node run")
674671
}
675672

676673
if err := replaceWorkflowJobRunInQueue(db, wNodeJob); err != nil {
677-
return sdk.WrapError(err, "Cannot replace workflow job in queue")
674+
return sdk.WrapError(err, "cannot replace workflow job in queue")
678675
}
679676

680677
return nil

engine/api/workflow/execute_node_job_run_log.go

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ func LoadLogs(db gorp.SqlExecutor, id int64) ([]sdk.Log, error) {
9696
}
9797

9898
func insertLog(db gorp.SqlExecutor, logs *sdk.Log) error {
99+
now := time.Now()
100+
logs.Start = &now
101+
logs.LastModified = &now
102+
99103
query := `
100104
INSERT INTO workflow_node_run_job_logs (workflow_node_run_job_id, workflow_node_run_id, start, last_modified, done, step_order, value)
101105
VALUES ($1, $2, $3, $4, $5, $6, $7)
@@ -105,26 +109,18 @@ func insertLog(db gorp.SqlExecutor, logs *sdk.Log) error {
105109

106110
func updateLog(db gorp.SqlExecutor, logs *sdk.Log) error {
107111
now := time.Now()
108-
if logs.Start == nil {
109-
logs.Start = &now
110-
}
111-
if logs.LastModified == nil {
112-
logs.LastModified = &now
113-
}
114-
if logs.Done == nil {
115-
logs.Done = &now
116-
}
112+
logs.LastModified = &now
113+
logs.Done = &now
117114

118115
query := `
119116
UPDATE workflow_node_run_job_logs set
120117
workflow_node_run_id = $3,
121-
start = $4,
122-
last_modified = $5,
123-
done = $6,
124-
value = value || $7
118+
last_modified = $4,
119+
done = $5,
120+
value = value || $6
125121
WHERE workflow_node_run_job_id = $1 AND step_order = $2`
126122

127-
if _, err := db.Exec(query, logs.JobID, logs.StepOrder, logs.NodeRunID, logs.Start, logs.LastModified, logs.Done, logs.Val); err != nil {
123+
if _, err := db.Exec(query, logs.JobID, logs.StepOrder, logs.NodeRunID, logs.LastModified, logs.Done, logs.Val); err != nil {
128124
return sdk.WithStack(err)
129125
}
130126
return nil

engine/api/workflow/heartbeat.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import (
1313
const maxRetry = 3
1414

1515
// manageDeadJob restart all jobs which are building but without worker
16-
func manageDeadJob(ctx context.Context, DBFunc func() *gorp.DbMap, store cache.Store) error {
16+
func manageDeadJob(ctx context.Context, DBFunc func() *gorp.DbMap, store cache.Store, maxLogSize int64) error {
1717
db := DBFunc()
1818
deadJobs, err := LoadDeadNodeJobRun(ctx, db, store)
1919
if err != nil {
@@ -41,7 +41,7 @@ func manageDeadJob(ctx context.Context, DBFunc func() *gorp.DbMap, store cache.S
4141
continue
4242
}
4343
} else {
44-
if err := RestartWorkflowNodeJob(ctx, tx, deadJob); err != nil {
44+
if err := RestartWorkflowNodeJob(ctx, tx, deadJob, maxLogSize); err != nil {
4545
log.Warning(ctx, "manageDeadJob> Cannot restart node job run %d: %v", deadJob.ID, err)
4646
_ = tx.Rollback()
4747
continue

engine/api/workflow/init.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import (
1313
var baseUIURL, defaultOS, defaultArch string
1414

1515
//Initialize starts goroutines for workflows
16-
func Initialize(ctx context.Context, DBFunc func() *gorp.DbMap, store cache.Store, uiURL, confDefaultOS, confDefaultArch string) {
16+
func Initialize(ctx context.Context, DBFunc func() *gorp.DbMap, store cache.Store, uiURL, confDefaultOS, confDefaultArch string, maxLogSize int64) {
1717
baseUIURL = uiURL
1818
defaultOS = confDefaultOS
1919
defaultArch = confDefaultArch
@@ -31,7 +31,7 @@ func Initialize(ctx context.Context, DBFunc func() *gorp.DbMap, store cache.Stor
3131
return
3232
}
3333
case <-tickHeart.C:
34-
if err := manageDeadJob(ctx, DBFunc, store); err != nil {
34+
if err := manageDeadJob(ctx, DBFunc, store, maxLogSize); err != nil {
3535
log.Warning(ctx, "workflow.manageDeadJob> Error on restartDeadJob : %v", err)
3636
}
3737
case <-tickStop.C:

engine/api/workflow/run_workflow_test.go

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -634,16 +634,12 @@ queueRun:
634634
assert.Len(t, secrets, 1)
635635

636636
//TestAddLog
637-
assert.NoError(t, workflow.AddLog(db, j, &sdk.Log{
638-
Val: "This is a log",
639-
}, workflow.DefaultMaxLogSize))
637+
assert.NoError(t, workflow.AppendLog(db, j.ID, j.WorkflowNodeRunID, 1, "This is a log", workflow.DefaultMaxLogSize))
640638
if t.Failed() {
641639
tx.Rollback()
642640
t.FailNow()
643641
}
644-
assert.NoError(t, workflow.AddLog(db, j, &sdk.Log{
645-
Val: "This is another log",
646-
}, workflow.DefaultMaxLogSize))
642+
assert.NoError(t, workflow.AppendLog(db, j.ID, j.WorkflowNodeRunID, 1, "This is another log", workflow.DefaultMaxLogSize))
647643
if t.Failed() {
648644
tx.Rollback()
649645
t.FailNow()

engine/api/workflow_queue.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ func (api *API) postWorkflowJobLogsHandler() service.Handler {
567567
return err
568568
}
569569

570-
if err := workflow.AddLog(api.mustDB(), pbJob, &logs, api.Config.Log.StepMaxSize); err != nil {
570+
if err := workflow.AppendLog(api.mustDB(), pbJob.ID, pbJob.WorkflowNodeRunID, logs.StepOrder, logs.Val, api.Config.Log.StepMaxSize); err != nil {
571571
return err
572572
}
573573

engine/api/workflow_run_test.go

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2451,16 +2451,10 @@ func initGetWorkflowNodeRunJobTest(t *testing.T, api *API, db *gorp.DbMap) (*sdk
24512451
require.NoError(t, errUJ)
24522452

24532453
// Add log
2454-
require.NoError(t, workflow.AddLog(api.mustDB(), jobRun, &sdk.Log{
2455-
StepOrder: 1,
2456-
Val: "1234567890",
2457-
}, 15))
2454+
require.NoError(t, workflow.AppendLog(api.mustDB(), jobRun.ID, jobRun.WorkflowNodeRunID, 1, "1234567890", 15))
24582455

24592456
// Add truncated log
2460-
require.NoError(t, workflow.AddLog(api.mustDB(), jobRun, &sdk.Log{
2461-
StepOrder: 1,
2462-
Val: "1234567890",
2463-
}, 15))
2457+
require.NoError(t, workflow.AppendLog(api.mustDB(), jobRun.ID, jobRun.WorkflowNodeRunID, 1, "1234567890", 15))
24642458

24652459
// Add service log
24662460
require.NoError(t, workflow.AddServiceLog(api.mustDB(), jobRun, &sdk.ServiceLog{

engine/cdn/cdn_log.go

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -238,15 +238,7 @@ func buildMessage(signature log.Signature, m hook.Message) string {
238238
}
239239

240240
func (s *Service) processLog(ctx context.Context, db gorp.SqlExecutor, signature log.Signature, message string) error {
241-
now := time.Now()
242-
l := sdk.Log{
243-
JobID: signature.JobID,
244-
NodeRunID: signature.NodeRunID,
245-
LastModified: &now,
246-
StepOrder: signature.Worker.StepOrder,
247-
Val: message,
248-
}
249-
return workflow.AddLog(db, nil, &l, s.Cfg.Log.StepMaxSize)
241+
return workflow.AppendLog(db, signature.JobID, signature.NodeRunID, signature.Worker.StepOrder, message, s.Cfg.Log.StepMaxSize)
250242
}
251243

252244
func (s *Service) handleServiceLog(ctx context.Context, hatcheryID int64, hatcheryName string, workerName string, sig interface{}, m hook.Message) error {

0 commit comments

Comments
 (0)