nektos · avoidik · Jan 14, 2026 · Jan 19, 2026
diff --git a/pkg/common/executor.go b/pkg/common/executor.go
@@ -26,6 +26,21 @@ func Warningf(format string, args ...interface{}) Warning {
 	return w
 }
 
+// FailFastError wraps a context cancellation error (Err) behind a fixed, more informative fail-fast message
+type FailFastError struct {
+	Err error
+}
+
+// Error returns a fixed fail-fast message; the text of the wrapped Err is not included
+func (e FailFastError) Error() string {
+	return "Job cancelled (fail-fast)"
+}
+
+// Unwrap returns the wrapped Err so that errors.Is and errors.As can match the underlying cause
+func (e FailFastError) Unwrap() error {
+	return e.Err
+}
+
 // Executor define contract for the steps of a workflow
 type Executor func(ctx context.Context) error
 
@@ -131,6 +146,101 @@ func NewParallelExecutor(parallel int, executors ...Executor) Executor {
 	}
 }
 
+// NewFailFastParallelExecutor returns an Executor that runs executors on up to parallel workers (minimum 1).
+// When IsFailFast(ctx) is true, the first error that is neither a Warning nor a FailFastError cancels remaining work.
+func NewFailFastParallelExecutor(parallel int, executors ...Executor) Executor {
+	return func(ctx context.Context) error {
+		failFast := IsFailFast(ctx)
+
+		// Fail-fast is off: delegate unchanged to the standard parallel executor
+		if !failFast {
+			return NewParallelExecutor(parallel, executors...)(ctx)
+		}
+
+		// Fail-fast mode: derive a cancellable child context that is handed to every executor
+		workCtx, cancelWork := context.WithCancel(ctx)
+		defer cancelWork()
+
+		work := make(chan Executor, len(executors))
+		errs := make(chan error, len(executors))
+
+		if 1 > parallel {
+			log.Debugf("Parallel tasks (%d) below minimum, setting to 1", parallel)
+			parallel = 1 // NOTE(review): writes the captured parameter; concurrent calls of the returned Executor would race here
+		}
+
+		// Start the workers; each one drains the work channel until the queuing goroutine closes it
+		for i := 0; i < parallel; i++ {
+			go func(work <-chan Executor, errs chan<- error) {
+				for executor := range work {
+					// Once the work context is cancelled, report a FailFastError instead of running the executor
+					if workCtx.Err() != nil {
+						errs <- FailFastError{Err: workCtx.Err()}
+						continue
+					}
+					errs <- executor(workCtx)
+				}
+			}(work, errs)
+		}
+
+		// Queue work in the background (work has capacity len(executors), so sends do not block); closing work ends the workers
+		go func() {
+			defer close(work)
+			for i := 0; i < len(executors); i++ {
+				// Stop queuing as soon as the work context has been cancelled
+				if workCtx.Err() != nil {
+					// Emit one FailFastError per unqueued executor so the collector still receives len(executors) results
+					for j := i; j < len(executors); j++ {
+						errs <- FailFastError{Err: workCtx.Err()}
+					}
+					return
+				}
+				work <- executors[i]
+			}
+		}()
+
+		// Receive exactly one result per executor; the first real error cancels the work context
+		var firstErr error
+		var firstFailFastErr error
+		for i := 0; i < len(executors); i++ {
+			err := <-errs
+
+			if err != nil {
+				switch err.(type) { // NOTE(review): matches concrete types only; a wrapped Warning would count as a real error
+				case Warning:
+					// Warnings are logged and never trigger fail-fast
+					log.Warning(err.Error())
+				case FailFastError:
+					// A FailFastError is only a cancellation notification, not the root cause;
+					// remember the first one as a fallback in case no real error shows up
+					if firstFailFastErr == nil {
+						firstFailFastErr = err
+					}
+				default:
+					// Only the first real error is recorded; later ones are dropped
+					if firstErr == nil {
+						firstErr = err
+						// Cancel workCtx: queued executors get skipped, running ones can observe it through their context
+						cancelWork()
+					}
+				}
+			}
+		}
+
+		// No real error was seen but some executors were skipped: surface the first FailFastError
+		if firstErr == nil && firstFailFastErr != nil {
+			firstErr = firstFailFastErr
+		}
+
+		// A cancelled parent context takes precedence over whatever was collected above
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		return firstErr
+	}
+}
+
 func NewFieldExecutor(name string, value interface{}, exec Executor) Executor {
 	return func(ctx context.Context) error {
 		return exec(WithLogger(ctx, Logger(ctx).WithField(name, value)))

diff --git a/pkg/common/executor_test.go b/pkg/common/executor_test.go
@@ -3,6 +3,7 @@ package common
 import (
 	"context"
 	"fmt"
+	"sync"
 	"testing"
 	"time"
 
@@ -150,3 +151,240 @@ func TestNewParallelExecutorCanceled(t *testing.T) {
 	assert.Equal(3, count)
 	assert.Error(errExpected, err)
 }
+
+func TestNewFailFastParallelExecutorWithFailFastTrue(t *testing.T) {
+	assert := assert.New(t)
+
+	ctx := WithFailFast(context.Background(), true)
+
+	executedCount := 0
+	var mu sync.Mutex
+
+	// The second executor fails; only the returned error is asserted below, executedCount is never checked
+	executors := []Executor{
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			time.Sleep(100 * time.Millisecond)
+			return nil
+		},
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			time.Sleep(100 * time.Millisecond)
+			return fmt.Errorf("intentional failure")
+		},
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			time.Sleep(2 * time.Second) // Expected to be skipped; Sleep ignores ctx, so if it does start it runs the full 2s
+			return nil
+		},
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			time.Sleep(2 * time.Second) // Expected to be skipped; Sleep ignores ctx, so if it does start it runs the full 2s
+			return nil
+		},
+	}
+
+	err := NewFailFastParallelExecutor(2, executors...)(ctx)
+
+	assert.Error(err)
+	assert.Contains(err.Error(), "intentional failure")
+}
+
+func TestNewFailFastParallelExecutorWithFailFastFalse(t *testing.T) {
+	assert := assert.New(t)
+
+	ctx := WithFailFast(context.Background(), false) // fail-fast explicitly disabled
+
+	executedCount := 0
+	var mu sync.Mutex
+
+	executors := []Executor{
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			return nil
+		},
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			return fmt.Errorf("intentional failure")
+		},
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			return nil
+		},
+	}
+
+	err := NewFailFastParallelExecutor(2, executors...)(ctx) // takes the non-fail-fast path
+
+	assert.Error(err) // an error is expected even though every executor must have run
+	mu.Lock()
+	assert.Equal(3, executedCount, "all executors should run when fail-fast is false")
+	mu.Unlock()
+}
+
+func TestNewFailFastParallelExecutorNoFailFastInContext(t *testing.T) {
+	assert := assert.New(t)
+
+	ctx := context.Background() // no fail-fast value stored in the context
+
+	executedCount := 0
+	var mu sync.Mutex
+
+	executors := []Executor{
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			return nil
+		},
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			return fmt.Errorf("intentional failure")
+		},
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			return nil
+		},
+	}
+
+	err := NewFailFastParallelExecutor(2, executors...)(ctx) // a missing value must behave like fail-fast disabled
+
+	assert.Error(err) // an error is expected even though every executor must have run
+	mu.Lock()
+	assert.Equal(3, executedCount, "all executors should run when fail-fast not in context")
+	mu.Unlock()
+}
+
+func TestNewFailFastParallelExecutorWithWarnings(t *testing.T) {
+	assert := assert.New(t)
+
+	ctx := WithFailFast(context.Background(), true)
+
+	executedCount := 0
+	var mu sync.Mutex
+
+	// The first executor returns a Warning, which must not cancel the remaining executors
+	executors := []Executor{
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			return Warningf("this is a warning")
+		},
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			return nil
+		},
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			return nil
+		},
+	}
+
+	err := NewFailFastParallelExecutor(2, executors...)(ctx)
+
+	// A Warning on its own leaves the returned error nil
+	assert.NoError(err)
+	mu.Lock()
+	assert.Equal(3, executedCount, "all executors should run when only warnings occur")
+	mu.Unlock()
+}
+
+func TestNewFailFastParallelExecutorParentContextCanceled(t *testing.T) {
+	assert := assert.New(t)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	ctx = WithFailFast(ctx, true)
+
+	executedCount := 0
+	var mu sync.Mutex
+
+	executors := []Executor{
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			time.Sleep(100 * time.Millisecond)
+			return nil
+		},
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			// Cancel the parent context from inside a running executor
+			cancel()
+			time.Sleep(100 * time.Millisecond)
+			return nil
+		},
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			return nil
+		},
+	}
+
+	err := NewFailFastParallelExecutor(2, executors...)(ctx)
+
+	// The parent's context.Canceled must be what the executor returns
+	assert.ErrorIs(err, context.Canceled)
+}
+
+func TestNewFailFastParallelExecutorAllSuccess(t *testing.T) {
+	assert := assert.New(t)
+
+	ctx := WithFailFast(context.Background(), true)
+
+	executedCount := 0
+	var mu sync.Mutex
+
+	// Every executor returns nil, so fail-fast must never cancel anything
+	executors := []Executor{
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			return nil
+		},
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			return nil
+		},
+		func(ctx context.Context) error {
+			mu.Lock()
+			executedCount++
+			mu.Unlock()
+			return nil
+		},
+	}
+
+	err := NewFailFastParallelExecutor(2, executors...)(ctx)
+
+	assert.NoError(err)
+	mu.Lock()
+	assert.Equal(3, executedCount, "all executors should run when all succeed")
+	mu.Unlock()
+}
diff --git a/pkg/common/job_error.go b/pkg/common/job_error.go
@@ -12,6 +12,26 @@ type jobCancelCtx string
 
 const JobCancelCtxVal = jobCancelCtx("job.cancel")
 
+type failFastContextKey string // dedicated key type for the fail-fast context value
+
+const FailFastContextKeyVal = failFastContextKey("job.failfast") // key used by WithFailFast and IsFailFast
+
+// WithFailFast returns a child context that carries failFast under FailFastContextKeyVal
+func WithFailFast(ctx context.Context, failFast bool) context.Context {
+	return context.WithValue(ctx, FailFastContextKeyVal, failFast)
+}
+
+// IsFailFast reports whether fail-fast is enabled in ctx; it is false when the value is absent or not a bool
+func IsFailFast(ctx context.Context) bool {
+	val := ctx.Value(FailFastContextKeyVal)
+	if val != nil {
+		if ff, ok := val.(bool); ok {
+			return ff
+		}
+	}
+	return false
+}
+
 // JobError returns the job error for current context if any
 func JobError(ctx context.Context) error {
 	val := ctx.Value(jobErrorContextKeyVal)