From 64c17a92fdd20d4905376266cff09b2540cf8df1 Mon Sep 17 00:00:00 2001 From: Paul Gaiduk Date: Wed, 8 Oct 2025 23:32:44 +0200 Subject: [PATCH] newlog: get rid of Fatalf calls in vector.go Instead of calling log.Fatalf when we fail to create the socket listener, we retry forever with a backoff. This is important because if newlogd exits, the watchdog will reboot the whole system, which is not what we want for a transient failure like a missing directory. Signed-off-by: Paul Gaiduk --- pkg/newlog/cmd/vector.go | 53 ++++++++++++++++++-------- pkg/newlog/cmd/vector_test.go | 72 +++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 16 deletions(-) create mode 100644 pkg/newlog/cmd/vector_test.go diff --git a/pkg/newlog/cmd/vector.go b/pkg/newlog/cmd/vector.go index 50aa5be48c7..dfbc4d7b3f4 100644 --- a/pkg/newlog/cmd/vector.go +++ b/pkg/newlog/cmd/vector.go @@ -6,11 +6,13 @@ package main import ( "bufio" "encoding/base64" + "errors" "fmt" "net" "os" "path/filepath" "strings" + "time" fileutils "github.com/lf-edge/eve/pkg/pillar/utils/file" ) @@ -25,25 +27,44 @@ var ( candidateConfigPath = "/persist/vector/config/vector.yaml.new" ) +func createVectorSockets(sockPath string, backoffTime time.Duration) *net.UnixListener { + for { + // Create unix socket + if err := os.Remove(sockPath); errors.Is(err, os.ErrNotExist) { + // Socket doesn't exist, this is expected + } else if err != nil { + log.Errorf("createIncomingSockListener: Remove socket failed: %v", err) + time.Sleep(backoffTime) // wait before retry + continue + } + unixAddr, err := net.ResolveUnixAddr("unix", sockPath) + if err != nil { + log.Errorf("createIncomingSockListener: ResolveUnixAddr failed: %v", err) + time.Sleep(backoffTime) // wait before retry + continue + } + unixListener, err := net.ListenUnix("unix", unixAddr) + if err != nil { + log.Errorf("createIncomingSockListener: ListenUnix failed: %v", err) + time.Sleep(backoffTime) // wait before retry + continue + } + // Set permissions on socket + if 
err := os.Chmod(sockPath, 0666); err != nil { + log.Errorf("createIncomingSockListener: chmod socket failed: %v", err) + unixListener.Close() + time.Sleep(backoffTime) // wait before retry + continue + } + return unixListener + } +} + // listenOnSocketAndWriteToChan - goroutine to listen on unix sockets for incoming log entries func listenOnSocketAndWriteToChan(sockPath string, sendToChan chan<- string) { - // Create unix socket - os.Remove(sockPath) // Remove any existing socket - unixAddr, err := net.ResolveUnixAddr("unix", sockPath) - if err != nil { - log.Fatalf("createIncomingSockListener: ResolveUnixAddr failed: %v", err) - } - unixListener, err := net.ListenUnix("unix", unixAddr) - if err != nil { - log.Fatalf("createIncomingSockListener: ListenUnix failed: %v", err) - } - defer unixListener.Close() + unixListener := createVectorSockets(sockPath, 10*time.Second) defer os.Remove(sockPath) - - // Set permissions on socket - if err := os.Chmod(sockPath, 0666); err != nil { - log.Fatalf("createIncomingSockListener: chmod socket failed: %v", err) - } + defer unixListener.Close() // Handle socket connections for { diff --git a/pkg/newlog/cmd/vector_test.go b/pkg/newlog/cmd/vector_test.go new file mode 100644 index 00000000000..08c0041ea5b --- /dev/null +++ b/pkg/newlog/cmd/vector_test.go @@ -0,0 +1,72 @@ +// Copyright (c) 2025 Zededa, Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "net" + "os" + "path/filepath" + "testing" + "time" + + "github.com/onsi/gomega" +) + +func TestCreateVectorSockets(t *testing.T) { + t.Parallel() + g := gomega.NewWithT(t) + + // Create a temporary directory for testing + tmpDir, err := os.MkdirTemp("", "vector_socket_test") + g.Expect(err).To(gomega.BeNil()) + defer os.RemoveAll(tmpDir) + + // Create a socket path in a subdirectory that doesn't exist yet + nonExistentDir := filepath.Join(tmpDir, "nonexistent") + sockPath := filepath.Join(nonExistentDir, "test.sock") + + unixListenerChan := make(chan *net.UnixListener, 1) + backoffPeriod := 100 * time.Millisecond + + go func() { + unixListener := createVectorSockets(sockPath, backoffPeriod) + unixListenerChan <- unixListener + }() + + time.Sleep(2 * backoffPeriod) // Wait to ensure retries happen + + // Verify that the socket was not created yet + _, err = os.Stat(sockPath) + g.Expect(os.IsNotExist(err)).To(gomega.BeTrue(), "Socket file should not exist yet") + + // Now create the directory + err = os.MkdirAll(nonExistentDir, 0755) + g.Expect(err).To(gomega.BeNil()) + + // Wait a bit to let the function succeed + var unixListener *net.UnixListener + select { + case unixListener = <-unixListenerChan: + // Successfully created the listener + case <-time.After(10 * backoffPeriod): + t.Fatal("Timeout waiting for createVectorSockets to succeed") + } + + // verify that the listener was created + g.Expect(unixListener).ToNot(gomega.BeNil(), "createVectorSockets should succeed after directory creation") + + // Verify the socket was created + info, err := os.Stat(sockPath) + g.Expect(err).To(gomega.BeNil(), "Socket file should be created after directory creation") + + expectedMode := os.FileMode(0666) + g.Expect(info.Mode().Perm()).To(gomega.Equal(expectedMode), "Socket permissions should be correct") + + // Verify we can connect to the socket + conn, err := net.Dial("unix", sockPath) + 
g.Expect(err).To(gomega.BeNil(), "Should be able to connect to socket") + conn.Close() + + t.Log("Test passed: socket created successfully and is connectable") +}