diff --git a/README.md b/README.md index 826d87908d9..4c4d5526baa 100644 --- a/README.md +++ b/README.md @@ -259,6 +259,7 @@ The following API entry points are currently supported: | `/pause/:id` | `application/json` | Pause a running container. | | `/unpause/:id` | `application/json` | Unpause a paused container. | | `/debug/goroutines` | `text/plain` | Print the goroutine stacks. | +| `/debug/heap` | `text/plain` | Write the heap dump. | diff --git a/cmd/crio/main.go b/cmd/crio/main.go index 900c6cadb06..428e4c33ebd 100644 --- a/cmd/crio/main.go +++ b/cmd/crio/main.go @@ -41,10 +41,7 @@ import ( ) func writeCrioGoroutineStacks() { - path := filepath.Join("/tmp", fmt.Sprintf( - "crio-goroutine-stacks-%s.log", - strings.ReplaceAll(time.Now().Format(time.RFC3339), ":", ""), - )) + path := filepath.Join(os.TempDir(), fmt.Sprintf("crio-goroutine-stacks-%s.log", criocli.Timestamp())) if err := utils.WriteGoroutineStacksToFile(path); err != nil { logrus.Warnf("Failed to write goroutine stacks: %s", err) } diff --git a/completions/fish/crio.fish b/completions/fish/crio.fish index bc12db3a21a..7f476bb0eb5 100644 --- a/completions/fish/crio.fish +++ b/completions/fish/crio.fish @@ -2,7 +2,7 @@ function __fish_crio_no_subcommand --description 'Test if there has been any subcommand yet' for i in (commandline -opc) - if contains -- $i check complete completion help h config man markdown md status config c containers container cs s info i goroutines g version wipe help h + if contains -- $i check complete completion help h config man markdown md status config c containers container cs s info i goroutines g heap hp version wipe help h return 1 end end @@ -223,6 +223,9 @@ complete -c crio -n '__fish_seen_subcommand_from info i' -f -l help -s h -d 'sho complete -r -c crio -n '__fish_seen_subcommand_from status' -a 'info i' -d 'Retrieve generic information about CRI-O, such as the cgroup and storage driver.' 
complete -c crio -n '__fish_seen_subcommand_from goroutines g' -f -l help -s h -d 'show help' complete -r -c crio -n '__fish_seen_subcommand_from status' -a 'goroutines g' -d 'Display the goroutine stack.' +complete -c crio -n '__fish_seen_subcommand_from heap hp' -f -l help -s h -d 'show help' +complete -r -c crio -n '__fish_seen_subcommand_from status' -a 'heap hp' -d 'Write the heap dump to a temp file and print its location on disk.' +complete -c crio -n '__fish_seen_subcommand_from heap hp' -l file -s f -r -d 'Output file of the heap dump.' complete -c crio -n '__fish_seen_subcommand_from version' -f -l help -s h -d 'show help' complete -r -c crio -n '__fish_crio_no_subcommand' -a 'version' -d 'display detailed version information' complete -c crio -n '__fish_seen_subcommand_from version' -f -l json -s j -d 'print JSON instead of text' diff --git a/docs/crio.8.md b/docs/crio.8.md index d2e7efe7820..9c5fd7a0b60 100644 --- a/docs/crio.8.md +++ b/docs/crio.8.md @@ -523,6 +523,12 @@ Retrieve generic information about CRI-O, such as the cgroup and storage driver. Display the goroutine stack. +### heap, hp + +Write the heap dump to a temp file and print its location on disk. + +**--file, -f**="": Output file of the heap dump. + ## version display detailed version information diff --git a/internal/client/client.go b/internal/client/client.go index e6d7b700616..771e093cdd9 100644 --- a/internal/client/client.go +++ b/internal/client/client.go @@ -24,6 +24,7 @@ type CrioClient interface { ContainerInfo(context.Context, string) (*types.ContainerInfo, error) ConfigInfo(context.Context) (string, error) GoRoutinesInfo(context.Context) (string, error) + HeapInfo(context.Context) ([]byte, error) } type crioClientImpl struct { @@ -126,3 +127,12 @@ func (c *crioClientImpl) GoRoutinesInfo(ctx context.Context) (string, error) { } return string(body), nil } + +// HeapInfo writes a heap dump. 
// Timestamp formats the current time (as returned by time.Now) using the
// RFC 3339 layout and returns that text with every ':' removed.
func Timestamp() string {
	stamp := time.Now().Format(time.RFC3339)

	// strings.Map drops a character whenever the mapping function returns a
	// negative value, so only the colons are filtered out.
	return strings.Map(func(r rune) rune {
		if r == ':' {
			return -1
		}

		return r
	}, stamp)
}
outputPath = fmt.Sprintf("crio-heap-%s.out", Timestamp()) + fallthrough + + default: + file, err := os.Create(outputPath) + if err != nil { + return fmt.Errorf("create output file %s: %w", outputPath, err) + } + + if _, err := file.Write(data); err != nil { + return fmt.Errorf("write heap dump: %w", err) + } + + logrus.Infof("Wrote heap dump to: %s", outputPath) + } + + return nil +} diff --git a/server/inspect.go b/server/inspect.go index 950b39d6c85..a08832ef352 100644 --- a/server/inspect.go +++ b/server/inspect.go @@ -3,9 +3,12 @@ package server import ( "errors" "fmt" + "io" "math" "net/http" "net/http/pprof" + "os" + "runtime/debug" "github.com/containers/storage/pkg/idtools" "github.com/go-chi/chi/v5" @@ -126,6 +129,7 @@ const ( InspectPauseEndpoint = "/pause" InspectUnpauseEndpoint = "/unpause" InspectGoRoutinesEndpoint = "/debug/goroutines" + InspectHeapEndpoint = "/debug/heap" ) // GetExtendInterfaceMux returns the mux used to serve extend interface requests. @@ -254,6 +258,28 @@ func (s *Server) GetExtendInterfaceMux(enableProfile bool) *chi.Mux { } })) + mux.Get(InspectHeapEndpoint, http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + w.Header().Set("Content-Type", "application/octet-stream") + + f, err := os.CreateTemp("", "cri-o-heap-*.out") + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + defer os.Remove(f.Name()) + debug.WriteHeapDump(f.Fd()) + + if _, err := f.Seek(0, 0); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + if _, err := io.Copy(w, f); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + })) + // Add pprof handlers if enableProfile { mux.Get("/debug/pprof/cmdline", http.HandlerFunc(pprof.Cmdline)) diff --git a/test/status.bats b/test/status.bats index fd29cc0e02a..f3e3c591295 100644 --- a/test/status.bats +++ b/test/status.bats @@ -64,3 +64,9 @@ function teardown() { run -0 "${CRIO_BINARY_PATH}" 
status --socket="${CRIO_SOCKET}" goroutines [[ "$output" == *"goroutine"* ]] } + +@test "status should succeed to retrieve a heap dump" { + run -0 "${CRIO_BINARY_PATH}" status --socket="${CRIO_SOCKET}" heap -f "$TESTDIR/heap.out" + [[ "$output" == *"Wrote heap dump to: $TESTDIR/heap.out"* ]] + [ -f "$TESTDIR/heap.out" ] +}