diff --git a/cmd/crio/main.go b/cmd/crio/main.go
index 62a897a239b..9820d0ab04b 100644
--- a/cmd/crio/main.go
+++ b/cmd/crio/main.go
@@ -242,6 +242,19 @@ func main() {
             logrus.Fatal(err)
         }
 
+        if config.CleanShutdownFile != "" {
+            // clear out the shutdown file
+            if err := os.Remove(config.CleanShutdownFile); err != nil {
+                // not a fatal error, as it could have been cleaned up
+                logrus.Error(err)
+            }
+
+            // and sync the changes to disk
+            if err := utils.SyncParent(config.CleanShutdownFile); err != nil {
+                logrus.Errorf("failed to sync parent directory of clean shutdown file: %v", err)
+            }
+        }
+
         v1alpha2.Register(grpcServer, crioServer)
         v1.Register(grpcServer, crioServer)
 
diff --git a/cmd/crio/wipe.go b/cmd/crio/wipe.go
index e633af1ed86..e283459227d 100644
--- a/cmd/crio/wipe.go
+++ b/cmd/crio/wipe.go
@@ -32,6 +32,33 @@ func crioWipe(c *cli.Context) error {
         return err
     }
 
+    store, err := config.GetStore()
+    if err != nil {
+        return err
+    }
+
+    // first, check whether crio has shut down with time to sync.
+    // if not, we should clear the storage directory
+    if config.CleanShutdownFile != "" {
+        if _, err := os.Stat(config.CleanShutdownFile); err != nil {
+            logrus.Infof("file %s not found. Wiping storage directory %s because of suspected dirty shutdown", config.CleanShutdownFile, store.GraphRoot())
+            // If we do not do this, we may leak other resources that are not directly in the graphroot.
+            // Erroring here should not be fatal though, it's a best-effort cleanup
+            if err := store.Wipe(); err != nil {
+                logrus.Infof("failed to wipe storage cleanly: %v", err)
+            }
+            // unmount storage or else we will fail with EBUSY
+            if _, err := store.Shutdown(false); err != nil {
+                return errors.Errorf("failed to shutdown storage before wiping: %v", err)
+            }
+            // totally remove storage, whatever is left (possibly orphaned layers)
+            if err := os.RemoveAll(store.GraphRoot()); err != nil {
+                return errors.Errorf("failed to remove storage directory: %v", err)
+            }
+            return nil
+        }
+    }
+
     shouldWipeImages := true
     shouldWipeContainers := true
     // First, check if we need to upgrade at all
@@ -64,11 +91,6 @@ func crioWipe(c *cli.Context) error {
         return nil
     }
 
-    store, err := config.GetStore()
-    if err != nil {
-        return err
-    }
-
     cstore := ContainerStore{store}
     if err := cstore.wipeCrio(shouldWipeImages); err != nil {
         return err
@@ -87,14 +109,14 @@ func (c ContainerStore) wipeCrio(shouldWipeImages bool) error {
         return err
     }
     if len(crioContainers) != 0 {
-        logrus.Infof("wiping containers")
+        logrus.Infof("Wiping containers")
     }
     for _, id := range crioContainers {
         c.deleteContainer(id)
     }
     if shouldWipeImages {
         if len(crioImages) != 0 {
-            logrus.Infof("wiping images")
+            logrus.Infof("Wiping images")
        }
         for _, id := range crioImages {
             c.deleteImage(id)
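The wipe path above hinges on one decision: if the clean-shutdown file is missing, storage is suspect and is removed wholesale. For illustration, here is that decision reduced to a standalone sketch; `wipeIfDirty` and its function parameters are hypothetical stand-ins for `store.Wipe()` and `store.Shutdown(false)` from containers/storage, not part of this patch:

```go
package main

import (
	"fmt"
	"log"
	"os"
	"path/filepath"
)

// wipeIfDirty mirrors the logic added to crioWipe: a missing marker
// file means CRI-O never got to sync storage before it stopped, so
// the whole graph root is untrustworthy and is removed.
func wipeIfDirty(cleanShutdownFile, graphRoot string,
	wipeStore, shutdownStore func() error) error {
	if cleanShutdownFile == "" {
		return nil // checking is disabled
	}
	if _, err := os.Stat(cleanShutdownFile); err == nil {
		return nil // clean shutdown; nothing to do
	}
	log.Printf("%s not found; wiping %s after suspected dirty shutdown", cleanShutdownFile, graphRoot)
	// best-effort: releases resources living outside the graph root
	if err := wipeStore(); err != nil {
		log.Printf("failed to wipe storage cleanly: %v", err)
	}
	// unmount first, or RemoveAll fails with EBUSY
	if err := shutdownStore(); err != nil {
		return fmt.Errorf("failed to shutdown storage before wiping: %w", err)
	}
	// remove whatever is left, including orphaned layers
	return os.RemoveAll(graphRoot)
}

func main() {
	dir, err := os.MkdirTemp("", "graphroot")
	if err != nil {
		log.Fatal(err)
	}
	noop := func() error { return nil }
	// no marker exists in this demo, so the temp "graph root" is wiped
	if err := wipeIfDirty(filepath.Join(dir, "clean.shutdown"), dir, noop, noop); err != nil {
		log.Fatal(err)
	}
}
```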
diff --git a/completions/bash/crio b/completions/bash/crio
index 8703f5e54af..22581fdb8dd 100755
--- a/completions/bash/crio
+++ b/completions/bash/crio
@@ -17,6 +17,7 @@ h
 --big-files-temporary-dir
 --bind-mount-prefix
 --cgroup-manager
+--clean-shutdown-file
 --cni-config-dir
 --cni-default-network
 --cni-plugin-dir
diff --git a/completions/fish/crio.fish b/completions/fish/crio.fish
index d863b9603df..d72399065ef 100644
--- a/completions/fish/crio.fish
+++ b/completions/fish/crio.fish
@@ -14,6 +14,7 @@ complete -c crio -n '__fish_crio_no_subcommand' -f -l apparmor-profile -r -d 'Na
 complete -c crio -n '__fish_crio_no_subcommand' -f -l big-files-temporary-dir -r -d 'Path to the temporary directory to use for storing big files, used to store image blobs and data streams related to containers image management.'
 complete -c crio -n '__fish_crio_no_subcommand' -f -l bind-mount-prefix -r -d 'A prefix to use for the source of the bind mounts. This option would be useful if you were running CRI-O in a container. And had `/` mounted on `/host` in your container. Then if you ran CRI-O with the `--bind-mount-prefix=/host` option, CRI-O would add /host to any bind mounts it is handed over CRI. If Kubernetes asked to have `/var/lib/foobar` bind mounted into the container, then CRI-O would bind mount `/host/var/lib/foobar`. Since CRI-O itself is running in a container with `/` or the host mounted on `/host`, the container would end up with `/var/lib/foobar` from the host mounted in the container rather then `/var/lib/foobar` from the CRI-O container. (default: "")'
 complete -c crio -n '__fish_crio_no_subcommand' -f -l cgroup-manager -r -d 'cgroup manager (cgroupfs or systemd)'
+complete -c crio -n '__fish_crio_no_subcommand' -l clean-shutdown-file -r -d 'Location for CRI-O to lay down the clean shutdown file. It indicates whether we\'ve had time to sync changes to disk before shutting down. If not found, crio wipe will clear the storage directory'
 complete -c crio -n '__fish_crio_no_subcommand' -l cni-config-dir -r -d 'CNI configuration files directory'
 complete -c crio -n '__fish_crio_no_subcommand' -f -l cni-default-network -r -d 'Name of the default CNI network to select. If not set or "", then CRI-O will pick-up the first one found in --cni-config-dir.'
 complete -c crio -n '__fish_crio_no_subcommand' -f -l cni-plugin-dir -r -d 'CNI plugin binaries directory'
diff --git a/completions/zsh/_crio b/completions/zsh/_crio
index 0aea3a378d6..a73fa4b4419 100644
--- a/completions/zsh/_crio
+++ b/completions/zsh/_crio
@@ -7,7 +7,7 @@ it later with **--config**. Global options will modify the output.'
 'version:display detailed version information')
 _describe 'commands' cmds
 local -a opts
-  opts=('--additional-devices' '--apparmor-profile' '--big-files-temporary-dir' '--bind-mount-prefix' '--cgroup-manager' '--cni-config-dir' '--cni-default-network' '--cni-plugin-dir' '--config' '--config-dir' '--conmon' '--conmon-cgroup' '--conmon-env' '--container-attach-socket-dir' '--container-exits-dir' '--ctr-stop-timeout' '--decryption-keys-path' '--default-capabilities' '--default-env' '--default-mounts-file' '--default-runtime' '--default-sysctls' '--default-transport' '--default-ulimits' '--drop-infra-ctr' '--enable-metrics' '--enable-profile-unix-socket' '--gid-mappings' '--global-auth-file' '--grpc-max-recv-msg-size' '--grpc-max-send-msg-size' '--hooks-dir' '--image-volumes' '--infra-ctr-cpuset' '--insecure-registry' '--irqbalance-config-file' '--listen' '--log' '--log-dir' '--log-filter' '--log-format' '--log-journald' '--log-level' '--log-size-max' '--metrics-port' '--metrics-socket' '--namespaces-dir' '--no-pivot' '--pause-command' '--pause-image' '--pause-image-auth-file' '--pids-limit' '--pinns-path' '--profile' '--profile-port' '--read-only' '--registries-conf' '--registry' '--root' '--runroot' '--runtimes' '--seccomp-profile' '--seccomp-use-default-when-empty' '--selinux' '--separate-pull-cgroup' '--signature-policy' '--storage-driver' '--storage-opt' '--stream-address' '--stream-enable-tls' '--stream-idle-timeout' '--stream-port' '--stream-tls-ca' '--stream-tls-cert' '--stream-tls-key' '--uid-mappings' '--version-file' '--version-file-persist' '--help' '--version')
+  opts=('--additional-devices' '--apparmor-profile' '--big-files-temporary-dir' '--bind-mount-prefix' '--cgroup-manager' '--clean-shutdown-file' '--cni-config-dir' '--cni-default-network' '--cni-plugin-dir' '--config' '--config-dir' '--conmon' '--conmon-cgroup' '--conmon-env' '--container-attach-socket-dir' '--container-exits-dir' '--ctr-stop-timeout' '--decryption-keys-path' '--default-capabilities' '--default-env' '--default-mounts-file' '--default-runtime' '--default-sysctls' '--default-transport' '--default-ulimits' '--drop-infra-ctr' '--enable-metrics' '--enable-profile-unix-socket' '--gid-mappings' '--global-auth-file' '--grpc-max-recv-msg-size' '--grpc-max-send-msg-size' '--hooks-dir' '--image-volumes' '--infra-ctr-cpuset' '--insecure-registry' '--irqbalance-config-file' '--listen' '--log' '--log-dir' '--log-filter' '--log-format' '--log-journald' '--log-level' '--log-size-max' '--metrics-port' '--metrics-socket' '--namespaces-dir' '--no-pivot' '--pause-command' '--pause-image' '--pause-image-auth-file' '--pids-limit' '--pinns-path' '--profile' '--profile-port' '--read-only' '--registries-conf' '--registry' '--root' '--runroot' '--runtimes' '--seccomp-profile' '--seccomp-use-default-when-empty' '--selinux' '--separate-pull-cgroup' '--signature-policy' '--storage-driver' '--storage-opt' '--stream-address' '--stream-enable-tls' '--stream-idle-timeout' '--stream-port' '--stream-tls-ca' '--stream-tls-cert' '--stream-tls-key' '--uid-mappings' '--version-file' '--version-file-persist' '--help' '--version')
 _describe 'global options' opts
 return
diff --git a/docs/crio.8.md b/docs/crio.8.md
index 5d6cfe3f4ad..c71a0d26c23 100644
--- a/docs/crio.8.md
+++ b/docs/crio.8.md
@@ -16,6 +16,7 @@ crio
 [--big-files-temporary-dir]=[value]
 [--bind-mount-prefix]=[value]
 [--cgroup-manager]=[value]
+[--clean-shutdown-file]=[value]
 [--cni-config-dir]=[value]
 [--cni-default-network]=[value]
 [--cni-plugin-dir]=[value]
@@ -126,6 +127,8 @@ crio [GLOBAL OPTIONS] command [COMMAND OPTIONS] [ARGUMENTS...]
 **--cgroup-manager**="": cgroup manager (cgroupfs or systemd) (default: systemd)
 
+**--clean-shutdown-file**="": Location for CRI-O to lay down the clean shutdown file. It indicates whether we've had time to sync changes to disk before shutting down. If not found, crio wipe will clear the storage directory (default: /var/lib/crio/clean.shutdown)
+
 **--cni-config-dir**="": CNI configuration files directory (default: /etc/cni/net.d/)
 
 **--cni-default-network**="": Name of the default CNI network to select. If not set or "", then CRI-O will pick-up the first one found in --cni-config-dir.
diff --git a/docs/crio.conf.5.md b/docs/crio.conf.5.md
index bbc2b8990f2..80c71fd1010 100644
--- a/docs/crio.conf.5.md
+++ b/docs/crio.conf.5.md
@@ -54,6 +54,11 @@ CRI-O reads its storage defaults from the containers-storage.conf(5) file locate
   It is used to check if crio wipe should wipe images, which should
   only happen when CRI-O has been upgraded
 
+**clean_shutdown_file**="/var/lib/crio/clean.shutdown"
+  Location for CRI-O to lay down the clean shutdown file.
+  It is used to check whether crio had time to sync before shutting down.
+  If not found, crio wipe will clear the storage directory.
+
 ## CRIO.API TABLE
 The `crio.api` table contains settings for the kubelet/gRPC interface.
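The new `clean_shutdown_file` key is read like every other TOML option, via the struct tag added to `RootConfig` below. A minimal sketch of that mapping, assuming the BurntSushi/toml decoder; note the real key sits under the `[crio]` table in crio.conf, which this sketch flattens away:

```go
package main

import (
	"fmt"
	"log"

	"github.com/BurntSushi/toml"
)

// rootConfig mirrors just the field added to RootConfig
// in pkg/config/config.go.
type rootConfig struct {
	CleanShutdownFile string `toml:"clean_shutdown_file"`
}

func main() {
	const conf = `clean_shutdown_file = "/var/lib/crio/clean.shutdown"`

	var cfg rootConfig
	if _, err := toml.Decode(conf, &cfg); err != nil {
		log.Fatal(err)
	}
	fmt.Println(cfg.CleanShutdownFile) // /var/lib/crio/clean.shutdown
}
```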
diff --git a/internal/criocli/criocli.go b/internal/criocli/criocli.go
index 14dc18a3830..ea43e6520ed 100644
--- a/internal/criocli/criocli.go
+++ b/internal/criocli/criocli.go
@@ -281,6 +281,9 @@ func mergeConfig(config *libconfig.Config, ctx *cli.Context) error {
     if ctx.IsSet("version-file-persist") {
         config.VersionFilePersist = ctx.String("version-file-persist")
     }
+    if ctx.IsSet("clean-shutdown-file") {
+        config.CleanShutdownFile = ctx.String("clean-shutdown-file")
+    }
     if ctx.IsSet("enable-metrics") {
         config.EnableMetrics = ctx.Bool("enable-metrics")
     }
@@ -824,6 +827,13 @@ func getCrioFlags(defConf *libconfig.Config) []cli.Flag {
             Usage:     "CPU set to run infra containers, if not specified CRI-O will use all online CPUs to run infra containers (default: '').",
             EnvVars:   []string{"CONTAINER_INFRA_CTR_CPUSET"},
         },
+        &cli.StringFlag{
+            Name:      "clean-shutdown-file",
+            Usage:     "Location for CRI-O to lay down the clean shutdown file. It indicates whether we've had time to sync changes to disk before shutting down. If not found, crio wipe will clear the storage directory",
+            Value:     defConf.CleanShutdownFile,
+            EnvVars:   []string{"CONTAINER_CLEAN_SHUTDOWN_FILE"},
+            TakesFile: true,
+        },
     }
 }
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 0e2e8f2d016..7222eaef877 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -148,6 +148,10 @@ type RootConfig struct {
     // VersionFilePersist is the location CRI-O will lay down the version file
     // that checks whether we've upgraded
     VersionFilePersist string `toml:"version_file_persist"`
+
+    // CleanShutdownFile is the location CRI-O will lay down the clean shutdown file
+    // that checks whether we've had time to sync before shutting down
+    CleanShutdownFile string `toml:"clean_shutdown_file"`
 }
 
 // RuntimeHandler represents each item of the "crio.runtime.runtimes" TOML
@@ -583,6 +587,7 @@ func DefaultConfig() (*Config, error) {
             LogDir:             "/var/log/crio/pods",
             VersionFile:        CrioVersionPathTmp,
             VersionFilePersist: CrioVersionPathPersist,
+            CleanShutdownFile:  CrioCleanShutdownFile,
         },
         APIConfig: APIConfig{
             Listen: CrioSocketPath,
diff --git a/pkg/config/config_unix.go b/pkg/config/config_unix.go
index bfe613749d6..a90c716e3c2 100644
--- a/pkg/config/config_unix.go
+++ b/pkg/config/config_unix.go
@@ -25,4 +25,9 @@ const (
     // CrioVersionPathPersist is where the CRI-O version file is located
     // used to check whether we've upgraded, and thus need to remove images
     CrioVersionPathPersist = "/var/lib/crio/version"
+
+    // CrioCleanShutdownFile is the location CRI-O will lay down the clean shutdown file
+    // that checks whether we've had time to sync before shutting down.
+    // If not, crio wipe will clear the storage directory.
+    CrioCleanShutdownFile = "/var/lib/crio/clean.shutdown"
 )
diff --git a/pkg/config/template.go b/pkg/config/template.go
index 5059dd6ed4f..d7c88dd4f66 100644
--- a/pkg/config/template.go
+++ b/pkg/config/template.go
@@ -62,6 +62,11 @@ version_file = "{{ .VersionFile }}"
 # only happen when CRI-O has been upgraded
 version_file_persist = "{{ .VersionFilePersist }}"
 
+# Location for CRI-O to lay down the clean shutdown file.
+# It is used to check whether crio had time to sync before shutting down.
+# If not found, crio wipe will clear the storage directory.
+clean_shutdown_file = "{{ .CleanShutdownFile }}"
+
 # The crio.api table contains settings for the kubelet/gRPC interface.
 [crio.api]
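criocli.go wires the option through all three configuration surfaces: the TOML value becomes the flag default (`Value: defConf.CleanShutdownFile`), the `CONTAINER_CLEAN_SHUTDOWN_FILE` environment variable and the `--clean-shutdown-file` flag sit on top, and `ctx.IsSet` makes sure they only override the file when actually provided. A reduced sketch of that precedence with urfave/cli, using a placeholder struct in place of CRI-O's config type:

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/urfave/cli/v2"
)

func main() {
	// stand-in for the value loaded from crio.conf
	config := struct{ CleanShutdownFile string }{"/var/lib/crio/clean.shutdown"}

	app := &cli.App{
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:      "clean-shutdown-file",
				Value:     config.CleanShutdownFile, // TOML value is the default
				EnvVars:   []string{"CONTAINER_CLEAN_SHUTDOWN_FILE"},
				TakesFile: true,
			},
		},
		Action: func(ctx *cli.Context) error {
			// flag or env var wins only when explicitly set,
			// mirroring mergeConfig above
			if ctx.IsSet("clean-shutdown-file") {
				config.CleanShutdownFile = ctx.String("clean-shutdown-file")
			}
			fmt.Println(config.CleanShutdownFile)
			return nil
		},
	}
	if err := app.Run(os.Args); err != nil {
		log.Fatal(err)
	}
}
```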
diff --git a/server/server.go b/server/server.go
index e26a6adaee4..078682cf469 100644
--- a/server/server.go
+++ b/server/server.go
@@ -29,6 +29,7 @@ import (
     "github.com/cri-o/cri-o/server/cri/types"
     "github.com/cri-o/cri-o/server/metrics"
     "github.com/cri-o/cri-o/server/streaming"
+    "github.com/cri-o/cri-o/utils"
     "github.com/fsnotify/fsnotify"
     "github.com/pkg/errors"
     "github.com/prometheus/client_golang/prometheus/promhttp"
@@ -277,7 +278,38 @@ func (s *Server) Shutdown(ctx context.Context) error {
     // crio.service restart!!!
     s.cleanupSandboxesOnShutdown(ctx)
 
-    return s.ContainerServer.Shutdown()
+    if err := s.ContainerServer.Shutdown(); err != nil {
+        return err
+    }
+
+    // first, make sure we sync all storage changes
+    if err := utils.Sync(s.Store().GraphRoot()); err != nil {
+        return errors.Wrapf(err, "failed to sync graph root after shutting down")
+    }
+
+    if s.config.CleanShutdownFile != "" {
+        // then, we write the CleanShutdownFile
+        // we do this after the sync, to ensure ordering.
+        // Otherwise, we may run into situations where the CleanShutdownFile
+        // is written before storage, causing us to treat a corrupted
+        // storage as clean.
+        f, err := os.Create(s.config.CleanShutdownFile)
+        if err != nil {
+            return errors.Wrapf(err, "failed to write file to indicate a clean shutdown")
+        }
+        f.Close()
+
+        // finally, attempt to sync the newly created file to disk.
+        // It's still possible we crash after Create but before this Sync,
+        // which will lead us to think storage wasn't synced.
+        // However, that's much less likely than if we don't have a second Sync,
+        // and less risky than if we don't Sync after the Create
+        if err := utils.SyncParent(s.config.CleanShutdownFile); err != nil {
+            return errors.Wrapf(err, "failed to sync clean shutdown file")
+        }
+    }
+
+    return nil
 }
 
 // configureMaxThreads sets the Go runtime max threads threshold
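The ordering in Shutdown is the heart of the feature: fsync the storage state first, only then create the marker, then fsync the marker's parent directory so the new directory entry is itself durable. Compressed into a self-contained sketch, with `syncPath` and `writeMarker` as illustrative helpers (`os.File.Sync` stands in for `utils.Sync`):

```go
package main

import (
	"log"
	"os"
	"path/filepath"
)

// syncPath opens path read-only and fsyncs it; this works for
// directories too, which is what persists a new directory entry.
func syncPath(path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()
	return f.Sync()
}

// writeMarker records a clean shutdown. Order matters:
//  1. fsync the data we care about (the graph root)
//  2. create the marker file
//  3. fsync the marker's parent so the create itself is durable
// Reversing 1 and 2 could persist the marker *before* the storage
// state, making a corrupted graph root look clean after a crash.
func writeMarker(graphRoot, marker string) error {
	if err := syncPath(graphRoot); err != nil {
		return err
	}
	f, err := os.Create(marker)
	if err != nil {
		return err
	}
	f.Close()
	return syncPath(filepath.Dir(marker))
}

func main() {
	if err := writeMarker(os.TempDir(), filepath.Join(os.TempDir(), "clean.shutdown")); err != nil {
		log.Fatal(err)
	}
}
```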
diff --git a/server/server_test.go b/server/server_test.go
index 81a867bb25c..996112423f3 100644
--- a/server/server_test.go
+++ b/server/server_test.go
@@ -2,6 +2,7 @@ package server_test
 
 import (
     "context"
+    "os"
 
     cstorage "github.com/containers/storage"
     "github.com/cri-o/cri-o/server"
@@ -267,6 +268,7 @@ var _ = t.Describe("Server", func() {
             // Given
             gomock.InOrder(
                 storeMock.EXPECT().Shutdown(gomock.Any()).Return(nil, nil),
+                storeMock.EXPECT().GraphRoot().Return(emptyDir),
             )
 
             // When
@@ -274,6 +276,10 @@ var _ = t.Describe("Server", func() {
 
             // Then
             Expect(err).To(BeNil())
+
+            // expect cri-o to have created the clean shutdown file
+            _, err = os.Stat(sut.Config().CleanShutdownFile)
+            Expect(err).To(BeNil())
         })
     })
 
diff --git a/server/suite_test.go b/server/suite_test.go
index 325eb9d6b6d..cd48f55aee4 100644
--- a/server/suite_test.go
+++ b/server/suite_test.go
@@ -143,6 +143,7 @@ var beforeEach = func() {
     serverConfig.ContainerAttachSocketDir = testPath
     serverConfig.ContainerExitsDir = path.Join(testPath, "exits")
     serverConfig.LogDir = path.Join(testPath, "log")
+    serverConfig.CleanShutdownFile = path.Join(testPath, "clean.shutdown")
 
     // We want a directory that is guaranteed to exist, but it must
     // be empty so we don't erroneously load anything and make tests
diff --git a/test/crio-wipe.bats b/test/crio-wipe.bats
index 271735c0e5d..6eb436d2760 100644
--- a/test/crio-wipe.bats
+++ b/test/crio-wipe.bats
@@ -10,6 +10,7 @@ function setup() {
     setup_test
     export CONTAINER_VERSION_FILE="$TESTDIR"/version.tmp
     export CONTAINER_VERSION_FILE_PERSIST="$TESTDIR"/version-persist.tmp
+    export CONTAINER_CLEAN_SHUTDOWN_FILE="$TESTDIR"/clean-shutdown.tmp
 }
 
 function run_podman_with_args() {
@@ -115,3 +116,70 @@ function start_crio_with_stopped_pod() {
 
     run_podman_with_args ps -a | grep test
 }
+
+@test "don't clear everything when not asked to check shutdown" {
+    start_crio_with_stopped_pod
+    stop_crio_no_clean
+
+    rm "$CONTAINER_CLEAN_SHUTDOWN_FILE"
+
+    CONTAINER_CLEAN_SHUTDOWN_FILE="" run_crio_wipe
+
+    start_crio_no_setup
+
+    test_crio_did_not_wipe_containers
+    test_crio_did_not_wipe_images
+}
+
+@test "do clear everything when shutdown file not found" {
+    start_crio_with_stopped_pod
+    stop_crio_no_clean
+
+    rm "$CONTAINER_CLEAN_SHUTDOWN_FILE"
+
+    run_crio_wipe
+
+    start_crio_no_setup
+
+    test_crio_wiped_containers
+    test_crio_wiped_images
+}
+
+@test "do clear podman containers when shutdown file not found" {
+    if [[ -z "$PODMAN_BINARY" ]]; then
+        skip "Podman not installed"
+    fi
+
+    start_crio_with_stopped_pod
+    stop_crio_no_clean
+
+    run_podman_with_args run --name test quay.io/crio/busybox:latest ls
+    # all podman containers would be stopped after a reboot
+    run_podman_with_args stop -a
+
+    rm "$CONTAINER_CLEAN_SHUTDOWN_FILE"
+
+    run_crio_wipe
+
+    run_podman_with_args ps -a
+    [[ ! "$output" =~ "test" ]]
+}
+
+@test "fail to clear podman containers when shutdown file not found but container still running" {
+    if [[ -z "$PODMAN_BINARY" ]]; then
+        skip "Podman not installed"
+    fi
+
+    start_crio_with_stopped_pod
+    stop_crio_no_clean
+
+    # all podman containers would be stopped after a reboot
+    run_podman_with_args run --name test -d quay.io/crio/busybox:latest top
+
+    rm "$CONTAINER_CLEAN_SHUTDOWN_FILE"
+
+    run "$CRIO_BINARY_PATH" --config "$CRIO_CONFIG" wipe
+    echo "$status"
+    echo "$output"
+    [ "$status" -ne 0 ]
+}
"$output" =~ "test" ]] +} + +@test "fail to clear podman containers when shutdown file not found but container still running" { + if [[ -z "$PODMAN_BINARY" ]]; then + skip "Podman not installed" + fi + + start_crio_with_stopped_pod + stop_crio_no_clean + + # all podman containers would be stopped after a reboot + run_podman_with_args run --name test -d quay.io/crio/busybox:latest top + + rm "$CONTAINER_CLEAN_SHUTDOWN_FILE" + + run "$CRIO_BINARY_PATH" --config "$CRIO_CONFIG" wipe + echo "$status" + echo "$output" + [ "$status" -ne 0 ] +} diff --git a/utils/utils.go b/utils/utils.go index ef105a36dae..4721e9720e0 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -341,3 +341,22 @@ func GetLabelOptions(selinuxOptions *types.SELinuxOption) []string { } return labels } + +// SyncParent ensures a path's parent directory is synced to disk +func SyncParent(path string) error { + return Sync(filepath.Dir(path)) +} + +// Sync ensures a path is synced to disk +func Sync(path string) error { + f, err := os.OpenFile(path, os.O_RDONLY, 0o755) + if err != nil { + return err + } + defer f.Close() + + if err := syscall.Fsync(int(f.Fd())); err != nil { + return err + } + return nil +}