diff --git a/.gitignore b/.gitignore index b09f620e006..c54c7b10bf2 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ /bin/ /test/bin2img/bin2img /test/checkseccomp/checkseccomp +/test/checkcriu/checkcriu /test/copyimg/copyimg /build coverprofile diff --git a/Makefile b/Makefile index 43f085918e7..ebec08cf9af 100644 --- a/Makefile +++ b/Makefile @@ -182,6 +182,9 @@ test/copyimg/copyimg: $(GO_FILES) .gopathok test/checkseccomp/checkseccomp: $(GO_FILES) .gopathok $(GO_BUILD) $(GCFLAGS) $(GO_LDFLAGS) -tags "$(BUILDTAGS)" -o $@ $(PROJECT)/test/checkseccomp +test/checkcriu/checkcriu: $(GO_FILES) .gopathok + $(GO_BUILD) $(GCFLAGS) $(GO_LDFLAGS) -tags "$(BUILDTAGS)" -o $@ $(PROJECT)/test/checkcriu + bin/crio: $(GO_FILES) .gopathok $(GO_BUILD) $(GCFLAGS) $(GO_LDFLAGS) -tags "$(BUILDTAGS)" -o $@ $(PROJECT)/cmd/crio @@ -225,6 +228,7 @@ endif $(MAKE) -C pinns clean rm -f test/copyimg/copyimg rm -f test/checkseccomp/checkseccomp + rm -f test/checkcriu/checkcriu rm -rf ${BUILD_BIN_PATH} # the approach here, rather than this target depending on the build targets @@ -382,7 +386,7 @@ localintegration: clean binaries test-binaries ./test/test_runner.sh ${TESTFLAGS} binaries: bin/crio bin/crio-status bin/pinns -test-binaries: test/copyimg/copyimg test/checkseccomp/checkseccomp +test-binaries: test/copyimg/copyimg test/checkseccomp/checkseccomp test/checkcriu/checkcriu MANPAGES_MD := $(wildcard docs/*.md) MANPAGES := $(MANPAGES_MD:%.md=%) diff --git a/completions/bash/crio b/completions/bash/crio index ce387c5026d..601c88c81c3 100755 --- a/completions/bash/crio +++ b/completions/bash/crio @@ -44,6 +44,7 @@ h --default-ulimits --device-ownership-from-security-context --drop-infra-ctr +--enable-criu-support --enable-metrics --enable-profile-unix-socket --enable-tracing diff --git a/completions/fish/crio.fish b/completions/fish/crio.fish index a27c45063b3..1a6916ed97d 100644 --- a/completions/fish/crio.fish +++ b/completions/fish/crio.fish @@ -49,6 +49,7 @@ complete -c crio -n '__fish_crio_no_subcommand' -f -l default-transport -r -d 'A complete -c crio -n '__fish_crio_no_subcommand' -f -l default-ulimits -r -d 'Ulimits to apply to containers by default (name=soft:hard) (default: [])' complete -c crio -n '__fish_crio_no_subcommand' -f -l device-ownership-from-security-context -d 'Set devices\' uid/gid ownership from runAsUser/runAsGroup' complete -c crio -n '__fish_crio_no_subcommand' -f -l drop-infra-ctr -d 'Determines whether pods are created without an infra container, when the pod is not using a pod level PID namespace (default: true)' +complete -c crio -n '__fish_crio_no_subcommand' -f -l enable-criu-support -d 'Enable CRIU integration, requires that the criu binary is available in $PATH. (default: \'\')' complete -c crio -n '__fish_crio_no_subcommand' -f -l enable-metrics -d 'Enable metrics endpoint for the server on localhost:9090' complete -c crio -n '__fish_crio_no_subcommand' -f -l enable-profile-unix-socket -d 'Enable pprof profiler on crio unix domain socket' complete -c crio -n '__fish_crio_no_subcommand' -f -l enable-tracing -d 'Enable OpenTelemetry trace data exporting' diff --git a/completions/zsh/_crio b/completions/zsh/_crio index 1d18ea7ede1..5d45075fa41 100644 --- a/completions/zsh/_crio +++ b/completions/zsh/_crio @@ -51,6 +51,7 @@ it later with **--config**. Global options will modify the output.' 
'--default-ulimits' '--device-ownership-from-security-context' '--drop-infra-ctr' + '--enable-criu-support' '--enable-metrics' '--enable-profile-unix-socket' '--enable-tracing' diff --git a/contrib/test/integration/main.yml b/contrib/test/integration/main.yml index 00ec2b05c6a..2356ebfa82a 100644 --- a/contrib/test/integration/main.yml +++ b/contrib/test/integration/main.yml @@ -20,7 +20,7 @@ - name: clone build and install cri-tools include: "build/cri-tools.yml" vars: - cri_tools_git_version: "v1.24.2" + cri_tools_git_version: "v1.25.0" - name: clone build and install kubernetes include: "build/kubernetes.yml" diff --git a/dependencies.yaml b/dependencies.yaml index 1772b53b10e..16bc7a16c65 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -42,7 +42,7 @@ dependencies: match: conmon - name: cri-tools - version: v1.24.2 + version: v1.25.0 refPaths: - path: scripts/versions match: cri-tools diff --git a/docs/crio.8.md b/docs/crio.8.md index 8f810633895..130f67e86fd 100644 --- a/docs/crio.8.md +++ b/docs/crio.8.md @@ -42,6 +42,7 @@ crio [--default-ulimits]=[value] [--device-ownership-from-security-context] [--drop-infra-ctr] +[--enable-criu-support] [--enable-metrics] [--enable-profile-unix-socket] [--enable-tracing] @@ -206,6 +207,8 @@ crio [GLOBAL OPTIONS] command [COMMAND OPTIONS] [ARGUMENTS...] **--drop-infra-ctr**: Determines whether pods are created without an infra container, when the pod is not using a pod level PID namespace (default: true) +**--enable-criu-support**: Enable CRIU integration, requires that the criu binary is available in $PATH. (default: '') + **--enable-metrics**: Enable metrics endpoint for the server on localhost:9090 **--enable-profile-unix-socket**: Enable pprof profiler on crio unix domain socket diff --git a/docs/crio.conf.5.md b/docs/crio.conf.5.md index dfb213daf9c..86549856a11 100644 --- a/docs/crio.conf.5.md +++ b/docs/crio.conf.5.md @@ -283,6 +283,9 @@ the container runtime configuration. **device_ownership_from_security_context**=false Changes the default behavior of setting container devices uid/gid from CRI's SecurityContext (RunAsUser/RunAsGroup) instead of taking host's uid/gid. +**enable_criu_support**=false + Enable CRIU integration, requires that the criu binary is available in $PATH. (default: false) + ### CRIO.RUNTIME.RUNTIMES TABLE The "crio.runtime.runtimes" table defines a list of OCI compatible runtimes. The runtime to use is picked based on the runtime handler provided by the CRI. If no runtime handler is provided, the runtime will be picked based on the level of trust of the workload. 
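
The docs and completion changes above introduce the `--enable-criu-support` boolean flag and the matching `enable_criu_support` config entry. As a minimal, self-contained sketch (not CRI-O's actual wiring), this is how such a boolean flag is typically mapped onto a config field with `urfave/cli/v2`, mirroring the `ctx.IsSet`/`ctx.Bool` pattern used in `internal/criocli/criocli.go` later in this diff; the `config` struct and `main` function here are illustrative only.

```go
package main

import (
	"fmt"
	"os"

	"github.com/urfave/cli/v2"
)

// config stands in for CRI-O's libconfig.Config; only the field relevant
// to this sketch is shown.
type config struct {
	EnableCriuSupport bool
}

func main() {
	cfg := &config{}
	app := &cli.App{
		Flags: []cli.Flag{
			&cli.BoolFlag{
				Name:    "enable-criu-support",
				Usage:   "Enable CRIU integration, requires that the criu binary is available in $PATH.",
				EnvVars: []string{"CONTAINER_ENABLE_CRIU_SUPPORT"},
				Value:   false,
			},
		},
		Action: func(ctx *cli.Context) error {
			// Only override the config value when the flag was explicitly
			// set, so a value coming from the config file is not clobbered
			// by the flag's default.
			if ctx.IsSet("enable-criu-support") {
				cfg.EnableCriuSupport = ctx.Bool("enable-criu-support")
			}
			fmt.Println("CRIU support enabled:", cfg.EnableCriuSupport)
			return nil
		},
	}
	if err := app.Run(os.Args); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```
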
diff --git a/go.mod b/go.mod index 719c8ac19bc..b15d7f900db 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,8 @@ require ( github.com/BurntSushi/toml v1.2.0 github.com/Microsoft/go-winio v0.5.2 github.com/blang/semver/v4 v4.0.0 + github.com/checkpoint-restore/checkpointctl v0.0.0-20220321135231-33f4a66335f0 + github.com/checkpoint-restore/go-criu/v5 v5.3.0 github.com/container-orchestrated-devices/container-device-interface v0.5.0 github.com/containerd/cgroups v1.0.4 github.com/containerd/containerd v1.6.8 @@ -111,7 +113,6 @@ require ( github.com/VividCortex/ewma v1.2.0 // indirect github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect github.com/acomagu/bufpipe v1.0.3 // indirect - github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be // indirect github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d // indirect github.com/aws/aws-sdk-go v1.44.55 // indirect github.com/aws/aws-sdk-go-v2 v1.16.5 // indirect @@ -136,8 +137,6 @@ require ( github.com/cenkalti/backoff/v4 v4.1.3 // indirect github.com/census-instrumentation/opencensus-proto v0.3.0 // indirect github.com/cespare/xxhash/v2 v2.1.2 // indirect - github.com/checkpoint-restore/checkpointctl v0.0.0-20220321135231-33f4a66335f0 // indirect - github.com/checkpoint-restore/go-criu/v5 v5.3.0 // indirect github.com/cheggaaa/pb/v3 v3.0.8 // indirect github.com/chrismellard/docker-credential-acr-env v0.0.0-20220119192733-fe33c00cee21 // indirect github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect diff --git a/go.sum b/go.sum index 516ebf1d71b..54119603277 100644 --- a/go.sum +++ b/go.sum @@ -283,9 +283,8 @@ github.com/alexflint/go-filemutex v1.1.0/go.mod h1:7P4iRhttt/nUvUOrYIhcpMzv2G6CY github.com/alexkohler/prealloc v1.0.0/go.mod h1:VetnK3dIgFBBKmg0YnD9F9x6Icjd+9cvfHR56wJVlKE= github.com/andybalholm/brotli v1.0.2/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/andybalholm/brotli v1.0.3/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 h1:kFOfPq6dUM1hTo4JG6LR5AXSUEsOjtdm0kw0FtQtMJA= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= -github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= -github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6/go.mod h1:V8iCPQYkqmusNa815XgQio277wI47sdRh1dUOLdyC6Q= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20220418222510-f25a4f6275ed/go.mod h1:F7bn7fEU90QkQ3tnmaTx3LTKLEDqnwWODIYppRQ5hnY= diff --git a/internal/criocli/criocli.go b/internal/criocli/criocli.go index 04a5f64fede..7e13457431a 100644 --- a/internal/criocli/criocli.go +++ b/internal/criocli/criocli.go @@ -278,6 +278,9 @@ func mergeConfig(config *libconfig.Config, ctx *cli.Context) error { if ctx.IsSet("container-exits-dir") { config.ContainerExitsDir = ctx.String("container-exits-dir") } + if ctx.IsSet("enable-criu-support") { + config.EnableCriuSupport = ctx.Bool("enable-criu-support") + } if ctx.IsSet("ctr-stop-timeout") { config.CtrStopTimeout = ctx.Int64("ctr-stop-timeout") } @@ -1012,6 +1015,12 @@ func getCrioFlags(defConf *libconfig.Config) []cli.Flag { Usage: "The number of seconds between collecting pod and container stats. 
If set to 0, the stats are collected on-demand instead.", EnvVars: []string{"CONTAINER_STATS_COLLECTION_PERIOD"}, }, + &cli.BoolFlag{ + Name: "enable-criu-support", + Usage: "Enable CRIU integration, requires that the criu binary is available in $PATH. (default: '')", + EnvVars: []string{"CONTAINER_ENABLE_CRIU_SUPPORT"}, + Value: false, + }, } } diff --git a/internal/factory/container/container.go b/internal/factory/container/container.go index 4d5ab9a4740..01d90e486a9 100644 --- a/internal/factory/container/container.go +++ b/internal/factory/container/container.go @@ -43,7 +43,9 @@ type Container interface { SetConfig(*types.ContainerConfig, *types.PodSandboxConfig) error // SetNameAndID sets a container name and ID - SetNameAndID() error + // It can either generate a new ID or use an existing ID + // if specified as parameter (for container restore) + SetNameAndID(string) error // Config returns the container CRI configuration Config() *types.ContainerConfig @@ -84,6 +86,13 @@ type Container interface { // it takes the sandbox's label, which it falls back upon SelinuxLabel(string) ([]string, error) + // SetRestore marks the container as being restored from a checkpoint + SetRestore(bool) + + // Restore returns if the container is marked as being + // restored from a checkpoint + Restore() bool + // spec functions // returns the spec @@ -128,6 +137,7 @@ type container struct { id string name string privileged bool + restore bool spec generate.Generator pidns nsmgr.Namespace } @@ -306,7 +316,7 @@ func (c *container) SetConfig(cfg *types.ContainerConfig, sboxConfig *types.PodS } // SetNameAndID sets a container name and ID -func (c *container) SetNameAndID() error { +func (c *container) SetNameAndID(oldID string) error { if c.config == nil { return errors.New("config is not set") } @@ -319,7 +329,12 @@ func (c *container) SetNameAndID() error { return errors.New("sandbox metadata is nil") } - id := stringid.GenerateNonCryptoID() + var id string + if oldID == "" { + id = stringid.GenerateNonCryptoID() + } else { + id = oldID + } name := strings.Join([]string{ "k8s", c.config.Metadata.Name, @@ -354,6 +369,17 @@ func (c *container) Name() string { return c.name } +// Restore returns if the container is marked as being +// restored from a checkpoint +func (c *container) Restore() bool { + return c.restore +} + +// SetRestore marks the container as being restored from a checkpoint +func (c *container) SetRestore(restore bool) { + c.restore = restore +} + // SetPrivileged sets the privileged bool for the container func (c *container) SetPrivileged() error { if c.config == nil { diff --git a/internal/factory/container/container_log_path_test.go b/internal/factory/container/container_log_path_test.go index 96707eb1075..0aea1ce5816 100644 --- a/internal/factory/container/container_log_path_test.go +++ b/internal/factory/container/container_log_path_test.go @@ -66,7 +66,7 @@ var _ = t.Describe("Container:LogPath", func() { // When Expect(sut.SetConfig(config, sboxConfig)).To(BeNil()) - Expect(sut.SetNameAndID()).To(BeNil()) + Expect(sut.SetNameAndID("")).To(BeNil()) // Then logPath, err := sut.LogPath(providedLogDir) diff --git a/internal/factory/container/container_setnameandid_test.go b/internal/factory/container/container_setnameandid_test.go index 4dd8279afda..26fd17e0af8 100644 --- a/internal/factory/container/container_setnameandid_test.go +++ b/internal/factory/container/container_setnameandid_test.go @@ -27,7 +27,7 @@ var _ = t.Describe("Container:SetNameAndID", func() { 
setupContainerWithMetadata(metadata) // When - err := sut.SetNameAndID() + err := sut.SetNameAndID("") // Then Expect(err).To(BeNil()) @@ -37,13 +37,36 @@ var _ = t.Describe("Container:SetNameAndID", func() { Expect(sut.Name()).To(ContainSubstring(uid)) }) + It("should succeed with ID as paramater", func() { + // Given + const ( + name = "name" + namespace = "namespace" + uid = "uid" + ) + metadata := &types.PodSandboxMetadata{ + Name: name, Uid: uid, Namespace: namespace, + } + setupContainerWithMetadata(metadata) + + // When + err := sut.SetNameAndID("use-this-ID") + + // Then + Expect(err).To(BeNil()) + Expect(sut.ID()).To(Equal("use-this-ID")) + Expect(sut.Name()).To(ContainSubstring(name)) + Expect(sut.Name()).To(ContainSubstring(namespace)) + Expect(sut.Name()).To(ContainSubstring(uid)) + }) + It("should succeed with empty sandbox metadata", func() { // Given metadata := &types.PodSandboxMetadata{} setupContainerWithMetadata(metadata) // When - err := sut.SetNameAndID() + err := sut.SetNameAndID("") // Then Expect(err).To(BeNil()) @@ -55,7 +78,7 @@ var _ = t.Describe("Container:SetNameAndID", func() { container, err := container.New() Expect(err).To(BeNil()) - err = container.SetNameAndID() + err = container.SetNameAndID("") // Then Expect(container).ToNot(BeNil()) diff --git a/internal/factory/container/container_test.go b/internal/factory/container/container_test.go index b2857c170cd..0bca350ef6b 100644 --- a/internal/factory/container/container_test.go +++ b/internal/factory/container/container_test.go @@ -253,6 +253,24 @@ var _ = t.Describe("Container", func() { Expect(sut.ReadOnly(true)).To(Equal(true)) }) }) + t.Describe("Restore", func() { + It("should not be restore by default", func() { + // Given + // When + // Then + Expect(sut.Restore()).To(BeFalse()) + }) + It("should be restore when specified", func() { + // Given + Expect(sut.Restore()).To(BeFalse()) + + // When + sut.SetRestore(true) + + // Then + Expect(sut.Restore()).To(BeTrue()) + }) + }) t.Describe("SelinuxLabel", func() { BeforeEach(func() { config.Linux = &types.LinuxContainerConfig{ diff --git a/internal/lib/checkpoint.go b/internal/lib/checkpoint.go new file mode 100644 index 00000000000..b0b0621282e --- /dev/null +++ b/internal/lib/checkpoint.go @@ -0,0 +1,229 @@ +package lib + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + + metadata "github.com/checkpoint-restore/checkpointctl/lib" + "github.com/checkpoint-restore/go-criu/v5/stats" + "github.com/containers/podman/v4/libpod" + "github.com/containers/podman/v4/pkg/annotations" + "github.com/containers/podman/v4/pkg/checkpoint/crutils" + "github.com/containers/storage/pkg/archive" + "github.com/cri-o/cri-o/internal/oci" + rspec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "github.com/sirupsen/logrus" +) + +type ContainerCheckpointRestoreOptions struct { + Container string + Pod string + + libpod.ContainerCheckpointOptions +} + +// ContainerCheckpoint checkpoints a running container. 
+func (c *ContainerServer) ContainerCheckpoint(ctx context.Context, opts *ContainerCheckpointRestoreOptions) (string, error) { + ctr, err := c.LookupContainer(opts.Container) + if err != nil { + return "", fmt.Errorf("failed to find container %s: %w", opts.Container, err) + } + + configFile := filepath.Join(ctr.BundlePath(), "config.json") + specgen, err := generate.NewFromFile(configFile) + if err != nil { + return "", fmt.Errorf("not able to read config for container %q: %w", ctr.ID(), err) + } + + cStatus := ctr.State() + if cStatus.Status != oci.ContainerStateRunning { + return "", fmt.Errorf("container %s is not running", ctr.ID()) + } + + if opts.TargetFile != "" { + if err := c.prepareCheckpointExport(ctr); err != nil { + return "", fmt.Errorf("failed to write config dumps for container %s: %w", ctr.ID(), err) + } + } + + if err := c.runtime.CheckpointContainer(ctx, ctr, specgen.Config, opts.KeepRunning); err != nil { + return "", fmt.Errorf("failed to checkpoint container %s: %w", ctr.ID(), err) + } + if opts.TargetFile != "" { + if err := c.exportCheckpoint(ctr, specgen.Config, opts.TargetFile); err != nil { + return "", fmt.Errorf("failed to write file system changes of container %s: %w", ctr.ID(), err) + } + } + if err := c.storageRuntimeServer.StopContainer(ctr.ID()); err != nil { + return "", fmt.Errorf("failed to unmount container %s: %w", ctr.ID(), err) + } + if err := c.ContainerStateToDisk(ctx, ctr); err != nil { + logrus.Warnf("Unable to write containers %s state to disk: %v", ctr.ID(), err) + } + + if !opts.Keep { + cleanup := []string{ + metadata.DumpLogFile, + stats.StatsDump, + metadata.ConfigDumpFile, + metadata.SpecDumpFile, + } + for _, del := range cleanup { + file := filepath.Join(ctr.Dir(), del) + if err := os.Remove(file); err != nil { + logrus.Debugf("Unable to remove file %s", file) + } + } + } + + return ctr.ID(), nil +} + +// Copied from libpod/diff.go +var containerMounts = map[string]bool{ + "/dev": true, + "/etc/hostname": true, + "/etc/hosts": true, + "/etc/resolv.conf": true, + "/proc": true, + "/run": true, + "/run/.containerenv": true, + "/run/secrets": true, + "/sys": true, +} + +// getDiff returns the file system differences +// Copied from libpod/diff.go and simplified for the checkpoint use case +func (c *ContainerServer) getDiff(id string) (rchanges []archive.Change, err error) { + layerID, err := c.GetContainerTopLayerID(id) + if err != nil { + return nil, err + } + changes, err := c.store.Changes("", layerID) + if err == nil { + for _, c := range changes { + if containerMounts[c.Path] { + continue + } + rchanges = append(rchanges, c) + } + } + return rchanges, err +} + +// To make the checkpoint/restore code use the same fields as Podman: +type ContainerConfig struct { + ID string `json:"id"` + Name string `json:"name"` + RootfsImageName string `json:"rootfsImageName,omitempty"` +} + +// prepareCheckpointExport writes the config and spec to +// JSON files for later export +// Podman: libpod/container_internal.go +func (c *ContainerServer) prepareCheckpointExport(ctr *oci.Container) error { + config := &ContainerConfig{ + ID: ctr.ID(), + Name: ctr.Name(), + RootfsImageName: ctr.ImageName(), + } + + if _, err := metadata.WriteJSONFile(config, ctr.Dir(), metadata.ConfigDumpFile); err != nil { + return err + } + + // save spec + jsonPath := filepath.Join(ctr.BundlePath(), "config.json") + g, err := generate.NewFromFile(jsonPath) + if err != nil { + return fmt.Errorf("generating spec for container %q failed: %w", ctr.ID(), err) + } + if _, err := 
metadata.WriteJSONFile(g.Config, ctr.Dir(), metadata.SpecDumpFile); err != nil { + return fmt.Errorf("generating spec for container %q failed: %w", ctr.ID(), err) + } + + return nil +} + +func (c *ContainerServer) exportCheckpoint(ctr *oci.Container, specgen *rspec.Spec, export string) error { + id := ctr.ID() + dest := ctr.Dir() + logrus.Debugf("Exporting checkpoint image of container %q to %q", id, dest) + + includeFiles := []string{ + metadata.DumpLogFile, + metadata.CheckpointDirectory, + metadata.ConfigDumpFile, + metadata.SpecDumpFile, + } + + // To correctly track deleted files, let's go through the output of 'podman diff' + rootFsChanges, err := c.getDiff(id) + if err != nil { + return fmt.Errorf("error exporting root file-system diff for %q: %w", id, err) + } + mountPoint, err := c.StorageImageServer().GetStore().Mount(id, specgen.Linux.MountLabel) + if err != nil { + return fmt.Errorf("not able to get mountpoint for container %q: %w", id, err) + } + addToTarFiles, err := crutils.CRCreateRootFsDiffTar(&rootFsChanges, mountPoint, dest) + if err != nil { + return err + } + + // Put log file into checkpoint archive + _, err = os.Stat(specgen.Annotations[annotations.LogPath]) + if err == nil { + src, err := os.Open(specgen.Annotations[annotations.LogPath]) + if err != nil { + return fmt.Errorf("error opening log file %q: %w", specgen.Annotations[annotations.LogPath], err) + } + defer src.Close() + destLogPath := filepath.Join(dest, annotations.LogPath) + destLog, err := os.Create(destLogPath) + if err != nil { + return fmt.Errorf("error opening log file %q: %w", destLogPath, err) + } + defer destLog.Close() + _, err = io.Copy(destLog, src) + if err != nil { + return fmt.Errorf("copying log file to %q failed: %w", destLogPath, err) + } + addToTarFiles = append(addToTarFiles, annotations.LogPath) + } + + includeFiles = append(includeFiles, addToTarFiles...) + + input, err := archive.TarWithOptions(ctr.Dir(), &archive.TarOptions{ + // This should be configurable via api.proti + Compression: archive.Uncompressed, + IncludeSourceDir: true, + IncludeFiles: includeFiles, + }) + if err != nil { + return fmt.Errorf("error reading checkpoint directory %q: %w", id, err) + } + + // The resulting tar archive should not be readable by everyone as it contains + // every memory page of the checkpointed processes. + outFile, err := os.OpenFile(export, os.O_RDWR|os.O_CREATE, 0o600) + if err != nil { + return fmt.Errorf("error creating checkpoint export file %q: %w", export, err) + } + defer outFile.Close() + + _, err = io.Copy(outFile, input) + if err != nil { + return err + } + + for _, file := range addToTarFiles { + os.Remove(filepath.Join(dest, file)) + } + + return nil +} diff --git a/internal/lib/checkpoint_test.go b/internal/lib/checkpoint_test.go new file mode 100644 index 00000000000..cbf1a1f169e --- /dev/null +++ b/internal/lib/checkpoint_test.go @@ -0,0 +1,195 @@ +package lib_test + +import ( + "context" + "os" + + "github.com/containers/podman/v4/pkg/criu" + cstorage "github.com/containers/storage" + "github.com/containers/storage/pkg/archive" + "github.com/cri-o/cri-o/internal/lib" + "github.com/cri-o/cri-o/internal/oci" + "github.com/golang/mock/gomock" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +// The actual test suite +var _ = t.Describe("ContainerCheckpoint", func() { + // Prepare the sut + BeforeEach(func() { + beforeEach() + createDummyConfig() + mockRuncInLibConfig() + if !criu.CheckForCriu(criu.PodCriuVersion) { + Skip("CRIU is missing or too old.") + } + }) + + AfterEach(func() { + os.RemoveAll("dump.log") + }) + + t.Describe("ContainerCheckpoint", func() { + It("should fail with container not running", func() { + // Given + var opts lib.ContainerCheckpointRestoreOptions + + addContainerAndSandbox() + + opts.Container = containerID + + // When + res, err := sut.ContainerCheckpoint(context.Background(), &opts) + + // Then + Expect(err).NotTo(BeNil()) + Expect(res).To(Equal("")) + Expect(err.Error()).To(Equal(`container containerID is not running`)) + }) + }) + t.Describe("ContainerCheckpoint", func() { + It("should succeed", func() { + // Given + var opts lib.ContainerCheckpointRestoreOptions + + addContainerAndSandbox() + opts.Container = containerID + + myContainer.SetState(&oci.ContainerState{ + State: specs.State{Status: oci.ContainerStateRunning}, + }) + myContainer.SetSpec(&specs.Spec{Version: "1.0.0"}) + + gomock.InOrder( + storeMock.EXPECT().Container(gomock.Any()).Return(&cstorage.Container{}, nil), + storeMock.EXPECT().Unmount(gomock.Any(), gomock.Any()).Return(true, nil), + ) + + // When + res, err := sut.ContainerCheckpoint(context.Background(), &opts) + + // Then + Expect(err).To(BeNil()) + Expect(res).To(Equal(opts.Container)) + }) + }) + t.Describe("ContainerCheckpoint", func() { + It("should fail because runtime failure (/bin/false)", func() { + // Given + mockRuncToFalseInLibConfig() + var opts lib.ContainerCheckpointRestoreOptions + + addContainerAndSandbox() + opts.Container = containerID + + myContainer.SetState(&oci.ContainerState{ + State: specs.State{Status: oci.ContainerStateRunning}, + }) + myContainer.SetSpec(&specs.Spec{Version: "1.0.0"}) + + // When + _, err := sut.ContainerCheckpoint(context.Background(), &opts) + + // Then + Expect(err).ToNot(BeNil()) + Expect(err.Error()).To(ContainSubstring(`failed to checkpoint container containerID`)) + }) + }) + t.Describe("ContainerCheckpoint", func() { + It("should fail with export", func() { + // Given + var opts lib.ContainerCheckpointRestoreOptions + + addContainerAndSandbox() + opts.Container = containerID + opts.TargetFile = "cp.tar" + defer os.RemoveAll("cp.tar") + + myContainer.SetState(&oci.ContainerState{ + State: specs.State{Status: oci.ContainerStateRunning}, + }) + myContainer.SetSpec(&specs.Spec{Version: "1.0.0"}) + + gomock.InOrder( + storeMock.EXPECT().Container(gomock.Any()).Return(&cstorage.Container{}, nil), + storeMock.EXPECT().Changes(gomock.Any(), gomock.Any()).Return([]archive.Change{{Kind: archive.ChangeDelete, Path: "deleted.file"}}, nil), + storeMock.EXPECT().Mount(gomock.Any(), gomock.Any()).Return("/tmp/", nil), + storeMock.EXPECT().Container(gomock.Any()).Return(&cstorage.Container{}, nil), + storeMock.EXPECT().Unmount(gomock.Any(), gomock.Any()).Return(true, nil), + ) + + // When + res, err := sut.ContainerCheckpoint(context.Background(), &opts) + + // Then + Expect(err).To(BeNil()) + Expect(res).To(ContainSubstring(opts.Container)) + }) + }) + t.Describe("ContainerCheckpoint", func() { + It("should fail during unmount", func() { + // Given + var opts lib.ContainerCheckpointRestoreOptions + + addContainerAndSandbox() + opts.Container = containerID + + 
myContainer.SetState(&oci.ContainerState{ + State: specs.State{Status: oci.ContainerStateRunning}, + }) + myContainer.SetSpec(&specs.Spec{Version: "1.0.0"}) + + gomock.InOrder( + storeMock.EXPECT().Container(gomock.Any()).Return(&cstorage.Container{}, nil), + storeMock.EXPECT().Unmount(gomock.Any(), gomock.Any()).Return(true, t.TestError), + ) + + // When + _, err := sut.ContainerCheckpoint(context.Background(), &opts) + + // Then + Expect(err.Error()).To(Equal(`failed to unmount container containerID: error`)) + }) + }) +}) + +var _ = t.Describe("ContainerCheckpoint", func() { + // Prepare the sut + BeforeEach(beforeEach) + + t.Describe("ContainerCheckpoint", func() { + It("should fail with invalid container ID", func() { + // Given + var opts lib.ContainerCheckpointRestoreOptions + opts.Container = "invalid" + + // When + res, err := sut.ContainerCheckpoint(context.Background(), &opts) + + // Then + Expect(err).NotTo(BeNil()) + Expect(res).To(Equal("")) + Expect(err.Error()).To(Equal(`failed to find container invalid: container with ID starting with invalid not found: ID does not exist`)) + }) + }) + t.Describe("ContainerCheckpoint", func() { + It("should fail with invalid config", func() { + // Given + var opts lib.ContainerCheckpointRestoreOptions + + addContainerAndSandbox() + + opts.Container = containerID + + // When + res, err := sut.ContainerCheckpoint(context.Background(), &opts) + + // Then + Expect(err).NotTo(BeNil()) + Expect(res).To(Equal("")) + Expect(err.Error()).To(Equal(`not able to read config for container "containerID": template configuration at config.json not found`)) + }) + }) +}) diff --git a/internal/lib/container.go b/internal/lib/container.go index a406f87030f..2679d19f66e 100644 --- a/internal/lib/container.go +++ b/internal/lib/container.go @@ -3,10 +3,30 @@ package lib import ( "fmt" + cstorage "github.com/containers/storage" + "github.com/cri-o/cri-o/internal/lib/sandbox" "github.com/cri-o/cri-o/internal/oci" "github.com/cri-o/cri-o/internal/registrar" ) +// GetStorageContainer searches for a container with the given name or ID in the given store +func (c *ContainerServer) GetStorageContainer(container string) (*cstorage.Container, error) { + ociCtr, err := c.LookupContainer(container) + if err != nil { + return nil, err + } + return c.store.Container(ociCtr.ID()) +} + +// GetContainerTopLayerID gets the ID of the top layer of the given container +func (c *ContainerServer) GetContainerTopLayerID(containerID string) (string, error) { + ctr, err := c.GetStorageContainer(containerID) + if err != nil { + return "", err + } + return ctr.LayerID, nil +} + // GetContainerFromShortID gets an oci container matching the specified full or partial id func (c *ContainerServer) GetContainerFromShortID(cid string) (*oci.Container, error) { if cid == "" { @@ -47,3 +67,38 @@ func (c *ContainerServer) LookupContainer(idOrName string) (*oci.Container, erro return c.GetContainerFromShortID(ctrID) } + +func (c *ContainerServer) getSandboxFromRequest(pid string) (*sandbox.Sandbox, error) { + if pid == "" { + return nil, fmt.Errorf("pod ID should not be empty") + } + + podID, err := c.podIDIndex.Get(pid) + if err != nil { + return nil, fmt.Errorf("pod with ID starting with %s not found: %v", pid, err) + } + + sb := c.GetSandbox(podID) + if sb == nil { + return nil, fmt.Errorf("specified pod not found: %s", podID) + } + return sb, nil +} + +// LookupSandbox returns the pod sandbox with the given name or full or partial id +func (c *ContainerServer) LookupSandbox(idOrName string) 
(*sandbox.Sandbox, error) { + if idOrName == "" { + return nil, fmt.Errorf("container ID or name should not be empty") + } + + podID, err := c.podNameIndex.Get(idOrName) + if err != nil { + if err == registrar.ErrNameNotReserved { + podID = idOrName + } else { + return nil, err + } + } + + return c.getSandboxFromRequest(podID) +} diff --git a/internal/lib/restore.go b/internal/lib/restore.go new file mode 100644 index 00000000000..0fea0247687 --- /dev/null +++ b/internal/lib/restore.go @@ -0,0 +1,153 @@ +package lib + +import ( + "context" + "fmt" + "os" + "path/filepath" + + metadata "github.com/checkpoint-restore/checkpointctl/lib" + "github.com/checkpoint-restore/go-criu/v5/stats" + "github.com/containers/podman/v4/pkg/checkpoint/crutils" + "github.com/cri-o/cri-o/internal/oci" + "github.com/opencontainers/runtime-tools/generate" + "github.com/sirupsen/logrus" +) + +// ContainerRestore restores a checkpointed container. +func (c *ContainerServer) ContainerRestore(ctx context.Context, opts *ContainerCheckpointRestoreOptions) (string, error) { + var ctr *oci.Container + var err error + ctr, err = c.LookupContainer(opts.Container) + if err != nil { + return "", fmt.Errorf("failed to find container %s: %w", opts.Container, err) + } + + cStatus := ctr.State() + if cStatus.Status == oci.ContainerStateRunning { + return "", fmt.Errorf("cannot restore running container %s", ctr.ID()) + } + + // Get config.json + configFile := filepath.Join(ctr.Dir(), "config.json") + ctrSpec, err := generate.NewFromFile(configFile) + if err != nil { + return "", err + } + // During checkpointing the container is unmounted. This mounts the container again. + mountPoint, err := c.StorageImageServer().GetStore().Mount(ctr.ID(), ctrSpec.Config.Linux.MountLabel) + if err != nil { + logrus.Debugf("Failed to mount container %q: %v", ctr.ID(), err) + return "", err + } + logrus.Debugf("Container mountpoint %v", mountPoint) + logrus.Debugf("Sandbox %v", ctr.Sandbox()) + logrus.Debugf("Specgen.Config.Annotations[io.kubernetes.cri-o.SandboxID] %v", ctrSpec.Config.Annotations["io.kubernetes.cri-o.SandboxID"]) + // If there was no podID specified this will restore the container + // in its original sandbox + if opts.Pod == "" { + opts.Pod = ctr.Sandbox() + } + sb, err := c.LookupSandbox(opts.Pod) + if err != nil { + return "", err + } + ic := sb.InfraContainer() + if ic == nil { + return "", fmt.Errorf("infra container of sandbox %v not found", sb.Name()) + } + infraConfigFile := filepath.Join(ic.BundlePath(), "config.json") + specgen, err := generate.NewFromFile(infraConfigFile) + if err != nil { + return "", err + } + + if ctr.RestoreArchive() != "" { + if err := crutils.CRImportCheckpointWithoutConfig(ctr.Dir(), ctr.RestoreArchive()); err != nil { + return "", err + } + if err := c.restoreFileSystemChanges(ctr, mountPoint); err != nil { + return "", err + } + for _, m := range ctrSpec.Config.Mounts { + // This checks if all bind mount sources exist. + // We cannot create missing bind mount sources automatically + // as the source and destination need to be of the same type. + // CRIU will fail restoring if the external bind mount source + // is a directory but the internal destination is a file. + // As destinations can be in nested bind mounts, which are only + // correctly setup by runc/crun during container restore, we + // cannot figure out the file type of the destination. + // At this point we will fail and tell the user to create + // the missing bind mount source file/directory. 
+ if m.Type != "bind" { + continue + } + _, err := os.Lstat(m.Source) + if err != nil { + return "", fmt.Errorf( + "the bind mount source %s is missing. %s", + m.Source, + "Please create the corresponding file or directory", + ) + } + } + } + + if err := c.runtime.RestoreContainer(ctx, ctr, specgen.Config, ic.State().Pid, sb.CgroupParent()); err != nil { + return "", fmt.Errorf("failed to restore container %s: %w", ctr.ID(), err) + } + if err := c.ContainerStateToDisk(ctx, ctr); err != nil { + logrus.Warnf("Unable to write containers %s state to disk: %v", ctr.ID(), err) + } + + if !opts.Keep { + // Delete all checkpoint related files. At this point, in theory, all files + // should exist. Still ignoring errors for now as the container should be + // restored and running. Not erroring out just because some cleanup operation + // failed. Starting with the checkpoint directory + err = os.RemoveAll(ctr.CheckpointPath()) + if err != nil { + logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", ctr.CheckpointPath(), err) + } + cleanup := [...]string{ + metadata.RestoreLogFile, + metadata.DumpLogFile, + stats.StatsDump, + stats.StatsRestore, + metadata.NetworkStatusFile, + metadata.RootFsDiffTar, + metadata.DeletedFilesFile, + } + for _, del := range cleanup { + var file string + if del == metadata.RestoreLogFile || del == stats.StatsRestore { + // Checkpointing uses runc and it is possible to tell runc + // the location of the log file using '--work-path'. + // Restore goes through conmon and conmon does (not yet?) + // expose runc's '--work-path' which means that temporary + // restore files are put into BundlePath(). + file = filepath.Join(ctr.BundlePath(), del) + } else { + file = filepath.Join(ctr.Dir(), del) + } + err = os.Remove(file) + if err != nil { + logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err) + } + } + } + + return ctr.ID(), nil +} + +func (c *ContainerServer) restoreFileSystemChanges(ctr *oci.Container, mountPoint string) error { + if err := crutils.CRApplyRootFsDiffTar(ctr.Dir(), mountPoint); err != nil { + return err + } + + if err := crutils.CRRemoveDeletedFiles(ctr.ID(), ctr.Dir(), mountPoint); err != nil { + return err + } + return nil +} diff --git a/internal/lib/restore_test.go b/internal/lib/restore_test.go new file mode 100644 index 00000000000..d1f6fa0aa5d --- /dev/null +++ b/internal/lib/restore_test.go @@ -0,0 +1,239 @@ +package lib_test + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + "time" + + "github.com/containers/podman/v4/pkg/criu" + "github.com/containers/storage/pkg/archive" + "github.com/cri-o/cri-o/internal/lib" + "github.com/cri-o/cri-o/internal/oci" + "github.com/golang/mock/gomock" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + types "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +var _ = t.Describe("ContainerRestore", func() { + // Prepare the sut + BeforeEach(func() { + beforeEach() + createDummyConfig() + mockRuncInLibConfigCheckpoint() + if !criu.CheckForCriu(criu.PodCriuVersion) { + Skip("CRIU is missing or too old.") + } + }) + + t.Describe("ContainerRestore", func() { + It("should fail with invalid container ID", func() { + // Given + var opts lib.ContainerCheckpointRestoreOptions + opts.Container = "invalid" + + // When + res, err := sut.ContainerRestore(context.Background(), &opts) + + // Then + Expect(err).NotTo(BeNil()) + Expect(res).To(Equal("")) + Expect(err.Error()).To(Equal(`failed to find container invalid: container with ID starting with invalid not found: ID does not exist`)) + }) + }) + t.Describe("ContainerRestore", func() { + It("should fail with container not running", func() { + // Given + var opts lib.ContainerCheckpointRestoreOptions + + addContainerAndSandbox() + + myContainer.SetState(&oci.ContainerState{ + State: specs.State{Status: oci.ContainerStateRunning}, + }) + myContainer.SetSpec(&specs.Spec{Version: "1.0.0"}) + + opts.Container = containerID + + // When + res, err := sut.ContainerRestore(context.Background(), &opts) + + // Then + Expect(err).NotTo(BeNil()) + Expect(res).To(Equal("")) + Expect(err.Error()).To(Equal(`cannot restore running container containerID`)) + }) + }) + t.Describe("ContainerRestore", func() { + It("should fail with invalid config", func() { + // Given + var opts lib.ContainerCheckpointRestoreOptions + + addContainerAndSandbox() + opts.Container = containerID + + gomock.InOrder( + storeMock.EXPECT().Mount(gomock.Any(), gomock.Any()).Return("/tmp/", nil), + ) + + // When + res, err := sut.ContainerRestore(context.Background(), &opts) + + // Then + Expect(err).NotTo(BeNil()) + Expect(res).To(Equal("")) + Expect(err.Error()).To(Equal(`infra container of sandbox not found`)) + }) + }) + t.Describe("ContainerRestore", func() { + It("should fail with failed to restore container", func() { + // Given + var opts lib.ContainerCheckpointRestoreOptions + + createDummyConfig() + addContainerAndSandbox() + opts.Container = containerID + myContainer.SetState(&oci.ContainerState{ + State: specs.State{Status: oci.ContainerStateStopped}, + }) + myContainer.SetSpec(&specs.Spec{ + Version: "1.0.0", + Process: &specs.Process{}, + Linux: &specs.Linux{}, + }) + + gomock.InOrder( + storeMock.EXPECT().Mount(gomock.Any(), gomock.Any()).Return("/tmp/", nil), + ) + + err := os.Mkdir("bundle", 0o700) + Expect(err).To(BeNil()) + setupInfraContainerWithPid(42, "bundle") + defer os.RemoveAll("bundle") + err = os.Mkdir("checkpoint", 0o700) + Expect(err).To(BeNil()) + defer os.RemoveAll("checkpoint") + inventory, err := os.OpenFile("checkpoint/inventory.img", os.O_RDONLY|os.O_CREATE, 0o644) + Expect(err).To(BeNil()) + inventory.Close() + // When + res, err := sut.ContainerRestore(context.Background(), &opts) + + defer os.RemoveAll("restore.log") + // Then + Expect(err).NotTo(BeNil()) + Expect(res).To(Equal("")) + Expect(err.Error()).To(ContainSubstring(`failed to restore container containerID`)) + }) + }) + t.Describe("ContainerRestore", func() { + It("should fail with failed to restore", func() { + // Given + var opts lib.ContainerCheckpointRestoreOptions + + opts.Container = containerID + + createDummyConfig() + addContainerAndSandbox() + + 
myContainer.SetStateAndSpoofPid(&oci.ContainerState{ + State: specs.State{Status: oci.ContainerStateStopped}, + }) + + myContainer.SetSpec(&specs.Spec{ + Version: "1.0.0", + Process: &specs.Process{}, + Linux: &specs.Linux{}, + }) + + gomock.InOrder( + storeMock.EXPECT().Mount(gomock.Any(), gomock.Any()).Return("/tmp/", nil), + ) + + err := os.WriteFile("spec.dump", []byte(`{"annotations":{"io.kubernetes.cri-o.Metadata":"{\"name\":\"container-to-restore\"}"}}`), 0o644) + Expect(err).To(BeNil()) + defer os.RemoveAll("spec.dump") + err = os.WriteFile("config.dump", []byte(`{"rootfsImageName": "image"}`), 0o644) + Expect(err).To(BeNil()) + defer os.RemoveAll("config.dump") + + err = os.Mkdir("checkpoint", 0o700) + Expect(err).To(BeNil()) + defer os.RemoveAll("checkpoint") + inventory, err := os.OpenFile("checkpoint/inventory.img", os.O_RDONLY|os.O_CREATE, 0o644) + Expect(err).To(BeNil()) + inventory.Close() + + rootfs, err := os.OpenFile("rootfs-diff.tar", os.O_RDONLY|os.O_CREATE, 0o644) + Expect(err).To(BeNil()) + defer os.RemoveAll("rootfs-diff.tar") + rootfs.Close() + + err = os.WriteFile("deleted.files", []byte(`{}`), 0o644) + Expect(err).To(BeNil()) + defer os.RemoveAll("deleted.files") + + outFile, err := os.Create("archive.tar") + Expect(err).To(BeNil()) + defer outFile.Close() + input, err := archive.TarWithOptions(".", &archive.TarOptions{ + Compression: archive.Uncompressed, + IncludeSourceDir: true, + IncludeFiles: []string{"spec.dump", "config.dump", "checkpoint", "deleted.files"}, + }) + Expect(err).To(BeNil()) + defer os.RemoveAll("archive.tar") + _, err = io.Copy(outFile, input) + Expect(err).To(BeNil()) + + opts.TargetFile = "archive.tar" + err = os.Mkdir("bundle", 0o700) + Expect(err).To(BeNil()) + setupInfraContainerWithPid(42, "bundle") + defer os.RemoveAll("bundle") + + // When + res, err := sut.ContainerRestore(context.Background(), &opts) + + // Then + Expect(err).NotTo(BeNil()) + Expect(res).To(Equal("")) + fmt.Printf("%#v\n", config.Runtimes) + Expect(err.Error()).To(ContainSubstring(`failed to restore container containerID: failed to`)) + }) + }) +}) + +func setupInfraContainerWithPid(pid int, bundle string) { + testContainer, err := oci.NewContainer("testid", "testname", bundle, + "/container/logs", map[string]string{}, + map[string]string{}, map[string]string{}, "image", + "imageName", "imageRef", &types.ContainerMetadata{}, + "testsandboxid", false, false, false, "", + "/root/for/container", time.Now(), "SIGKILL") + Expect(err).To(BeNil()) + Expect(testContainer).NotTo(BeNil()) + + cstate := &oci.ContainerState{} + cstate.State = specs.State{ + Pid: pid, + } + // eat error here because callers may send invalid pids to test against + _ = cstate.SetInitPid(pid) // nolint:errcheck + testContainer.SetState(cstate) + testContainer.SetSpec(&specs.Spec{ + Version: "1.0.0", + Annotations: map[string]string{"io.kubernetes.cri-o.SandboxID": "sandboxID"}, + }) + spec := testContainer.Spec() + g := generate.Generator{Config: &spec} + err = g.SaveToFile(filepath.Join(bundle, "config.json"), generate.ExportOptions{}) + Expect(err).To(BeNil()) + + Expect(mySandbox.SetInfraContainer(testContainer)).To(BeNil()) +} diff --git a/internal/lib/sandbox/sandbox.go b/internal/lib/sandbox/sandbox.go index 0d685a9979d..6c43ba63acf 100644 --- a/internal/lib/sandbox/sandbox.go +++ b/internal/lib/sandbox/sandbox.go @@ -53,6 +53,7 @@ type Sandbox struct { seccompProfilePath string infraContainer *oci.Container nsOpts *types.NamespaceOption + dnsConfig *types.DNSConfig stopMutex sync.RWMutex 
created bool stopped bool @@ -144,6 +145,16 @@ func (s *Sandbox) NamespaceOptions() *types.NamespaceOption { return s.nsOpts } +// SetDNSConfig sets the DNSConfig +func (s *Sandbox) SetDNSConfig(dnsConfig *types.DNSConfig) { + s.dnsConfig = dnsConfig +} + +// DNSConfig returns the dnsConfig for the sandbox +func (s *Sandbox) DNSConfig() *types.DNSConfig { + return s.dnsConfig +} + // StopMutex returns the mutex to use when stopping the sandbox func (s *Sandbox) StopMutex() *sync.RWMutex { return &s.stopMutex diff --git a/internal/lib/sandbox/sandbox_test.go b/internal/lib/sandbox/sandbox_test.go index a047fb231c0..48aa65a967b 100644 --- a/internal/lib/sandbox/sandbox_test.go +++ b/internal/lib/sandbox/sandbox_test.go @@ -126,6 +126,24 @@ var _ = t.Describe("Sandbox", func() { }) }) + t.Describe("DNSConfig", func() { + It("should succeed", func() { + // Given + Expect(testSandbox.DNSConfig()).To(BeNil()) + dnsConfig := types.DNSConfig{ + Servers: []string{"server1", "server2"}, + Searches: []string{"searche1", "searches"}, + Options: []string{"option1", "option2"}, + } + + // When + testSandbox.SetDNSConfig(&dnsConfig) + + // Then + Expect(testSandbox.DNSConfig()).To(Equal(&dnsConfig)) + }) + }) + t.Describe("Created", func() { It("should succeed", func() { // Given diff --git a/internal/lib/sandbox/sandbox_test_inject.go b/internal/lib/sandbox/sandbox_test_inject.go new file mode 100644 index 00000000000..81b96bbba9e --- /dev/null +++ b/internal/lib/sandbox/sandbox_test_inject.go @@ -0,0 +1,16 @@ +//go:build test +// +build test + +// All *_inject.go files are meant to be used by tests only. Purpose of this +// files is to provide a way to inject mocked data into the current setup. + +package sandbox + +import ( + "github.com/cri-o/cri-o/internal/hostport" +) + +// SetPortMappings sets the PortMappings for the Sandbox +func (s *Sandbox) SetPortMappings(portMappings []*hostport.PortMapping) { + s.portMappings = portMappings +} diff --git a/internal/lib/suite_test.go b/internal/lib/suite_test.go index 89e744ebb27..b3be5d4f5b4 100644 --- a/internal/lib/suite_test.go +++ b/internal/lib/suite_test.go @@ -30,6 +30,7 @@ func TestLib(t *testing.T) { var ( t *TestFramework + config *libconfig.Config mockCtrl *gomock.Controller libMock *libmock.MockIface storeMock *containerstoragemock.MockStore @@ -106,23 +107,32 @@ var _ = BeforeSuite(func() { }) var _ = AfterSuite(func() { + removeConfig() t.Teardown() mockCtrl.Finish() + _ = os.RemoveAll("/tmp/fake-runtime") }) func removeState() { _ = os.RemoveAll("state.json") } +func removeConfig() { + _ = os.RemoveAll("config.json") +} + func beforeEach() { // Remove old state files removeState() + // Remove old config files + removeConfig() // Only log panics for now logrus.SetLevel(logrus.PanicLevel) // Set the config - config, err := libconfig.DefaultConfig() + var err error + config, err = libconfig.DefaultConfig() Expect(err).To(BeNil()) config.LogDir = "." 
config.HooksDir = []string{} @@ -177,3 +187,27 @@ func addContainerAndSandbox() { func createDummyState() { Expect(os.WriteFile("state.json", []byte("{}"), 0o644)).To(BeNil()) } + +func createDummyConfig() { + Expect(os.WriteFile("config.json", []byte(`{"linux":{},"process":{}}`), 0o644)).To(BeNil()) +} + +func mockRuncInLibConfig() { + config.Runtimes["runc"] = &libconfig.RuntimeHandler{ + RuntimePath: "/bin/echo", + } +} + +func mockRuncInLibConfigCheckpoint() { + Expect(os.WriteFile("/tmp/fake-runtime", []byte("#!/bin/bash\n\necho flag needs an argument\nexit 0\n"), 0o755)).To(BeNil()) + config.Runtimes["runc"] = &libconfig.RuntimeHandler{ + RuntimePath: "/tmp/fake-runtime", + MonitorPath: "/bin/true", + } +} + +func mockRuncToFalseInLibConfig() { + config.Runtimes["runc"] = &libconfig.RuntimeHandler{ + RuntimePath: "/bin/false", + } +} diff --git a/internal/oci/container.go b/internal/oci/container.go index 8d0d6fad485..a6a2f47728a 100644 --- a/internal/oci/container.go +++ b/internal/oci/container.go @@ -12,6 +12,7 @@ import ( "syscall" "time" + metadata "github.com/checkpoint-restore/checkpointctl/lib" "github.com/containers/common/pkg/cgroups" "github.com/containers/common/pkg/signal" "github.com/containers/storage/pkg/idtools" @@ -73,6 +74,8 @@ type Container struct { stopStoppingChan chan struct{} stopLock sync.Mutex pidns nsmgr.Namespace + restore bool + restoreArchive string } func (c *Container) CRIAttributes() *types.ContainerAttributes { @@ -110,7 +113,7 @@ type ContainerState struct { } // NewContainer creates a container object. -func NewContainer(id, name, bundlePath, logPath string, labels, crioAnnotations, annotations map[string]string, image, imageName, imageRef string, metadata *types.ContainerMetadata, sandbox string, terminal, stdin, stdinOnce bool, runtimeHandler, dir string, created time.Time, stopSignal string) (*Container, error) { +func NewContainer(id, name, bundlePath, logPath string, labels, crioAnnotations, annotations map[string]string, image, imageName, imageRef string, md *types.ContainerMetadata, sandbox string, terminal, stdin, stdinOnce bool, runtimeHandler, dir string, created time.Time, stopSignal string) (*Container, error) { state := &ContainerState{} state.Created = created c := &Container{ @@ -119,7 +122,7 @@ func NewContainer(id, name, bundlePath, logPath string, labels, crioAnnotations, PodSandboxId: sandbox, CreatedAt: created.UnixNano(), Labels: labels, - Metadata: metadata, + Metadata: md, Annotations: annotations, Image: &types.ImageSpec{ Image: image, @@ -370,11 +373,27 @@ func (c *Container) Sandbox() string { return c.criContainer.PodSandboxId } +// SetSandbox sets the ID of the Sandbox. +func (c *Container) SetSandbox(podSandboxID string) { + c.criContainer.PodSandboxId = podSandboxID +} + // Dir returns the dir of the container func (c *Container) Dir() string { return c.dir } +// CheckpointPath returns the path to the directory containing the checkpoint +func (c *Container) CheckpointPath() string { + // Podman uses 'bundlePath' as base directory for the checkpoint + // CRI-O uses 'dir' instead of bundlePath as bundlePath seems to be + // normally based on a tmpfs which does not survive a reboot. Also, as + // the checkpoint contains all memory pages, it can be as large as the + // available memory and writing that again to a tmpfs might lead to + // problems. 'dir' seems to be based on /var + return filepath.Join(c.dir, metadata.CheckpointDirectory) +} + // Metadata returns the metadata of the container. 
func (c *Container) Metadata() *types.ContainerMetadata { return c.criContainer.Metadata @@ -643,3 +662,22 @@ func (c *Container) nodeLevelPIDNamespace() bool { } return true } + +// Restore returns if the container is marked as being +// restored from a checkpoint +func (c *Container) Restore() bool { + return c.restore +} + +// SetRestore marks the container as being restored from a checkpoint +func (c *Container) SetRestore(restore bool) { + c.restore = restore +} + +func (c *Container) RestoreArchive() string { + return c.restoreArchive +} + +func (c *Container) SetRestoreArchive(restoreArchive string) { + c.restoreArchive = restoreArchive +} diff --git a/internal/oci/container_test.go b/internal/oci/container_test.go index 174f8ba4061..891c7a42603 100644 --- a/internal/oci/container_test.go +++ b/internal/oci/container_test.go @@ -47,6 +47,7 @@ var _ = t.Describe("Container", func() { Expect(sut.ImageRef()).To(Equal("imageRef")) Expect(sut.Sandbox()).To(Equal("sandbox")) Expect(sut.Dir()).To(Equal("dir")) + Expect(sut.CheckpointPath()).To(Equal("dir/checkpoint")) Expect(sut.StatePath()).To(Equal("dir/state.json")) Expect(sut.Metadata()).To(Equal(&types.ContainerMetadata{})) Expect(sut.StateNoLock().Version).To(BeEmpty()) diff --git a/internal/oci/oci.go b/internal/oci/oci.go index 798be3f9794..cce99f6d363 100644 --- a/internal/oci/oci.go +++ b/internal/oci/oci.go @@ -56,7 +56,7 @@ type Runtime struct { // runtimes. Assumptions based on the fact that a container process runs // on the host will be limited to the RuntimeOCI implementation. type RuntimeImpl interface { - CreateContainer(context.Context, *Container, string) error + CreateContainer(context.Context, *Container, string, bool) error StartContainer(context.Context, *Container) error ExecContainer(context.Context, *Container, []string, io.Reader, io.WriteCloser, io.WriteCloser, bool, <-chan remotecommand.TerminalSize) error @@ -74,6 +74,8 @@ type RuntimeImpl interface { PortForwardContainer(context.Context, *Container, string, int32, io.ReadWriteCloser) error ReopenContainerLog(context.Context, *Container) error + CheckpointContainer(context.Context, *Container, *rspec.Spec, bool) error + RestoreContainer(context.Context, *Container, *rspec.Spec, int, string) error } // New creates a new Runtime with options provided @@ -205,7 +207,7 @@ func (r *Runtime) RuntimeImpl(c *Container) (RuntimeImpl, error) { } // CreateContainer creates a container. -func (r *Runtime) CreateContainer(ctx context.Context, c *Container, cgroupParent string) error { +func (r *Runtime) CreateContainer(ctx context.Context, c *Container, cgroupParent string, restore bool) error { // Instantiate a new runtime implementation for this new container impl, err := r.newRuntimeImpl(c) if err != nil { @@ -217,7 +219,7 @@ func (r *Runtime) CreateContainer(ctx context.Context, c *Container, cgroupParen r.runtimeImplMap[c.ID()] = impl r.runtimeImplMapMutex.Unlock() - return impl.CreateContainer(ctx, c, cgroupParent) + return impl.CreateContainer(ctx, c, cgroupParent, restore) } // StartContainer starts a container. @@ -383,3 +385,23 @@ type ExecSyncError struct { func (e *ExecSyncError) Error() string { return fmt.Sprintf("command error: %+v, stdout: %s, stderr: %s, exit code %d", e.Err, e.Stdout.Bytes(), e.Stderr.Bytes(), e.ExitCode) } + +// CheckpointContainer checkpoints a container. 
+func (r *Runtime) CheckpointContainer(ctx context.Context, c *Container, specgen *rspec.Spec, leaveRunning bool) error { + impl, err := r.RuntimeImpl(c) + if err != nil { + return err + } + + return impl.CheckpointContainer(ctx, c, specgen, leaveRunning) +} + +// RestoreContainer restores a container. +func (r *Runtime) RestoreContainer(ctx context.Context, c *Container, sbSpec *rspec.Spec, infraPid int, cgroupParent string) error { + impl, err := r.RuntimeImpl(c) + if err != nil { + return err + } + + return impl.RestoreContainer(ctx, c, sbSpec, infraPid, cgroupParent) +} diff --git a/internal/oci/oci_test.go b/internal/oci/oci_test.go index 4c65b68f092..b622daee0b4 100644 --- a/internal/oci/oci_test.go +++ b/internal/oci/oci_test.go @@ -1,11 +1,16 @@ package oci_test import ( + "context" + "os" + + "github.com/containers/podman/v4/pkg/criu" "github.com/cri-o/cri-o/internal/oci" "github.com/cri-o/cri-o/pkg/annotations" - "github.com/cri-o/cri-o/pkg/config" + libconfig "github.com/cri-o/cri-o/pkg/config" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + specs "github.com/opencontainers/runtime-spec/specs-go" ) // The actual test suite @@ -13,7 +18,7 @@ var _ = t.Describe("Oci", func() { t.Describe("New", func() { It("should succeed with default config", func() { // Given - c, err := config.DefaultConfig() + c, err := libconfig.DefaultConfig() Expect(err).To(BeNil()) // so we have permission to make a directory within it c.ContainerAttachSocketDir = t.MustTempDir("crio") @@ -39,7 +44,7 @@ var _ = t.Describe("Oci", func() { performanceRuntime = "high-performance" vmRuntime = "kata" ) - runtimes := config.Runtimes{ + runtimes := libconfig.Runtimes{ defaultRuntime: { RuntimePath: "/bin/sh", RuntimeType: "", @@ -73,14 +78,15 @@ var _ = t.Describe("Oci", func() { } BeforeEach(func() { - c, err := config.DefaultConfig() + var err error + config, err = libconfig.DefaultConfig() Expect(err).To(BeNil()) - c.DefaultRuntime = defaultRuntime - c.Runtimes = runtimes + config.DefaultRuntime = defaultRuntime + config.Runtimes = runtimes // so we have permission to make a directory within it - c.ContainerAttachSocketDir = t.MustTempDir("crio") + config.ContainerAttachSocketDir = t.MustTempDir("crio") - sut, err = oci.New(c) + sut, err = oci.New(config) Expect(err).To(BeNil()) Expect(sut).NotTo(BeNil()) }) @@ -119,7 +125,7 @@ var _ = t.Describe("Oci", func() { // Then Expect(err).To(BeNil()) - Expect(runtimeType).To(Equal(config.RuntimeTypeVM)) + Expect(runtimeType).To(Equal(libconfig.RuntimeTypeVM)) }) Context("AllowedAnnotations", func() { It("should succeed to return allowed annotation", func() { @@ -171,5 +177,180 @@ var _ = t.Describe("Oci", func() { Expect(err).To(BeNil()) Expect(privileged).To(Equal(false)) }) + It("CheckpointContainer should succeed", func() { + if !criu.CheckForCriu(criu.PodCriuVersion) { + Skip("CRIU is missing or too old.") + } + // Given + beforeEach(sandboxID) + defer os.RemoveAll("dump.log") + config.Runtimes["runc"] = &libconfig.RuntimeHandler{ + RuntimePath: "/bin/true", + } + + specgen := &specs.Spec{ + Version: "1.0.0", + Process: &specs.Process{ + SelinuxLabel: "", + }, + Linux: &specs.Linux{ + MountLabel: "", + }, + } + // When + err := sut.CheckpointContainer(context.Background(), myContainer, specgen, false) + + // Then + Expect(err).To(BeNil()) + }) + It("CheckpointContainer should fail", func() { + if !criu.CheckForCriu(criu.PodCriuVersion) { + Skip("CRIU is missing or too old.") + } + // Given + defer os.RemoveAll("dump.log") + beforeEach(sandboxID) + 
config.Runtimes["runc"] = &libconfig.RuntimeHandler{ + RuntimePath: "/bin/false", + } + + specgen := &specs.Spec{ + Version: "1.0.0", + Process: &specs.Process{ + SelinuxLabel: "", + }, + Linux: &specs.Linux{ + MountLabel: "", + }, + } + // When + err := sut.CheckpointContainer(context.Background(), myContainer, specgen, true) + + // Then + Expect(err).NotTo(BeNil()) + Expect(err.Error()).To(Equal("configured runtime does not support checkpoint/restore")) + }) + It("RestoreContainer should fail with destination sandbox detection", func() { + if !criu.CheckForCriu(criu.PodCriuVersion) { + Skip("CRIU is missing or too old.") + } + // Given + beforeEach(sandboxID) + config.Runtimes["runc"] = &libconfig.RuntimeHandler{ + RuntimePath: "/bin/true", + } + + specgen := &specs.Spec{ + Version: "1.0.0", + } + err := os.Mkdir("checkpoint", 0o700) + Expect(err).To(BeNil()) + defer os.RemoveAll("checkpoint") + inventory, err := os.OpenFile("checkpoint/inventory.img", os.O_RDONLY|os.O_CREATE, 0o644) + Expect(err).To(BeNil()) + inventory.Close() + + // When + err = sut.RestoreContainer(context.Background(), myContainer, specgen, 42, "no-parent-cgroup-exists") + + // Then + Expect(err).NotTo(BeNil()) + Expect(err.Error()).To(Equal("failed to detect destination sandbox of to be restored container containerID")) + }) + It("RestoreContainer should fail with destination sandbox detection", func() { + if !criu.CheckForCriu(criu.PodCriuVersion) { + Skip("CRIU is missing or too old.") + } + // Given + beforeEach("") + specgen := &specs.Spec{ + Version: "1.0.0", + } + err := os.Mkdir("checkpoint", 0o700) + Expect(err).To(BeNil()) + defer os.RemoveAll("checkpoint") + inventory, err := os.OpenFile("checkpoint/inventory.img", os.O_RDONLY|os.O_CREATE, 0o644) + Expect(err).To(BeNil()) + inventory.Close() + + // When + err = sut.RestoreContainer(context.Background(), myContainer, specgen, 42, "no-parent-cgroup-exists") + + // Then + Expect(err).NotTo(BeNil()) + Expect(err.Error()).To(Equal("failed to detect sandbox of to be restored container containerID")) + }) + It("RestoreContainer should fail", func() { + if !criu.CheckForCriu(criu.PodCriuVersion) { + Skip("CRIU is missing or too old.") + } + // Given + beforeEach(sandboxID) + config.Runtimes["runc"] = &libconfig.RuntimeHandler{ + RuntimePath: "/bin/true", + MonitorPath: "/bin/true", + } + + specgen := &specs.Spec{ + Version: "1.0.0", + Annotations: map[string]string{"io.kubernetes.cri-o.SandboxID": "sandboxID"}, + Linux: &specs.Linux{ + MountLabel: ".", + }, + Process: &specs.Process{ + SelinuxLabel: "", + }, + } + myContainer.SetSpec(specgen) + + err := os.Mkdir("checkpoint", 0o700) + Expect(err).To(BeNil()) + defer os.RemoveAll("checkpoint") + inventory, err := os.OpenFile("checkpoint/inventory.img", os.O_RDONLY|os.O_CREATE, 0o644) + Expect(err).To(BeNil()) + inventory.Close() + + err = os.WriteFile( + "config.json", + []byte( + `{"ociVersion": "1.0.0","annotations":`+ + `{"io.kubernetes.cri-o.SandboxID": "sandboxID"},`+ + `"linux": {"mountLabel": ""}}`, + ), + 0o644, + ) + Expect(err).To(BeNil()) + defer os.RemoveAll("config.json") + + config.Conmon = "/bin/true" + + // When + err = sut.RestoreContainer(context.Background(), myContainer, specgen, 42, "no-parent-cgroup-exists") + defer os.RemoveAll("restore.log") + + // Then + Expect(err).NotTo(BeNil()) + Expect(err.Error()).To(ContainSubstring("failed to wait")) + }) + It("RestoreContainer should fail with missing inventory", func() { + if !criu.CheckForCriu(criu.PodCriuVersion) { + Skip("CRIU is missing or too 
old.") + } + // Given + beforeEach(sandboxID) + specgen := &specs.Spec{ + Version: "1.0.0", + Annotations: map[string]string{"io.kubernetes.cri-o.SandboxID": "sandboxID"}, + Linux: &specs.Linux{ + MountLabel: ".", + }, + } + // When + err := sut.RestoreContainer(context.Background(), myContainer, specgen, 42, "no-parent-cgroup-exists") + + // Then + Expect(err).NotTo(BeNil()) + Expect(err.Error()).To(Equal("a complete checkpoint for this container cannot be found, cannot restore: stat checkpoint/inventory.img: no such file or directory")) + }) }) }) diff --git a/internal/oci/runtime_oci.go b/internal/oci/runtime_oci.go index 6dae64567ef..32b1ae9d0f8 100644 --- a/internal/oci/runtime_oci.go +++ b/internal/oci/runtime_oci.go @@ -14,8 +14,12 @@ import ( "syscall" "time" + metadata "github.com/checkpoint-restore/checkpointctl/lib" "github.com/containernetworking/plugins/pkg/ns" conmonconfig "github.com/containers/conmon/runner/config" + "github.com/containers/podman/v4/pkg/annotations" + "github.com/containers/podman/v4/pkg/checkpoint/crutils" + "github.com/containers/podman/v4/pkg/criu" "github.com/containers/storage/pkg/pools" "github.com/cri-o/cri-o/internal/config/cgmgr" "github.com/cri-o/cri-o/internal/log" @@ -26,6 +30,7 @@ import ( "github.com/fsnotify/fsnotify" json "github.com/json-iterator/go" rspec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" "github.com/sirupsen/logrus" "golang.org/x/net/context" "golang.org/x/sys/unix" @@ -80,7 +85,7 @@ type exitCodeInfo struct { } // CreateContainer creates a container. -func (r *runtimeOCI) CreateContainer(ctx context.Context, c *Container, cgroupParent string) (retErr error) { +func (r *runtimeOCI) CreateContainer(ctx context.Context, c *Container, cgroupParent string, restore bool) (retErr error) { if c.Spoofed() { return nil } @@ -134,6 +139,31 @@ func (r *runtimeOCI) CreateContainer(ctx context.Context, c *Container, cgroupPa } args = append(args, "-i") } + if restore { + logrus.Debugf("Restore is true %v", restore) + args = append(args, "--restore", c.CheckpointPath()) + if c.Spec().Process.SelinuxLabel != "" { + args = append( + args, + "--runtime-opt", + fmt.Sprintf( + "--lsm-profile=selinux:%s", + c.Spec().Process.SelinuxLabel, + ), + ) + } + if c.Spec().Linux.MountLabel != "" { + args = append( + args, + "--runtime-opt", + fmt.Sprintf( + "--lsm-mount-context=%s", + c.Spec().Linux.MountLabel, + ), + ) + } + } + logrus.WithFields(logrus.Fields{ "args": args, }).Debugf("running conmon: %s", r.handler.MonitorPath) @@ -156,6 +186,12 @@ func (r *runtimeOCI) CreateContainer(ctx context.Context, c *Container, cgroupPa if v, found := os.LookupEnv("XDG_RUNTIME_DIR"); found { cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", v)) } + if restore { + // The CRIU binary is usually in /usr/sbin/criu + if v, found := os.LookupEnv("PATH"); found { + cmd.Env = append(cmd.Env, fmt.Sprintf("PATH=/usr/sbin/%s", v)) + } + } err = cmd.Start() if err != nil { @@ -248,6 +284,10 @@ func (r *runtimeOCI) CreateContainer(ctx context.Context, c *Container, cgroupPa pid = ss.si.Pid if ss.si.Pid == -1 { if ss.si.Message != "" { + if restore { + log.Errorf(ctx, "Container restore error: %s", ss.si.Message) + return fmt.Errorf("container restore failed: %s", ss.si.Message) + } log.Errorf(ctx, "Container creation error: %s", ss.si.Message) return fmt.Errorf("container create failed: %s", ss.si.Message) } @@ -1344,6 +1384,22 @@ func prepareProcessExec(c *Container, cmd []string, tty bool) (processFile 
strin return processFile, nil } +// ReadConmonPidFile attempts to read conmon's pid from its pid file +// This function makes no verification that this file should exist +// it is up to the caller to verify that this container has a conmon +func ReadConmonPidFile(c *Container) (int, error) { + contents, err := os.ReadFile(c.conmonPidFilePath()) + if err != nil { + return -1, err + } + // Convert it to an int + conmonPID, err := strconv.Atoi(string(contents)) + if err != nil { + return -1, err + } + return conmonPID, nil +} + func (c *Container) conmonPidFilePath() string { return filepath.Join(c.bundlePath, "conmon-pidfile") } @@ -1376,3 +1432,208 @@ func (r *runtimeOCI) defaultRuntimeArgs() []string { } return args } + +// CheckpointContainer checkpoints a container. +func (r *runtimeOCI) CheckpointContainer(ctx context.Context, c *Container, specgen *rspec.Spec, leaveRunning bool) error { + c.opLock.Lock() + defer c.opLock.Unlock() + + if err := r.checkpointRestoreSupported(); err != nil { + return err + } + + // Once CRIU infects the process in the container with the + // parasite, the parasite also wants to write to the log + // file which is outside of the container. Giving the log file + // the label of the container enables logging for the parasite. + if err := crutils.CRCreateFileWithLabel( + c.Dir(), + metadata.DumpLogFile, + specgen.Linux.MountLabel, + ); err != nil { + return err + } + + // workPath will be used to store dump.log and stats-dump + workPath := c.Dir() + // imagePath is used by CRIU to store the actual checkpoint files + imagePath := c.CheckpointPath() + + logrus.Debugf("Writing checkpoint to %s", imagePath) + logrus.Debugf("Writing checkpoint logs to %s", workPath) + args := []string{} + args = append( + args, + "checkpoint", + "--image-path", + imagePath, + "--work-path", + workPath, + ) + if leaveRunning { + args = append(args, "--leave-running") + } + + args = append(args, c.ID()) + + _, err := r.runtimeCmd(args...) + if err != nil { + return fmt.Errorf("running %q %q failed: %w", r.handler.RuntimePath, args, err) + } + + if !leaveRunning { + c.state.Status = ContainerStateStopped + c.state.ExitCode = utils.Int32Ptr(0) + c.state.Finished = time.Now() + } + + return nil +} + +// RestoreContainer restores a container. +func (r *runtimeOCI) RestoreContainer(ctx context.Context, c *Container, sbSpec *rspec.Spec, infraPid int, cgroupParent string) error { + if err := r.checkpointRestoreSupported(); err != nil { + return err + } + + // Let's try to stat() CRIU's inventory file. If it does not exist, it makes + // no sense to try a restore. This is a minimal check if a checkpoint exist. 
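The comment above captures the cheapest possible sanity check before attempting a restore: CRIU always writes an inventory.img into its image directory, so its absence means there is nothing to restore. A minimal standalone sketch of that idea (the directory path is illustrative, not the path CRI-O uses):

package main

import (
    "errors"
    "fmt"
    "io/fs"
    "os"
    "path/filepath"
)

// hasCompleteCheckpoint reports an error when the CRIU image directory is
// missing its inventory.img, in which case a restore cannot succeed.
func hasCompleteCheckpoint(checkpointDir string) error {
    if _, err := os.Stat(filepath.Join(checkpointDir, "inventory.img")); errors.Is(err, fs.ErrNotExist) {
        return fmt.Errorf("no complete checkpoint found in %s: %w", checkpointDir, err)
    }
    return nil
}

func main() {
    // "/tmp/checkpoint" is only an example path.
    if err := hasCompleteCheckpoint("/tmp/checkpoint"); err != nil {
        fmt.Println(err)
        return
    }
    fmt.Println("checkpoint looks complete")
}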
+ if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) { + return fmt.Errorf("a complete checkpoint for this container cannot be found, cannot restore: %w", err) + } + + // remove conmon files + attachFile := filepath.Join(c.BundlePath(), "attach") + if err := os.Remove(attachFile); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("error removing container %s attach file: %w", c.ID(), err) + } + + ctlFile := filepath.Join(c.BundlePath(), "ctl") + if err := os.Remove(ctlFile); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("error removing container %s ctl file: %w", c.ID(), err) + } + + winszFile := filepath.Join(c.BundlePath(), "winsz") + if err := os.Remove(winszFile); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("error removing container %s winsz file: %w", c.ID(), err) + } + + // Figure out if this container will be restored in another sandbox + oldSbID := c.Sandbox() + if oldSbID == "" { + return fmt.Errorf("failed to detect sandbox of to be restored container %s", c.ID()) + } + newSbID := sbSpec.Annotations[annotations.SandboxID] + if newSbID == "" { + return fmt.Errorf("failed to detect destination sandbox of to be restored container %s", c.ID()) + } + + // Get config.json to adapt for restore (mostly annotations for restore in another sandbox) + configFile := filepath.Join(c.BundlePath(), "config.json") + specgen, err := generate.NewFromFile(configFile) + if err != nil { + return err + } + + if oldSbID != newSbID { + // The container will be restored in another (not the original) sandbox + // Adapt to namespaces of the new sandbox + for i, n := range specgen.Config.Linux.Namespaces { + if n.Path == "" { + // The namespace in the original container did not point to + // an existing interface. Leave it as it is. + continue + } + for _, on := range sbSpec.Linux.Namespaces { + if on.Type == n.Type { + var nsPath string + if n.Type == rspec.NetworkNamespace { + // Type for network namespaces is 'network'. + // The kernel link is 'net'. + nsPath = fmt.Sprintf("/proc/%d/ns/%s", infraPid, "net") + } else { + nsPath = fmt.Sprintf("/proc/%d/ns/%s", infraPid, n.Type) + } + specgen.Config.Linux.Namespaces[i].Path = nsPath + break + } + } + } + + // Update Sandbox Name + specgen.AddAnnotation(annotations.SandboxName, sbSpec.Annotations[annotations.Name]) + // Update Sandbox ID + specgen.AddAnnotation(annotations.SandboxID, newSbID) + + // Update Name + ctrMetadata := types.ContainerMetadata{} + err = json.Unmarshal([]byte(sbSpec.Annotations[annotations.Metadata]), &ctrMetadata) + if err != nil { + return err + } + ctrName := ctrMetadata.Name + + podMetadata := types.PodSandboxMetadata{} + err = json.Unmarshal([]byte(specgen.Config.Annotations[annotations.Metadata]), &podMetadata) + if err != nil { + return err + } + uid := podMetadata.Uid + mData := fmt.Sprintf("k8s_%s_%s_%s_%s0", ctrName, sbSpec.Annotations[annotations.KubeName], sbSpec.Annotations[annotations.Namespace], uid) + specgen.AddAnnotation(annotations.Name, mData) + + c.SetSandbox(newSbID) + + saveOptions := generate.ExportOptions{} + if err := specgen.SaveToFile(configFile, saveOptions); err != nil { + return err + } + } + + c.state.InitPid = 0 + c.state.InitStartTime = "" + + // It is possible to tell runc to place the CRIU log files + // at a custom location '--work-path'. But for restoring a + // container we are not calling runc directly but conmon, which + // then calls runc. 
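When a container is restored into a different sandbox, RestoreContainer above repoints each namespace in the saved config.json at the new infra container's /proc entries. A hedged sketch of just that path construction (PID 42 is a stand-in, not a real infra PID):

package main

import (
    "fmt"

    rspec "github.com/opencontainers/runtime-spec/specs-go"
)

// infraNamespacePath builds the /proc path a restored container should join.
// The kernel names the network namespace "net" even though the OCI type is
// "network", which is why that type is special-cased above.
func infraNamespacePath(infraPid int, t rspec.LinuxNamespaceType) string {
    name := string(t)
    if t == rspec.NetworkNamespace {
        name = "net"
    }
    return fmt.Sprintf("/proc/%d/ns/%s", infraPid, name)
}

func main() {
    fmt.Println(infraNamespacePath(42, rspec.NetworkNamespace)) // /proc/42/ns/net
    fmt.Println(infraNamespacePath(42, rspec.PIDNamespace))     // /proc/42/ns/pid
}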
It would be possible to change conmon to + // also have the log file in the same location as during + // checkpointing, but it is not really that important right now. + if err := crutils.CRCreateFileWithLabel( + c.BundlePath(), + metadata.RestoreLogFile, + specgen.Config.Linux.MountLabel, + ); err != nil { + return err + } + + if err := r.CreateContainer(ctx, c, cgroupParent, true); err != nil { + return err + } + + // Once the container is restored, update the metadata + // 1. Container is running again + c.state.Status = ContainerStateRunning + // 2. Update PID of the container (without that stopping will fail) + pid, err := ReadConmonPidFile(c) + if err != nil { + return err + } + c.state.Pid = pid + // 3. Reset ExitCode (also needed for stopping) + c.state.ExitCode = nil + // 4. Set start time + c.state.Started = time.Now() + + return nil +} + +func (r *runtimeOCI) checkpointRestoreSupported() error { + if !criu.CheckForCriu(criu.PodCriuVersion) { + return fmt.Errorf("checkpoint/restore requires at least CRIU %d", criu.PodCriuVersion) + } + if !crutils.CRRuntimeSupportsCheckpointRestore(r.handler.RuntimePath) { + return fmt.Errorf("configured runtime does not support checkpoint/restore") + } + return nil +} diff --git a/internal/oci/runtime_pod.go b/internal/oci/runtime_pod.go index a0cce5a9cb1..c9dfe263e36 100644 --- a/internal/oci/runtime_pod.go +++ b/internal/oci/runtime_pod.go @@ -80,7 +80,7 @@ func newRuntimePod(r *Runtime, handler *config.RuntimeHandler, c *Container) (Ru }, nil } -func (r *runtimePod) CreateContainer(ctx context.Context, c *Container, cgroupParent string) error { +func (r *runtimePod) CreateContainer(ctx context.Context, c *Container, cgroupParent string, restore bool) error { // If this container is the infra container, all that needs to be done is move conmonrs to the pod cgroup if c.IsInfra() { v, err := r.client.Version(ctx) @@ -135,6 +135,25 @@ func (r *runtimePod) StartContainer(ctx context.Context, c *Container) error { return r.oci.StartContainer(ctx, c) } +func (r *runtimePod) CheckpointContainer( + ctx context.Context, + c *Container, + specgen *rspec.Spec, + leaveRunning bool, +) error { + return r.oci.CheckpointContainer(ctx, c, specgen, leaveRunning) +} + +func (r *runtimePod) RestoreContainer( + ctx context.Context, + c *Container, + sbSpec *rspec.Spec, + infraPid int, + cgroupParent string, +) error { + return r.oci.RestoreContainer(ctx, c, sbSpec, infraPid, cgroupParent) +} + func (r *runtimePod) ExecContainer(ctx context.Context, c *Container, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize) error { return r.oci.ExecContainer(ctx, c, cmd, stdin, stdout, stderr, tty, resize) } diff --git a/internal/oci/runtime_vm.go b/internal/oci/runtime_vm.go index cf7cb765bcd..d020b4e0b1a 100644 --- a/internal/oci/runtime_vm.go +++ b/internal/oci/runtime_vm.go @@ -93,7 +93,7 @@ func newRuntimeVM(path, root, configPath string) RuntimeImpl { } // CreateContainer creates a container. 
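checkpointRestoreSupported above reduces to two probes from the podman packages this change already vendors: a minimum CRIU version for pod checkpoints and a runtime that exposes a checkpoint command. In isolation the gate looks roughly like this (the runc path is only an example):

package main

import (
    "fmt"

    "github.com/containers/podman/v4/pkg/checkpoint/crutils"
    "github.com/containers/podman/v4/pkg/criu"
)

// canCheckpointRestore reports whether the host CRIU is new enough for pod
// checkpoint/restore and whether the given OCI runtime supports it.
func canCheckpointRestore(runtimePath string) error {
    if !criu.CheckForCriu(criu.PodCriuVersion) {
        return fmt.Errorf("CRIU is missing or too old for pod checkpoint/restore")
    }
    if !crutils.CRRuntimeSupportsCheckpointRestore(runtimePath) {
        return fmt.Errorf("runtime %q does not support checkpoint/restore", runtimePath)
    }
    return nil
}

func main() {
    // "/usr/bin/runc" is only an example path.
    if err := canCheckpointRestore("/usr/bin/runc"); err != nil {
        fmt.Println("unavailable:", err)
        return
    }
    fmt.Println("checkpoint/restore available")
}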
-func (r *runtimeVM) CreateContainer(ctx context.Context, c *Container, cgroupParent string) (retErr error) { +func (r *runtimeVM) CreateContainer(ctx context.Context, c *Container, cgroupParent string, restore bool) (retErr error) { log.Debugf(ctx, "RuntimeVM.CreateContainer() start") defer log.Debugf(ctx, "RuntimeVM.CreateContainer() end") @@ -1040,3 +1040,19 @@ func (r *runtimeVM) closeIO(ctrID, execID string) error { return nil } + +// CheckpointContainer not implemented for runtimeVM +func (r *runtimeVM) CheckpointContainer(ctx context.Context, c *Container, specgen *rspec.Spec, leaveRunning bool) error { + logrus.Debug("runtimeVM.CheckpointContainer() start") + defer logrus.Debug("runtimeVM.CheckpointContainer() end") + + return errors.New("checkpointing not implemented for runtimeVM") +} + +// RestoreContainer not implemented for runtimeVM +func (r *runtimeVM) RestoreContainer(ctx context.Context, c *Container, sbSpec *rspec.Spec, infraPid int, cgroupParent string) error { + logrus.Debug("runtimeVM.RestoreContainer() start") + defer logrus.Debug("runtimeVM.RestoreContainer() end") + + return errors.New("restoring not implemented for runtimeVM") +} diff --git a/internal/oci/suite_test.go b/internal/oci/suite_test.go index 22df4130bed..0e9479d1259 100644 --- a/internal/oci/suite_test.go +++ b/internal/oci/suite_test.go @@ -5,6 +5,7 @@ import ( "time" "github.com/cri-o/cri-o/internal/oci" + libconfig "github.com/cri-o/cri-o/pkg/config" . "github.com/cri-o/cri-o/test/framework" containerstoragemock "github.com/cri-o/cri-o/test/mocks/containerstorage" "github.com/golang/mock/gomock" @@ -20,11 +21,28 @@ func TestOci(t *testing.T) { } var ( - t *TestFramework - mockCtrl *gomock.Controller - storeMock *containerstoragemock.MockStore + t *TestFramework + mockCtrl *gomock.Controller + storeMock *containerstoragemock.MockStore + myContainer *oci.Container + config *libconfig.Config ) +const ( + sandboxID = "sandboxID" + containerID = "containerID" +) + +func beforeEach(sbID string) { + var err error + myContainer, err = oci.NewContainer(containerID, "", "", "", + make(map[string]string), make(map[string]string), + make(map[string]string), "", "", "", + &types.ContainerMetadata{}, sbID, false, + false, false, "", "", time.Now(), "") + Expect(err).To(BeNil()) +} + var _ = BeforeSuite(func() { t = NewTestFramework(NilFunc, NilFunc) t.Setup() diff --git a/pkg/config/config.go b/pkg/config/config.go index 0f85bd64add..7f4d58de9e3 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -363,6 +363,10 @@ type RuntimeConfig struct { // to manage namespace lifecycle PinnsPath string `toml:"pinns_path"` + // CriuPath is the path to find the criu binary, which is needed + // to checkpoint and restore containers + EnableCriuSupport bool `toml:"enable_criu_support"` + // Runtimes defines a list of OCI compatible runtimes. The runtime to // use is picked based on the runtime_handler provided by the CRI. 
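The new EnableCriuSupport field above maps to the enable_criu_support key in crio.conf through its toml tag. A trimmed-down sketch of that mapping, assuming a BurntSushi-style TOML decoder and using a stand-in struct rather than the real RuntimeConfig:

package main

import (
    "fmt"

    "github.com/BurntSushi/toml" // assumption: any TOML decoder with struct tags works the same way
)

// runtimeConfig models only the new knob for illustration purposes.
type runtimeConfig struct {
    EnableCriuSupport bool `toml:"enable_criu_support"`
}

func main() {
    const snippet = `
# crio.conf(5) excerpt
enable_criu_support = true
`
    var cfg runtimeConfig
    if _, err := toml.Decode(snippet, &cfg); err != nil {
        panic(err)
    }
    fmt.Println("checkpoint/restore enabled:", cfg.EnableCriuSupport)
}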
If // no runtime_handler is provided, the runtime will be picked based on @@ -1067,6 +1071,16 @@ func (c *RuntimeConfig) Validate(systemContext *types.SystemContext, onExecution return fmt.Errorf("initialize nsmgr: %w", err) } + if c.EnableCriuSupport { + if err := validateCriuInPath(); err != nil { + c.EnableCriuSupport = false + return errors.New("cannot enable checkpoint/restore support without the criu binary in $PATH") + } + logrus.Infof("Checkpoint/restore support enabled") + } else { + logrus.Infof("Checkpoint/restore support disabled") + } + c.seccompConfig.SetUseDefaultWhenEmpty(c.SeccompUseDefaultWhenEmpty) if err := c.seccompConfig.LoadProfile(c.SeccompProfile); err != nil { @@ -1199,6 +1213,12 @@ func (c *RuntimeConfig) ValidatePinnsPath(executable string) error { return err } +func validateCriuInPath() error { + _, err := validateExecutablePath("criu", "") + + return err +} + // Seccomp returns the seccomp configuration func (c *RuntimeConfig) Seccomp() *seccomp.Config { return c.seccompConfig @@ -1238,6 +1258,10 @@ func (c *RuntimeConfig) Devices() []device.Device { return c.deviceConfig.Devices() } +func (c *RuntimeConfig) CheckpointRestore() bool { + return c.EnableCriuSupport +} + func validateExecutablePath(executable, currentPath string) (string, error) { if currentPath == "" { path, err := exec.LookPath(executable) diff --git a/pkg/config/config_test_inject.go b/pkg/config/config_test_inject.go index ac092147041..2450f2a275c 100644 --- a/pkg/config/config_test_inject.go +++ b/pkg/config/config_test_inject.go @@ -25,3 +25,9 @@ func (c *Config) SetCNIPlugin(plugin ocicni.CNIPlugin) error { func (c *Config) SetNamespaceManager(nsMgr *nsmgr.NamespaceManager) { c.namespaceManager = nsMgr } + +// SetCheckpointRestore offers the possibility to turn on and +// turn off CheckpointRestore support for testing. +func (c *RuntimeConfig) SetCheckpointRestore(cr bool) { + c.EnableCriuSupport = cr +} diff --git a/pkg/config/template.go b/pkg/config/template.go index 9c60408ee75..19f6a0ce044 100644 --- a/pkg/config/template.go +++ b/pkg/config/template.go @@ -407,6 +407,11 @@ func initCrioTemplateConfig(c *Config) ([]*templateConfigValue, error) { group: crioRuntimeConfig, isDefaultValue: simpleEqual(dc.PinnsPath, c.PinnsPath), }, + { + templateString: templateStringCrioRuntimeEnableCriuSupport, + group: crioRuntimeConfig, + isDefaultValue: simpleEqual(dc.EnableCriuSupport, c.EnableCriuSupport), + }, { templateString: templateStringCrioRuntimeDefaultRuntime, group: crioRuntimeConfig, @@ -1040,7 +1045,14 @@ const templateStringCrioRuntimePinnsPath = `# pinns_path is the path to find the ` +const templateStringCrioRuntimeEnableCriuSupport = `# Globally enable/disable CRIU support which is necessary to +# checkpoint and restore container or pods (even if CRIU is found in $PATH). +{{ $.Comment }}enable_criu_support = {{ .EnableCriuSupport }} + +` + const templateStringCrioRuntimeDefaultRuntime = `# default_runtime is the _name_ of the OCI runtime to be used as the default. +# default_runtime is the _name_ of the OCI runtime to be used as the default. # The name is matched against the runtimes map below. If this value is changed, # the corresponding existing entry from the runtimes map below will be ignored. 
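Turning the option on only sticks when criu can be resolved through $PATH, which is all validateCriuInPath above does. The standalone probe is just:

package main

import (
    "fmt"
    "os/exec"
)

func main() {
    // Same probe the config validation relies on: criu must be in $PATH.
    path, err := exec.LookPath("criu")
    if err != nil {
        fmt.Println("checkpoint/restore support cannot be enabled:", err)
        return
    }
    fmt.Println("found criu at", path)
}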
{{ $.Comment }}default_runtime = "{{ .DefaultRuntime }}" diff --git a/scripts/github-actions-packages b/scripts/github-actions-packages index d20a93d3812..1fc3290a4c3 100755 --- a/scripts/github-actions-packages +++ b/scripts/github-actions-packages @@ -1,8 +1,14 @@ #!/usr/bin/env bash set -euo pipefail +CRIU_REPO="https://download.opensuse.org/repositories/devel:/tools:/criu/xUbuntu_20.04" + +curl -fSsl $CRIU_REPO/Release.key | sudo apt-key add - +echo "deb $CRIU_REPO/ /" | sudo tee /etc/apt/sources.list.d/criu.list + sudo apt update sudo apt install -y \ + criu \ libaio-dev \ libapparmor-dev \ libbtrfs-dev \ diff --git a/scripts/versions b/scripts/versions index 917b6f3b741..49d30deb199 100755 --- a/scripts/versions +++ b/scripts/versions @@ -5,7 +5,7 @@ set -euo pipefail declare -A VERSIONS=( ["cni-plugins"]=v1.1.1 ["conmon"]=v2.1.2 - ["cri-tools"]=v1.24.2 + ["cri-tools"]=v1.25.0 ["runc"]=v1.1.3 ["crun"]=1.5 ["bats"]=v1.6.0 diff --git a/server/container_checkpoint.go b/server/container_checkpoint.go new file mode 100644 index 00000000000..cfd67d42bb0 --- /dev/null +++ b/server/container_checkpoint.go @@ -0,0 +1,162 @@ +package server + +import ( + "fmt" + "io" + "os" + "path/filepath" + + metadata "github.com/checkpoint-restore/checkpointctl/lib" + "github.com/containers/podman/v4/libpod" + "github.com/containers/storage/pkg/archive" + "github.com/cri-o/cri-o/internal/lib" + "github.com/cri-o/cri-o/internal/log" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + types "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// CheckpointContainer checkpoints a container +func (s *Server) CheckpointContainer(ctx context.Context, req *types.CheckpointContainerRequest) error { + if !s.config.RuntimeConfig.CheckpointRestore() { + return fmt.Errorf("checkpoint/restore support not available") + } + + var opts []*lib.ContainerCheckpointRestoreOptions + var podCheckpointDirectory string + var checkpointedPodOptions metadata.CheckpointedPodOptions + + _, err := s.GetContainerFromShortID(req.ContainerId) + if err != nil { + // Maybe the user specified a Pod + sb, err := s.LookupSandbox(req.ContainerId) + if err != nil { + return status.Errorf(codes.NotFound, "could not find container or pod %q: %v", req.ContainerId, err) + } + if req.Location == "" { + return status.Errorf(codes.NotFound, "Pod checkpointing requires a destination file") + } + + log.Infof(ctx, "Checkpointing pod: %s", req.ContainerId) + // Create a temporary directory + podCheckpointDirectory, err = os.MkdirTemp("", "checkpoint") + if err != nil { + return err + } + sandboxConfig := types.PodSandboxConfig{ + Metadata: &types.PodSandboxMetadata{ + Name: sb.Metadata().Name, + Uid: sb.Metadata().Uid, + Namespace: sb.Metadata().Namespace, + Attempt: sb.Metadata().Attempt, + }, + Hostname: sb.Hostname(), + LogDirectory: sb.LogDir(), + } + var portMappings []*types.PortMapping + maps := sb.PortMappings() + for _, portMap := range maps { + pm := &types.PortMapping{ + ContainerPort: portMap.ContainerPort, + HostPort: portMap.HostPort, + HostIp: portMap.HostIP, + } + switch portMap.Protocol { + case "TCP": + pm.Protocol = 0 + case "UDP": + pm.Protocol = 1 + case "SCTP": + pm.Protocol = 2 + } + + portMappings = append(portMappings, pm) + } + sandboxConfig.PortMappings = portMappings + if sb.DNSConfig() != nil { + dnsConfig := &types.DNSConfig{ + Servers: sb.DNSConfig().Servers, + Searches: sb.DNSConfig().Searches, + Options: sb.DNSConfig().Options, + } + sandboxConfig.DnsConfig 
= dnsConfig + } + if _, err := metadata.WriteJSONFile(sandboxConfig, podCheckpointDirectory, metadata.PodDumpFile); err != nil { + return err + } + defer func() { + if err := os.RemoveAll(podCheckpointDirectory); err != nil { + logrus.Errorf("Could not recursively remove %s: %q", podCheckpointDirectory, err) + } + }() + + for _, ctr := range sb.Containers().List() { + localOpts := &lib.ContainerCheckpointRestoreOptions{ + Container: ctr.ID(), + ContainerCheckpointOptions: libpod.ContainerCheckpointOptions{ + TargetFile: filepath.Join(podCheckpointDirectory, ctr.Name()+".tar"), + KeepRunning: true, + }, + } + opts = append(opts, localOpts) + // This should be ID + checkpointedPodOptions.Containers = append(checkpointedPodOptions.Containers, ctr.Name()) + } + if len(opts) == 0 { + return status.Errorf(codes.NotFound, "No containers found in Pod %q", req.ContainerId) + } + checkpointedPodOptions.Version = 1 + checkpointedPodOptions.MountLabel = sb.MountLabel() + checkpointedPodOptions.ProcessLabel = sb.ProcessLabel() + } else { + log.Infof(ctx, "Checkpointing container: %s", req.ContainerId) + localOpts := &lib.ContainerCheckpointRestoreOptions{ + Container: req.ContainerId, + ContainerCheckpointOptions: libpod.ContainerCheckpointOptions{ + TargetFile: req.Location, + // For the forensic container checkpointing use case we + // keep the container running after checkpointing it. + KeepRunning: true, + }, + } + opts = append(opts, localOpts) + } + + for _, opt := range opts { + _, err = s.ContainerServer.ContainerCheckpoint(ctx, opt) + if err != nil { + return err + } + } + + if podCheckpointDirectory != "" { + if podOptions, err := metadata.WriteJSONFile(checkpointedPodOptions, podCheckpointDirectory, metadata.PodOptionsFile); err != nil { + return fmt.Errorf("error creating checkpointedContainers list file %q: %w", podOptions, err) + } + // It is a Pod checkpoint. Create the archive + podTar, err := archive.TarWithOptions(podCheckpointDirectory, &archive.TarOptions{ + IncludeSourceDir: true, + }) + if err != nil { + return err + } + // The resulting tar archive should not readable by everyone as it contains + // every memory page of the checkpointed processes. + podTarFile, err := os.OpenFile(req.Location, os.O_RDWR|os.O_CREATE, 0o600) + if err != nil { + return fmt.Errorf("error creating pod checkpoint archive %q: %w", req.Location, err) + } + defer podTarFile.Close() + _, err = io.Copy(podTarFile, podTar) + if err != nil { + return fmt.Errorf("failed writing to pod tar archive %q: %w", req.Location, err) + } + log.Infof(ctx, "Checkpointed pod: %s", req.ContainerId) + } else { + log.Infof(ctx, "Checkpointed container: %s", req.ContainerId) + } + + return nil +} diff --git a/server/container_checkpoint_test.go b/server/container_checkpoint_test.go new file mode 100644 index 00000000000..f8d0aa5fba3 --- /dev/null +++ b/server/container_checkpoint_test.go @@ -0,0 +1,214 @@ +package server_test + +import ( + "context" + "os" + + "github.com/containers/podman/v4/pkg/criu" + cstorage "github.com/containers/storage" + "github.com/containers/storage/pkg/archive" + "github.com/cri-o/cri-o/internal/hostport" + "github.com/cri-o/cri-o/internal/oci" + "github.com/golang/mock/gomock" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + specs "github.com/opencontainers/runtime-spec/specs-go" + "google.golang.org/grpc/status" + types "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +var _ = t.Describe("ContainerCheckpoint", func() { + // Prepare the sut + BeforeEach(func() { + beforeEach() + createDummyConfig() + mockRuncInLibConfig() + if !criu.CheckForCriu(criu.PodCriuVersion) { + Skip("CRIU is missing or too old.") + } + serverConfig.SetCheckpointRestore(true) + setupSUT() + }) + + AfterEach(func() { + afterEach() + os.RemoveAll("config.dump") + os.RemoveAll("cp.tar") + os.RemoveAll("dump.log") + os.RemoveAll("spec.dump") + }) + + t.Describe("ContainerCheckpoint", func() { + It("should succeed", func() { + // Given + addContainerAndSandbox() + + testContainer.SetState(&oci.ContainerState{ + State: specs.State{Status: oci.ContainerStateRunning}, + }) + testContainer.SetSpec(&specs.Spec{Version: "1.0.0"}) + + gomock.InOrder( + runtimeServerMock.EXPECT().StopContainer(gomock.Any()). + Return(nil), + ) + + // When + err := sut.CheckpointContainer(context.Background(), + &types.CheckpointContainerRequest{ + ContainerId: testContainer.ID(), + }) + + // Then + Expect(err).To(BeNil()) + }) + + It("should fail with invalid container id", func() { + // Given + // When + err := sut.CheckpointContainer(context.Background(), + &types.CheckpointContainerRequest{ + ContainerId: testContainer.ID(), + }) + + // Then + Expect(err).NotTo(BeNil()) + }) + It("should fail with valid pod id without archive", func() { + // Given + addContainerAndSandbox() + // When + err := sut.CheckpointContainer(context.Background(), + &types.CheckpointContainerRequest{ + ContainerId: testSandbox.ID(), + }) + + // Then + Expect(status.Convert(err).Message()).To(Equal("Pod checkpointing requires a destination file")) + }) + It("should succeed with valid pod id and archive", func() { + // Given + addContainerAndSandbox() + + testContainer.SetState(&oci.ContainerState{ + State: specs.State{Status: oci.ContainerStateRunning}, + }) + testContainer.SetSpec(&specs.Spec{Version: "1.0.0"}) + + gomock.InOrder( + storeMock.EXPECT().Container(gomock.Any()).Return(&cstorage.Container{}, nil), + storeMock.EXPECT().Changes(gomock.Any(), gomock.Any()).Return([]archive.Change{}, nil), + imageServerMock.EXPECT().GetStore().Return(storeMock), + storeMock.EXPECT().Mount(gomock.Any(), gomock.Any()).Return("/tmp/", nil), + runtimeServerMock.EXPECT().StopContainer(gomock.Any()).Return(nil), + ) + // When + err := sut.CheckpointContainer(context.Background(), + &types.CheckpointContainerRequest{ + ContainerId: testSandbox.ID(), + Location: "cp.tar", + }) + + // Then + Expect(err).To(BeNil()) + }) + It("should succeed with valid pod id and archive and DNSConfig and PortMapping", func() { + // Given + addContainerAndSandbox() + + testContainer.SetState(&oci.ContainerState{ + State: specs.State{Status: oci.ContainerStateRunning}, + }) + testContainer.SetSpec(&specs.Spec{Version: "1.0.0"}) + testSandbox.SetDNSConfig(&types.DNSConfig{ + Servers: []string{"server1", "server2"}, + Searches: []string{"searche1", "searches"}, + Options: []string{"option1", "option2"}, + }) + testSandbox.SetPortMappings([]*hostport.PortMapping{ + { + ContainerPort: 2222, + HostPort: 1222, + Protocol: "TCP", + }, + { + ContainerPort: 2222, + HostPort: 1223, + Protocol: "UDP", + }, + { + ContainerPort: 2222, + HostIP: "127.0.0.2", + HostPort: 1224, + Protocol: "SCTP", + }, + }) + + gomock.InOrder( + storeMock.EXPECT().Container(gomock.Any()).Return(&cstorage.Container{}, nil), + 
storeMock.EXPECT().Changes(gomock.Any(), gomock.Any()).Return([]archive.Change{}, nil), + imageServerMock.EXPECT().GetStore().Return(storeMock), + storeMock.EXPECT().Mount(gomock.Any(), gomock.Any()).Return("/tmp/", nil), + runtimeServerMock.EXPECT().StopContainer(gomock.Any()).Return(nil), + ) + // When + err := sut.CheckpointContainer(context.Background(), + &types.CheckpointContainerRequest{ + ContainerId: testSandbox.ID(), + Location: "cp.tar", + }) + + // Then + Expect(err).To(BeNil()) + }) + It("should fail with valid pod id and archive (with empty Container())", func() { + // Given + addContainerAndSandbox() + + testContainer.SetState(&oci.ContainerState{ + State: specs.State{Status: oci.ContainerStateRunning}, + }) + testContainer.SetSpec(&specs.Spec{Version: "1.0.0"}) + + gomock.InOrder( + storeMock.EXPECT().Container(gomock.Any()).Return(nil, t.TestError), + ) + // When + err := sut.CheckpointContainer(context.Background(), + &types.CheckpointContainerRequest{ + ContainerId: testSandbox.ID(), + Location: "cp.tar", + }) + + // Then + Expect(err.Error()).To(Equal(`failed to write file system changes of container containerID: error exporting root file-system diff for "containerID": error`)) + }) + }) +}) + +var _ = t.Describe("ContainerCheckpoint with CheckpointRestore set to false", func() { + // Prepare the sut + BeforeEach(func() { + beforeEach() + createDummyConfig() + mockRuncInLibConfig() + serverConfig.SetCheckpointRestore(false) + setupSUT() + }) + + AfterEach(afterEach) + + t.Describe("ContainerCheckpoint", func() { + It("should fail with checkpoint/restore support not available", func() { + // Given + // When + err := sut.CheckpointContainer(context.Background(), + &types.CheckpointContainerRequest{ + ContainerId: testContainer.ID(), + }) + + // Then + Expect(err.Error()).To(Equal(`checkpoint/restore support not available`)) + }) + }) +}) diff --git a/server/container_create.go b/server/container_create.go index 80867de29fc..a244abce30e 100644 --- a/server/container_create.go +++ b/server/container_create.go @@ -290,6 +290,45 @@ func generateUserString(username, imageUser string, uid *types.Int64Value) strin func (s *Server) CreateContainer(ctx context.Context, req *types.CreateContainerRequest) (res *types.CreateContainerResponse, retErr error) { log.Infof(ctx, "Creating container: %s", translateLabelsToDescription(req.GetConfig().GetLabels())) + // Check if image is a file. If it is a file it might be a checkpoint archive. + checkpointImage := func() bool { + if req.Config == nil || + req.Config.Image == nil || + req.SandboxConfig == nil || + req.SandboxConfig.Metadata == nil { + return false + } + if _, err := os.Stat(req.Config.Image.Image); err == nil { + log.Debugf( + ctx, + "%q is a file. Assuming it is a checkpoint archive", + req.Config.Image.Image, + ) + return true + } + return false + }() + if checkpointImage { + // This might be a checkpoint image. Let's pass + // it to the checkpoint code. 
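From the kubelet's perspective a restore is a plain CreateContainer request whose image field happens to name a checkpoint archive on disk, as the detection above explains. A hedged sketch of such a request body (the tar path is illustrative):

package main

import (
    "fmt"

    types "k8s.io/cri-api/pkg/apis/runtime/v1"
)

func main() {
    // When Image.Image points at an existing file, CreateContainer treats it
    // as a checkpoint archive and hands the request to the restore path
    // instead of pulling an image.
    cfg := &types.ContainerConfig{
        Metadata: &types.ContainerMetadata{Name: "restored-container"},
        Image:    &types.ImageSpec{Image: "/var/lib/kubelet/checkpoints/cp.tar"},
    }
    fmt.Println("requesting restore from", cfg.Image.Image)
}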
+ ctrID, err := s.CRImportCheckpoint( + ctx, + req.Config.Image.Image, + req.PodSandboxId, + req.SandboxConfig.Metadata.Uid, + req.Config.Mounts, + req.Config.Annotations, + ) + if err != nil { + return nil, err + } + log.Debugf(ctx, "Prepared %s for restore\n", ctrID) + + return &types.CreateContainerResponse{ + ContainerId: ctrID, + }, nil + } + sb, err := s.getPodSandboxFromRequest(req.PodSandboxId) if err != nil { if err == sandbox.ErrIDEmpty { @@ -314,7 +353,7 @@ func (s *Server) CreateContainer(ctx context.Context, req *types.CreateContainer return nil, fmt.Errorf("setting container config: %w", err) } - if err := ctr.SetNameAndID(); err != nil { + if err := ctr.SetNameAndID(""); err != nil { return nil, fmt.Errorf("setting container name and ID: %w", err) } diff --git a/server/container_create_linux.go b/server/container_create_linux.go index faadee999b4..7b0b73899d1 100644 --- a/server/container_create_linux.go +++ b/server/container_create_linux.go @@ -54,7 +54,7 @@ func (s *Server) createContainerPlatform(ctx context.Context, container *oci.Con return err } } - return s.Runtime().CreateContainer(ctx, container, cgroupParent) + return s.Runtime().CreateContainer(ctx, container, cgroupParent, false) } // makeAccessible changes the path permission and each parent directory to have --x--x--x @@ -927,8 +927,17 @@ func addOCIBindMounts(ctx context.Context, ctr ctrfactory.Container, mountLabel, return nil, nil, fmt.Errorf("cannot mount %s: path does not exist and will cause issues as a directory", toReject) } } - if err = os.MkdirAll(src, 0o755); err != nil { - return nil, nil, fmt.Errorf("failed to mkdir %s: %s", src, err) + if !ctr.Restore() { + // Although this would also be really helpful for restoring containers + // it is problematic as during restore external bind mounts need to be + // a file if the destination is a file. Unfortunately it is not easy + // to tell if the destination is a file or a directory. Especially if + // the destination is a nested bind mount. For now we will just not + // create the missing bind mount source for restore and return an + // error to the user. 
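CRImportCheckpoint, added below in server/container_restore.go, begins by unpacking only the checkpoint metadata before touching any memory images. A sketch of that first step using the same archive and checkpointctl helpers (the archive path is illustrative):

package main

import (
    "fmt"
    "os"

    metadata "github.com/checkpoint-restore/checkpointctl/lib"
    "github.com/containers/storage/pkg/archive"
    spec "github.com/opencontainers/runtime-spec/specs-go"
)

// readCheckpointSpec extracts and decodes spec.dump from a checkpoint
// archive, skipping the potentially large CRIU image data.
func readCheckpointSpec(archivePath string) (*spec.Spec, error) {
    f, err := os.Open(archivePath)
    if err != nil {
        return nil, err
    }
    defer f.Close()

    dir, err := os.MkdirTemp("", "checkpoint")
    if err != nil {
        return nil, err
    }
    defer os.RemoveAll(dir)

    // Only the metadata is needed here; exclude the checkpoint images and rootfs diff.
    opts := &archive.TarOptions{ExcludePatterns: []string{metadata.CheckpointDirectory, metadata.RootFsDiffTar}}
    if err := archive.Untar(f, dir, opts); err != nil {
        return nil, err
    }

    dumpSpec := new(spec.Spec)
    if _, err := metadata.ReadJSONFile(dumpSpec, dir, metadata.SpecDumpFile); err != nil {
        return nil, err
    }
    return dumpSpec, nil
}

func main() {
    s, err := readCheckpointSpec("/tmp/cp.tar") // illustrative path
    if err != nil {
        fmt.Println(err)
        return
    }
    fmt.Println("checkpointed OCI spec version:", s.Version)
}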
+ if err = os.MkdirAll(src, 0o755); err != nil { + return nil, nil, fmt.Errorf("failed to mkdir %s: %s", src, err) + } } } diff --git a/server/container_restore.go b/server/container_restore.go new file mode 100644 index 00000000000..6db796823b8 --- /dev/null +++ b/server/container_restore.go @@ -0,0 +1,279 @@ +package server + +import ( + "encoding/json" + "fmt" + "os" + + metadata "github.com/checkpoint-restore/checkpointctl/lib" + "github.com/containers/podman/v4/pkg/annotations" + "github.com/containers/podman/v4/pkg/errorhandling" + "github.com/containers/storage/pkg/archive" + "github.com/cri-o/cri-o/internal/factory/container" + "github.com/cri-o/cri-o/internal/lib" + "github.com/cri-o/cri-o/internal/lib/sandbox" + "github.com/cri-o/cri-o/internal/log" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + types "k8s.io/cri-api/pkg/apis/runtime/v1" + kubetypes "k8s.io/kubernetes/pkg/kubelet/types" +) + +// taken from Podman +func (s *Server) CRImportCheckpoint( + ctx context.Context, + input, sbID, sandboxUID string, + createMounts []*types.Mount, + createAnnotations map[string]string, +) (ctrID string, retErr error) { + // First get the container definition from the + // tarball to a temporary directory + archiveFile, err := os.Open(input) + if err != nil { + return "", fmt.Errorf("failed to open checkpoint archive %s for import: %w", input, err) + } + defer errorhandling.CloseQuiet(archiveFile) + options := &archive.TarOptions{ + // Here we only need the files config.dump and spec.dump + ExcludePatterns: []string{ + "artifacts", + "ctr.log", + metadata.RootFsDiffTar, + metadata.NetworkStatusFile, + metadata.DeletedFilesFile, + metadata.CheckpointDirectory, + }, + } + dir, err := os.MkdirTemp("", "checkpoint") + if err != nil { + return "", err + } + defer func() { + if err := os.RemoveAll(dir); err != nil { + logrus.Errorf("Could not recursively remove %s: %q", dir, err) + } + }() + err = archive.Untar(archiveFile, dir, options) + if err != nil { + return "", fmt.Errorf("unpacking of checkpoint archive %s failed: %w", input, err) + } + logrus.Debugf("Unpacked checkpoint in %s", dir) + + // Load spec.dump from temporary directory + dumpSpec := new(spec.Spec) + if _, err := metadata.ReadJSONFile(dumpSpec, dir, metadata.SpecDumpFile); err != nil { + return "", fmt.Errorf("failed to read %q: %w", metadata.SpecDumpFile, err) + } + + // Load config.dump from temporary directory + config := new(lib.ContainerConfig) + if _, err := metadata.ReadJSONFile(config, dir, metadata.ConfigDumpFile); err != nil { + return "", fmt.Errorf("failed to read %q: %w", metadata.ConfigDumpFile, err) + } + + if sbID == "" { + // restore into previous sandbox + sbID = dumpSpec.Annotations[annotations.SandboxID] + ctrID = config.ID + } else { + ctrID = "" + } + + ctrMetadata := types.ContainerMetadata{} + originalAnnotations := make(map[string]string) + originalLabels := make(map[string]string) + + if dumpSpec.Annotations[annotations.ContainerManager] == "libpod" { + // This is an import from Podman + ctrMetadata.Name = config.Name + ctrMetadata.Attempt = 0 + } else { + if err := json.Unmarshal([]byte(dumpSpec.Annotations[annotations.Metadata]), &ctrMetadata); err != nil { + return "", fmt.Errorf("failed to read %q: %w", annotations.Metadata, err) + } + + if err := json.Unmarshal([]byte(dumpSpec.Annotations[annotations.Annotations]), &originalAnnotations); err != nil { + return "", fmt.Errorf("failed to read %q: %w", annotations.Annotations, err) 
+ } + + if err := json.Unmarshal([]byte(dumpSpec.Annotations[annotations.Labels]), &originalLabels); err != nil { + return "", fmt.Errorf("failed to read %q: %w", annotations.Labels, err) + } + if sandboxUID != "" { + if _, ok := originalLabels[kubetypes.KubernetesPodUIDLabel]; ok { + originalLabels[kubetypes.KubernetesPodUIDLabel] = sandboxUID + } + if _, ok := originalAnnotations[kubetypes.KubernetesPodUIDLabel]; ok { + originalAnnotations[kubetypes.KubernetesPodUIDLabel] = sandboxUID + } + } + + if createAnnotations != nil { + // The hash also needs to be update or Kubernetes thinks the container needs to be restarted + _, ok1 := createAnnotations["io.kubernetes.container.hash"] + _, ok2 := originalAnnotations["io.kubernetes.container.hash"] + + if ok1 && ok2 { + originalAnnotations["io.kubernetes.container.hash"] = createAnnotations["io.kubernetes.container.hash"] + } + } + } + + sb, err := s.getPodSandboxFromRequest(sbID) + if err != nil { + if err == sandbox.ErrIDEmpty { + return "", err + } + return "", fmt.Errorf("specified sandbox not found: %s: %w", sbID, err) + } + + stopMutex := sb.StopMutex() + stopMutex.RLock() + defer stopMutex.RUnlock() + if sb.Stopped() { + return "", fmt.Errorf("CreateContainer failed as the sandbox was stopped: %s", sb.ID()) + } + + ctr, err := container.New() + if err != nil { + return "", fmt.Errorf("failed to create container: %w", err) + } + + containerConfig := &types.ContainerConfig{ + Metadata: &types.ContainerMetadata{ + Name: ctrMetadata.Name, + Attempt: ctrMetadata.Attempt, + }, + Image: &types.ImageSpec{Image: config.RootfsImageName}, + Linux: &types.LinuxContainerConfig{ + Resources: &types.LinuxContainerResources{}, + SecurityContext: &types.LinuxContainerSecurityContext{}, + }, + Annotations: originalAnnotations, + Labels: originalLabels, + } + + ignoreMounts := map[string]bool{ + "/proc": true, + "/dev": true, + "/dev/pts": true, + "/dev/mqueue": true, + "/sys": true, + "/sys/fs/cgroup": true, + "/dev/shm": true, + "/etc/resolv.conf": true, + "/etc/hostname": true, + "/run/secrets": true, + "/run/.containerenv": true, + } + + for _, m := range dumpSpec.Mounts { + // Following mounts are ignored as they might point to the + // wrong location and if ignored the mounts will correctly + // be setup to point to the new location. 
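The surrounding loop rebuilds each remaining mount for the new create request: the destination comes from the checkpointed spec, the host path may be overridden by the incoming request, and a few OCI options map onto CRI mount fields. Distilled into a helper (the names here are illustrative, not part of the change):

package main

import (
    "fmt"

    types "k8s.io/cri-api/pkg/apis/runtime/v1"
)

// restoredMount builds the CRI mount for one checkpointed OCI mount,
// preferring a host path supplied by the new create request when present.
func restoredMount(dest, oldHostPath string, options []string, overrides map[string]string) *types.Mount {
    m := &types.Mount{ContainerPath: dest, HostPath: oldHostPath}
    if hp, ok := overrides[dest]; ok {
        m.HostPath = hp
    }
    for _, opt := range options {
        switch opt {
        case "ro":
            m.Readonly = true
        case "rprivate":
            m.Propagation = types.MountPropagation_PROPAGATION_PRIVATE
        case "rshared":
            m.Propagation = types.MountPropagation_PROPAGATION_BIDIRECTIONAL
        case "rslaved":
            m.Propagation = types.MountPropagation_PROPAGATION_HOST_TO_CONTAINER
        }
    }
    return m
}

func main() {
    m := restoredMount("/data", "/old/host/data", []string{"ro", "rprivate"},
        map[string]string{"/data": "/new/host/data"})
    fmt.Printf("%+v\n", m)
}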
+ if ignoreMounts[m.Destination] { + continue + } + mount := &types.Mount{ + ContainerPath: m.Destination, + HostPath: m.Source, + } + + for _, createMount := range createMounts { + if createMount.ContainerPath == m.Destination { + mount.HostPath = createMount.HostPath + } + } + + for _, opt := range m.Options { + switch opt { + case "ro": + mount.Readonly = true + case "rprivate": + mount.Propagation = types.MountPropagation_PROPAGATION_PRIVATE + case "rshared": + mount.Propagation = types.MountPropagation_PROPAGATION_BIDIRECTIONAL + case "rslaved": + mount.Propagation = types.MountPropagation_PROPAGATION_HOST_TO_CONTAINER + } + } + + logrus.Debugf("Adding mounts %#v", mount) + containerConfig.Mounts = append(containerConfig.Mounts, mount) + } + sandboxConfig := &types.PodSandboxConfig{ + Metadata: &types.PodSandboxMetadata{ + Name: sb.Metadata().Name, + Uid: sb.Metadata().Uid, + Namespace: sb.Metadata().Namespace, + Attempt: sb.Metadata().Attempt, + }, + Linux: &types.LinuxPodSandboxConfig{}, + } + + if err := ctr.SetConfig(containerConfig, sandboxConfig); err != nil { + return "", fmt.Errorf("setting container config: %w", err) + } + + if err := ctr.SetNameAndID(ctrID); err != nil { + return "", fmt.Errorf("setting container name and ID: %w", err) + } + + if _, err = s.ReserveContainerName(ctr.ID(), ctr.Name()); err != nil { + return "", fmt.Errorf("kubelet may be retrying requests that are timing out in CRI-O due to system load: %w", err) + } + + defer func() { + if retErr != nil { + log.Infof(ctx, "RestoreCtr: releasing container name %s", ctr.Name()) + s.ReleaseContainerName(ctr.Name()) + } + }() + ctr.SetRestore(true) + + newContainer, err := s.createSandboxContainer(ctx, ctr, sb) + if err != nil { + return "", err + } + defer func() { + if retErr != nil { + log.Infof(ctx, "RestoreCtr: deleting container %s from storage", ctr.ID()) + err2 := s.StorageRuntimeServer().DeleteContainer(ctr.ID()) + if err2 != nil { + log.Warnf(ctx, "Failed to cleanup container directory: %v", err2) + } + } + }() + + s.addContainer(newContainer) + + defer func() { + if retErr != nil { + log.Infof(ctx, "RestoreCtr: removing container %s", newContainer.ID()) + s.removeContainer(newContainer) + } + }() + + if err := s.CtrIDIndex().Add(ctr.ID()); err != nil { + return "", err + } + defer func() { + if retErr != nil { + log.Infof(ctx, "RestoreCtr: deleting container ID %s from idIndex", ctr.ID()) + if err := s.CtrIDIndex().Delete(ctr.ID()); err != nil { + log.Warnf(ctx, "Couldn't delete ctr id %s from idIndex", ctr.ID()) + } + } + }() + + newContainer.SetCreated() + newContainer.SetRestore(true) + newContainer.SetRestoreArchive(input) + + if ctx.Err() == context.Canceled || ctx.Err() == context.DeadlineExceeded { + log.Infof(ctx, "RestoreCtr: context was either canceled or the deadline was exceeded: %v", ctx.Err()) + return "", ctx.Err() + } + return ctr.ID(), nil +} diff --git a/server/container_restore_test.go b/server/container_restore_test.go new file mode 100644 index 00000000000..4bb96d85609 --- /dev/null +++ b/server/container_restore_test.go @@ -0,0 +1,282 @@ +package server_test + +import ( + "context" + "io" + "os" + + "github.com/containers/podman/v4/pkg/criu" + "github.com/containers/storage/pkg/archive" + "github.com/cri-o/cri-o/internal/oci" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +var _ = t.Describe("ContainerRestore", func() { + // Prepare the sut + BeforeEach(func() { + if !criu.CheckForCriu(criu.PodCriuVersion) { + Skip("CRIU is missing or too old.") + } + beforeEach() + createDummyConfig() + mockRuncInLibConfig() + serverConfig.SetCheckpointRestore(true) + setupSUT() + }) + + AfterEach(func() { + afterEach() + os.RemoveAll("config.dump") + os.RemoveAll("cp.tar") + os.RemoveAll("dump.log") + os.RemoveAll("spec.dump") + }) + + t.Describe("ContainerRestore from archive into new pod", func() { + It("should fail because archive does not exist", func() { + // Given + // When + _, err := sut.CRImportCheckpoint( + context.Background(), + "does-not-exist.tar", + "", + "", + nil, + nil, + ) + + // Then + Expect(err.Error()).To(Equal(`failed to open checkpoint archive does-not-exist.tar for import: open does-not-exist.tar: no such file or directory`)) + }) + }) + t.Describe("ContainerRestore from archive into new pod", func() { + It("should fail because archive is an empty file", func() { + // Given + archive, err := os.OpenFile("empty.tar", os.O_RDONLY|os.O_CREATE, 0o644) + Expect(err).To(BeNil()) + archive.Close() + defer os.RemoveAll("empty.tar") + // When + _, err = sut.CRImportCheckpoint( + context.Background(), + "empty.tar", + "", + "", + nil, + nil, + ) + // Then + Expect(err.Error()).To(ContainSubstring(`failed to read "spec.dump": failed to read`)) + }) + }) + t.Describe("ContainerRestore from archive into new pod", func() { + It("should fail because archive is not a tar file", func() { + // Given + err := os.WriteFile("no.tar", []byte("notar"), 0o644) + Expect(err).To(BeNil()) + defer os.RemoveAll("no.tar") + // When + _, err = sut.CRImportCheckpoint( + context.Background(), + "empty.tar", + "", + "", + nil, + nil, + ) + // Then + Expect(err.Error()).To(Equal(`failed to open checkpoint archive empty.tar for import: open empty.tar: no such file or directory`)) + }) + }) + t.Describe("ContainerRestore from archive into new pod", func() { + It("should fail because archive contains broken spec.dump", func() { + // Given + err := os.WriteFile("spec.dump", []byte("not json"), 0o644) + Expect(err).To(BeNil()) + defer os.RemoveAll("spec.dump") + outFile, err := os.Create("archive.tar") + Expect(err).To(BeNil()) + defer outFile.Close() + input, err := archive.TarWithOptions(".", &archive.TarOptions{ + Compression: archive.Uncompressed, + IncludeSourceDir: true, + IncludeFiles: []string{"spec.dump"}, + }) + Expect(err).To(BeNil()) + defer os.RemoveAll("archive.tar") + _, err = io.Copy(outFile, input) + Expect(err).To(BeNil()) + // When + _, err = sut.CRImportCheckpoint( + context.Background(), + "archive.tar", + "", + "", + nil, + nil, + ) + // Then + Expect(err.Error()).To(ContainSubstring(`failed to read "spec.dump": failed to unmarshal `)) + }) + }) + t.Describe("ContainerRestore from archive into new pod", func() { + It("should fail because archive contains empty config.dump and spec.dump", func() { + // Given + err := os.WriteFile("spec.dump", []byte("{}"), 0o644) + Expect(err).To(BeNil()) + defer os.RemoveAll("spec.dump") + err = os.WriteFile("config.dump", []byte("{}"), 0o644) + Expect(err).To(BeNil()) + defer os.RemoveAll("config.dump") + outFile, err := os.Create("archive.tar") + Expect(err).To(BeNil()) + defer outFile.Close() + input, err := archive.TarWithOptions(".", &archive.TarOptions{ + Compression: archive.Uncompressed, + IncludeSourceDir: true, + IncludeFiles: 
[]string{"spec.dump", "config.dump"}, + }) + Expect(err).To(BeNil()) + defer os.RemoveAll("archive.tar") + _, err = io.Copy(outFile, input) + Expect(err).To(BeNil()) + // When + _, err = sut.CRImportCheckpoint( + context.Background(), + "archive.tar", + "", + "", + nil, + nil, + ) + + // Then + Expect(err.Error()).To(ContainSubstring(`failed to read "io.kubernetes.cri-o.Metadata": unexpected end of JSON input`)) + }) + }) + t.Describe("ContainerRestore from archive into new pod", func() { + It("should fail because archive contains broken config.dump", func() { + // Given + outFile, err := os.Create("archive.tar") + Expect(err).To(BeNil()) + defer outFile.Close() + err = os.WriteFile("config.dump", []byte("not json"), 0o644) + Expect(err).To(BeNil()) + defer os.RemoveAll("config.dump") + err = os.WriteFile("spec.dump", []byte("{}"), 0o644) + Expect(err).To(BeNil()) + defer os.RemoveAll("spec.dump") + input, err := archive.TarWithOptions(".", &archive.TarOptions{ + Compression: archive.Uncompressed, + IncludeSourceDir: true, + IncludeFiles: []string{"spec.dump", "config.dump"}, + }) + Expect(err).To(BeNil()) + defer os.RemoveAll("archive.tar") + _, err = io.Copy(outFile, input) + Expect(err).To(BeNil()) + // When + + _, err = sut.CRImportCheckpoint( + context.Background(), + "archive.tar", + "", + "", + nil, + nil, + ) + + // Then + Expect(err.Error()).To(ContainSubstring(`failed to read "config.dump": failed to unmarshal`)) + }) + }) + t.Describe("ContainerRestore from archive into new pod", func() { + It("should fail because archive contains empty config.dump", func() { + // Given + addContainerAndSandbox() + + err := os.WriteFile( + "spec.dump", + []byte(`{"annotations":{"io.kubernetes.cri-o.Metadata":"{\"name\":\"container-to-restore\"}"}}`), + 0o644, + ) + Expect(err).To(BeNil()) + defer os.RemoveAll("spec.dump") + err = os.WriteFile("config.dump", []byte("{}"), 0o644) + Expect(err).To(BeNil()) + defer os.RemoveAll("config.dump") + outFile, err := os.Create("archive.tar") + Expect(err).To(BeNil()) + defer outFile.Close() + input, err := archive.TarWithOptions(".", &archive.TarOptions{ + Compression: archive.Uncompressed, + IncludeSourceDir: true, + IncludeFiles: []string{"spec.dump", "config.dump"}, + }) + Expect(err).To(BeNil()) + defer os.RemoveAll("archive.tar") + _, err = io.Copy(outFile, input) + Expect(err).To(BeNil()) + // When + + _, err = sut.CRImportCheckpoint( + context.Background(), + "archive.tar", + "", + "", + nil, + nil, + ) + + // Then + Expect(err.Error()).To(Equal(`failed to read "io.kubernetes.cri-o.Annotations": unexpected end of JSON input`)) + }) + }) + t.Describe("ContainerRestore from archive into new pod", func() { + It("should fail because archive contains no actual checkpoint", func() { + // Given + addContainerAndSandbox() + testContainer.SetStateAndSpoofPid(&oci.ContainerState{ + State: specs.State{Status: oci.ContainerStateRunning}, + }) + + err := os.WriteFile( + "spec.dump", + []byte(`{"annotations":{"io.kubernetes.cri-o.Metadata":"{\"name\":\"container-to-restore\"}"}}`), + 0o644, + ) + Expect(err).To(BeNil()) + defer os.RemoveAll("spec.dump") + err = os.WriteFile("config.dump", []byte(`{"rootfsImageName": "image"}`), 0o644) + Expect(err).To(BeNil()) + defer os.RemoveAll("config.dump") + outFile, err := os.Create("archive.tar") + Expect(err).To(BeNil()) + defer outFile.Close() + input, err := archive.TarWithOptions(".", &archive.TarOptions{ + Compression: archive.Uncompressed, + IncludeSourceDir: true, + IncludeFiles: []string{"spec.dump", 
"config.dump"}, + }) + Expect(err).To(BeNil()) + defer os.RemoveAll("archive.tar") + _, err = io.Copy(outFile, input) + Expect(err).To(BeNil()) + // When + + _, err = sut.CRImportCheckpoint( + context.Background(), + "archive.tar", + "", + "", + nil, + nil, + ) + + // Then + Expect(err.Error()).To(Equal(`failed to read "io.kubernetes.cri-o.Annotations": unexpected end of JSON input`)) + }) + }) +}) diff --git a/server/container_start.go b/server/container_start.go index 93c5476b554..2449e503780 100644 --- a/server/container_start.go +++ b/server/container_start.go @@ -3,6 +3,8 @@ package server import ( "fmt" + "github.com/containers/podman/v4/libpod" + "github.com/cri-o/cri-o/internal/lib" "github.com/cri-o/cri-o/internal/log" oci "github.com/cri-o/cri-o/internal/oci" "github.com/cri-o/cri-o/internal/runtimehandlerhooks" @@ -19,6 +21,41 @@ func (s *Server) StartContainer(ctx context.Context, req *types.StartContainerRe if err != nil { return status.Errorf(codes.NotFound, "could not find container %q: %v", req.ContainerId, err) } + + if c.Restore() { + // If the create command found a checkpoint image, the container + // has the restore flag set to true. At this point we need to jump + // into the restore code. + log.Debugf(ctx, "Restoring container %q", req.ContainerId) + + ctr, err := s.ContainerServer.ContainerRestore( + ctx, + &lib.ContainerCheckpointRestoreOptions{ + Container: c.ID(), + Pod: s.getSandbox(c.Sandbox()).ID(), + ContainerCheckpointOptions: libpod.ContainerCheckpointOptions{ + TargetFile: c.ImageName(), + }, + }, + ) + if err != nil { + ociContainer, err1 := s.GetContainerFromShortID(c.ID()) + if err1 != nil { + return fmt.Errorf("failed to find container %s: %v", c.ID(), err1) + } + s.ReleaseContainerName(ociContainer.Name()) + err2 := s.StorageRuntimeServer().DeleteContainer(c.ID()) + if err2 != nil { + log.Warnf(ctx, "Failed to cleanup container directory: %v", err2) + } + s.removeContainer(ociContainer) + return err + } + + log.Infof(ctx, "Restored container: %s", ctr) + return nil + } + state := c.State() if state.Status != oci.ContainerStateCreated { return fmt.Errorf("container %s is not in created state: %s", c.ID(), state.Status) diff --git a/server/cri/v1/rpc_checkpoint_container.go b/server/cri/v1/rpc_checkpoint_container.go index c39cf78d44a..b69a93bf936 100644 --- a/server/cri/v1/rpc_checkpoint_container.go +++ b/server/cri/v1/rpc_checkpoint_container.go @@ -7,6 +7,7 @@ import ( ) func (s *service) CheckpointContainer(ctx context.Context, req *pb.CheckpointContainerRequest) (*pb.CheckpointContainerResponse, error) { - // Spoofing the CheckpointContainer method to return nil response until actual support code is added. 
- return nil, nil + result := s.server.CheckpointContainer(ctx, req) + + return &pb.CheckpointContainerResponse{}, result } diff --git a/server/sandbox_run_linux.go b/server/sandbox_run_linux.go index 61031600820..ba9e84e75e4 100644 --- a/server/sandbox_run_linux.go +++ b/server/sandbox_run_linux.go @@ -667,6 +667,8 @@ func (s *Server) runPodSandbox(ctx context.Context, req *types.RunPodSandboxRequ return nil, err } + sb.SetDNSConfig(sbox.Config().DnsConfig) + if err := s.addSandbox(sb); err != nil { return nil, err } diff --git a/server/suite_test.go b/server/suite_test.go index 0a5a3fcc5d8..e60dc7e66b3 100644 --- a/server/suite_test.go +++ b/server/suite_test.go @@ -153,7 +153,7 @@ var beforeEach = func() { serverConfig.HooksDir = []string{emptyDir} // Initialize test container and sandbox - testSandbox, err = sandbox.New(sandboxID, "", "", "", "", + testSandbox, err = sandbox.New(sandboxID, "", "", "", ".", make(map[string]string), make(map[string]string), "", "", &types.PodSandboxMetadata{}, "", "", false, "", "", "", []*hostport.PortMapping{}, false, time.Now(), "") @@ -232,6 +232,10 @@ func createDummyState() { Expect(os.WriteFile("state.json", []byte(`{}`), 0o644)).To(BeNil()) } +func createDummyConfig() { + Expect(os.WriteFile("config.json", []byte(`{"linux":{},"process":{}}`), 0o644)).To(BeNil()) +} + func mockRuncInLibConfig() { echo, err := exec.LookPath("echo") Expect(err).To(BeNil()) diff --git a/test/checkcriu/checkcriu.go b/test/checkcriu/checkcriu.go new file mode 100644 index 00000000000..2e869a1c43a --- /dev/null +++ b/test/checkcriu/checkcriu.go @@ -0,0 +1,15 @@ +package main + +import ( + "os" + + "github.com/containers/podman/v4/pkg/criu" +) + +func main() { + if !criu.CheckForCriu(criu.PodCriuVersion) { + os.Exit(1) + } + + os.Exit(0) +} diff --git a/test/checkpoint.bats b/test/checkpoint.bats new file mode 100644 index 00000000000..6ff94c3a7bd --- /dev/null +++ b/test/checkpoint.bats @@ -0,0 +1,31 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + if [[ $RUNTIME_TYPE == pod ]]; then + skip "not yet supported by conmonrs" + fi + has_criu + setup_test +} + +function teardown() { + cleanup_test +} + +@test "checkpoint and restore one container into a new pod using --export" { + CONTAINER_DROP_INFRA_CTR=false CONTAINER_ENABLE_CRIU_SUPPORT=true start_crio + pod_id=$(crictl runp "$TESTDATA"/sandbox_config.json) + ctr_id=$(crictl create "$pod_id" "$TESTDATA"/container_sleep.json "$TESTDATA"/sandbox_config.json) + crictl start "$ctr_id" + crictl checkpoint --export="$TESTDIR"/cp.tar "$ctr_id" + crictl rmp -f "$pod_id" + pod_id=$(crictl runp "$TESTDATA"/sandbox_config.json) + # Replace original container with checkpoint image + jq ".image.image=\"$TESTDIR/cp.tar\"" "$TESTDATA"/container_sleep.json > "$TESTDATA"/restore.json + ctr_id=$(crictl create "$pod_id" "$TESTDATA"/restore.json "$TESTDATA"/sandbox_config.json) + rm -f "$TESTDATA"/restore.json + crictl start "$ctr_id" + crictl rmp -f "$pod_id" +} diff --git a/test/helpers.bash b/test/helpers.bash index 47a00659c3e..028ff35310e 100644 --- a/test/helpers.bash +++ b/test/helpers.bash @@ -22,6 +22,9 @@ CRIO_STATUS_BINARY_PATH=${CRIO_STATUS_BINARY_PATH:-${CRIO_ROOT}/bin/crio-status} # Path to the pinns binary PINNS_BINARY_PATH=${PINNS_BINARY_PATH:-${CRIO_ROOT}/bin/pinns} +# Path to the pinns binary +CRIOCTL_BINARY_PATH=${CRIOCTL_BINARY_PATH:-${CRIO_ROOT}/bin/crioctl} + # Path of the crictl binary. 
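The bats test above drives checkpointing through crictl; any CRI v1 client can issue the same RPC against the CRI-O socket once enable_criu_support is on. A hedged sketch (socket path and IDs are placeholders):

package main

import (
    "context"
    "fmt"
    "time"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"
    pb "k8s.io/cri-api/pkg/apis/runtime/v1"
)

func main() {
    // Connect to the default CRI-O socket; adjust the path for your setup.
    conn, err := grpc.Dial("unix:///var/run/crio/crio.sock",
        grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        panic(err)
    }
    defer conn.Close()

    client := pb.NewRuntimeServiceClient(conn)
    ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
    defer cancel()

    // ContainerId may also be a pod sandbox ID; Location is the archive to write.
    _, err = client.CheckpointContainer(ctx, &pb.CheckpointContainerRequest{
        ContainerId: "<container-or-pod-id>", // placeholder
        Location:    "/tmp/cp.tar",
    })
    if err != nil {
        panic(err)
    }
    fmt.Println("checkpoint written to /tmp/cp.tar")
}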
CRICTL_PATH=$(command -v crictl || true) CRICTL_BINARY=${CRICTL_PATH:-/usr/bin/crictl} @@ -34,6 +37,8 @@ CONTAINER_SECCOMP_PROFILE=${CONTAINER_SECCOMP_PROFILE:-${CRIO_ROOT}/vendor/githu CONTAINER_UID_MAPPINGS=${CONTAINER_UID_MAPPINGS:-} CONTAINER_GID_MAPPINGS=${CONTAINER_GID_MAPPINGS:-} OVERRIDE_OPTIONS=${OVERRIDE_OPTIONS:-} +# CNI path +CONTAINER_CNI_PLUGIN_DIR=${CONTAINER_CNI_PLUGIN_DIR:-/opt/cni/bin} # Runtime CONTAINER_DEFAULT_RUNTIME=${CONTAINER_DEFAULT_RUNTIME:-runc} RUNTIME_BINARY_PATH=$(command -v "$CONTAINER_DEFAULT_RUNTIME") @@ -60,6 +65,8 @@ COPYIMG_BINARY=${COPYIMG_BINARY:-${CRIO_ROOT}/test/copyimg/copyimg} ARTIFACTS_PATH=${ARTIFACTS_PATH:-${CRIO_ROOT}/.artifacts} # Path of the checkseccomp binary. CHECKSECCOMP_BINARY=${CHECKSECCOMP_BINARY:-${CRIO_ROOT}/test/checkseccomp/checkseccomp} +# Path of the checkcriu binary. +CHECKCRIU_BINARY=${CHECKCRIU_BINARY:-${CRIO_ROOT}/test/checkcriu/checkcriu} # The default log directory where all logs will go unless directly specified by the kubelet DEFAULT_LOG_PATH=${DEFAULT_LOG_PATH:-/var/log/crio/pods} # Cgroup manager to be used @@ -162,7 +169,7 @@ function setup_test() { # Copy all the CNI dependencies around to ensure encapsulated tests CRIO_CNI_PLUGIN="$TESTDIR/cni-bin" mkdir "$CRIO_CNI_PLUGIN" - cp /opt/cni/bin/* "$CRIO_CNI_PLUGIN" + cp "$CONTAINER_CNI_PLUGIN_DIR"/* "$CRIO_CNI_PLUGIN" cp "$INTEGRATION_ROOT"/cni_plugin_helper.bash "$CRIO_CNI_PLUGIN" sed -i "s;%TEST_DIR%;$TESTDIR;" "$CRIO_CNI_PLUGIN"/cni_plugin_helper.bash @@ -197,6 +204,11 @@ function crictl() { "$CRICTL_BINARY" -t 10m --config "$CRICTL_CONFIG_FILE" -r "unix://$CRIO_SOCKET" -i "unix://$CRIO_SOCKET" "$@" } +# Run crictl using the binary specified by $CRICTL_BINARY. +function crioctl() { + "$CRIOCTL_BINARY_PATH" -d --socket "$CRIO_SOCKET" "$@" +} + # Run the runtime binary with the specified RUNTIME_ROOT function runtime() { "$RUNTIME_BINARY_PATH" --root "$RUNTIME_ROOT" "$@" @@ -658,3 +670,21 @@ function setup_kubensmnt() { $PINNS_BINARY_PATH -d "$PIN_ROOT" -f mnt -m export KUBENSMNT=$PINNED_MNT_NS } + +function has_criu() { + if [ -n "$TEST_USERNS" ]; then + skip "Cannot run CRIU tests in user namespace." + fi + + if [[ "$CONTAINER_DEFAULT_RUNTIME" != "runc" ]]; then + skip "Checkpoint/Restore with pods only works in runc." + fi + + if [ ! -e "$(command -v criu)" ]; then + skip "CRIU binary not found" + fi + + if ! "$CHECKCRIU_BINARY"; then + skip "CRIU too old. At least 3.16 needed." + fi +} diff --git a/test/mocks/oci/oci.go b/test/mocks/oci/oci.go index d0c7f08ee4a..9049cd32119 100644 --- a/test/mocks/oci/oci.go +++ b/test/mocks/oci/oci.go @@ -54,6 +54,20 @@ func (mr *MockRuntimeImplMockRecorder) AttachContainer(arg0, arg1, arg2, arg3, a return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AttachContainer", reflect.TypeOf((*MockRuntimeImpl)(nil).AttachContainer), arg0, arg1, arg2, arg3, arg4, arg5, arg6) } +// CheckpointContainer mocks base method. +func (m *MockRuntimeImpl) CheckpointContainer(arg0 context.Context, arg1 *oci.Container, arg2 *specs.Spec, arg3 bool) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CheckpointContainer", arg0, arg1, arg2, arg3) + ret0, _ := ret[0].(error) + return ret0 +} + +// CheckpointContainer indicates an expected call of CheckpointContainer. 
+func (mr *MockRuntimeImplMockRecorder) CheckpointContainer(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CheckpointContainer", reflect.TypeOf((*MockRuntimeImpl)(nil).CheckpointContainer), arg0, arg1, arg2, arg3) +} + // ContainerStats mocks base method. func (m *MockRuntimeImpl) ContainerStats(arg0 context.Context, arg1 *oci.Container, arg2 string) (*v1.ContainerStats, error) { m.ctrl.T.Helper() @@ -70,17 +84,17 @@ func (mr *MockRuntimeImplMockRecorder) ContainerStats(arg0, arg1, arg2 interface } // CreateContainer mocks base method. -func (m *MockRuntimeImpl) CreateContainer(arg0 context.Context, arg1 *oci.Container, arg2 string) error { +func (m *MockRuntimeImpl) CreateContainer(arg0 context.Context, arg1 *oci.Container, arg2 string, arg3 bool) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateContainer", arg0, arg1, arg2) + ret := m.ctrl.Call(m, "CreateContainer", arg0, arg1, arg2, arg3) ret0, _ := ret[0].(error) return ret0 } // CreateContainer indicates an expected call of CreateContainer. -func (mr *MockRuntimeImplMockRecorder) CreateContainer(arg0, arg1, arg2 interface{}) *gomock.Call { +func (mr *MockRuntimeImplMockRecorder) CreateContainer(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateContainer", reflect.TypeOf((*MockRuntimeImpl)(nil).CreateContainer), arg0, arg1, arg2) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateContainer", reflect.TypeOf((*MockRuntimeImpl)(nil).CreateContainer), arg0, arg1, arg2, arg3) } // DeleteContainer mocks base method. @@ -168,6 +182,20 @@ func (mr *MockRuntimeImplMockRecorder) ReopenContainerLog(arg0, arg1 interface{} return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReopenContainerLog", reflect.TypeOf((*MockRuntimeImpl)(nil).ReopenContainerLog), arg0, arg1) } +// RestoreContainer mocks base method. +func (m *MockRuntimeImpl) RestoreContainer(arg0 context.Context, arg1 *oci.Container, arg2 *specs.Spec, arg3 int, arg4 string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RestoreContainer", arg0, arg1, arg2, arg3, arg4) + ret0, _ := ret[0].(error) + return ret0 +} + +// RestoreContainer indicates an expected call of RestoreContainer. +func (mr *MockRuntimeImplMockRecorder) RestoreContainer(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RestoreContainer", reflect.TypeOf((*MockRuntimeImpl)(nil).RestoreContainer), arg0, arg1, arg2, arg3, arg4) +} + // SignalContainer mocks base method. func (m *MockRuntimeImpl) SignalContainer(arg0 context.Context, arg1 *oci.Container, arg2 syscall.Signal) error { m.ctrl.T.Helper() diff --git a/vendor/modules.txt b/vendor/modules.txt index 1550fd64d71..49e378ad590 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -225,8 +225,6 @@ github.com/acarl005/stripansi # github.com/acomagu/bufpipe v1.0.3 ## explicit; go 1.12 github.com/acomagu/bufpipe -# github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be -## explicit; go 1.13 # github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d ## explicit; go 1.13 github.com/asaskevich/govalidator
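The regenerated mocks above add CheckpointContainer/RestoreContainer and the extra restore argument on CreateContainer. A minimal sketch of programming them in a test, assuming the generated package's import path and the NewMockRuntimeImpl constructor that gomock conventionally emits:

package oci_test

import (
    "context"
    "testing"

    "github.com/golang/mock/gomock"
    specs "github.com/opencontainers/runtime-spec/specs-go"

    // Import path and alias are assumptions; any package holding the
    // generated MockRuntimeImpl is used the same way.
    ocimock "github.com/cri-o/cri-o/test/mocks/oci"
)

func TestCheckpointExpectation(t *testing.T) {
    ctrl := gomock.NewController(t)
    defer ctrl.Finish()

    impl := ocimock.NewMockRuntimeImpl(ctrl)
    // Expect exactly one checkpoint call that leaves the container running.
    impl.EXPECT().
        CheckpointContainer(gomock.Any(), gomock.Any(), gomock.Any(), true).
        Return(nil)

    if err := impl.CheckpointContainer(context.Background(), nil, &specs.Spec{}, true); err != nil {
        t.Fatal(err)
    }
}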