diff --git a/completions/bash/crio b/completions/bash/crio index 51cb938b239..d15429b9fe7 100755 --- a/completions/bash/crio +++ b/completions/bash/crio @@ -37,6 +37,7 @@ h --default-sysctls --default-transport --default-ulimits +--drop-infra-ctr --enable-metrics --gid-mappings --global-auth-file diff --git a/completions/fish/crio.fish b/completions/fish/crio.fish index a1c0a30f805..62dd00afc08 100644 --- a/completions/fish/crio.fish +++ b/completions/fish/crio.fish @@ -43,6 +43,7 @@ complete -c crio -n '__fish_crio_no_subcommand' -f -l default-runtime -r -d 'Def complete -c crio -n '__fish_crio_no_subcommand' -f -l default-sysctls -r -d 'Sysctls to add to the containers' complete -c crio -n '__fish_crio_no_subcommand' -f -l default-transport -r -d 'A prefix to prepend to image names that cannot be pulled as-is' complete -c crio -n '__fish_crio_no_subcommand' -f -l default-ulimits -r -d 'Ulimits to apply to containers by default (name=soft:hard) (default: [])' +complete -c crio -n '__fish_crio_no_subcommand' -f -l drop-infra-ctr -d 'Determines whether pods are created without an infra container (when the pod is not using a pod level PID namespace). Requires ManageNSLifecycle to be true (default: false)' complete -c crio -n '__fish_crio_no_subcommand' -f -l enable-metrics -d 'Enable metrics endpoint for the server on localhost:9090' complete -c crio -n '__fish_crio_no_subcommand' -f -l gid-mappings -r -d 'Specify the GID mappings to use for the user namespace (default: "")' complete -c crio -n '__fish_crio_no_subcommand' -l global-auth-file -r -d 'Path to a file like /var/lib/kubelet/config.json holding credentials necessary for pulling images from secure registries (default: "")' diff --git a/completions/zsh/_crio b/completions/zsh/_crio index acbd96cbcb3..0c70c0d131d 100644 --- a/completions/zsh/_crio +++ b/completions/zsh/_crio @@ -7,7 +7,7 @@ it later with **--config**. Global options will modify the output.' 
'version:dis _describe 'commands' cmds local -a opts - opts=('--additional-devices' '--allow-userns-annotation' '--apparmor-profile' '--big-files-temporary-dir' '--bind-mount-prefix' '--cgroup-manager' '--cni-config-dir' '--cni-default-network' '--cni-plugin-dir' '--config' '--config-dir' '--conmon' '--conmon-cgroup' '--conmon-env' '--container-attach-socket-dir' '--container-exits-dir' '--ctr-stop-timeout' '--decryption-keys-path' '--default-capabilities' '--default-env' '--default-mounts-file' '--default-runtime' '--default-sysctls' '--default-transport' '--default-ulimits' '--enable-metrics' '--gid-mappings' '--global-auth-file' '--grpc-max-recv-msg-size' '--grpc-max-send-msg-size' '--hooks-dir' '--image-volumes' '--insecure-registry' '--listen' '--log' '--log-dir' '--log-filter' '--log-format' '--log-journald' '--log-level' '--log-size-max' '--manage-ns-lifecycle' '--metrics-port' '--metrics-socket' '--namespaces-dir' '--no-pivot' '--pause-command' '--pause-image' '--pause-image-auth-file' '--pids-limit' '--pinns-path' '--profile' '--profile-port' '--read-only' '--registries-conf' '--registry' '--root' '--runroot' '--runtimes' '--seccomp-profile' '--selinux' '--signature-policy' '--storage-driver' '--storage-opt' '--stream-address' '--stream-enable-tls' '--stream-port' '--stream-tls-ca' '--stream-tls-cert' '--stream-tls-key' '--uid-mappings' '--version-file' '--version-file-persist' '--help' '--version') + opts=('--additional-devices' '--allow-userns-annotation' '--apparmor-profile' '--big-files-temporary-dir' '--bind-mount-prefix' '--cgroup-manager' '--cni-config-dir' '--cni-default-network' '--cni-plugin-dir' '--config' '--config-dir' '--conmon' '--conmon-cgroup' '--conmon-env' '--container-attach-socket-dir' '--container-exits-dir' '--ctr-stop-timeout' '--decryption-keys-path' '--default-capabilities' '--default-env' '--default-mounts-file' '--default-runtime' '--default-sysctls' '--default-transport' '--default-ulimits' '--drop-infra-ctr' '--enable-metrics' '--gid-mappings' '--global-auth-file' '--grpc-max-recv-msg-size' '--grpc-max-send-msg-size' '--hooks-dir' '--image-volumes' '--insecure-registry' '--listen' '--log' '--log-dir' '--log-filter' '--log-format' '--log-journald' '--log-level' '--log-size-max' '--manage-ns-lifecycle' '--metrics-port' '--metrics-socket' '--namespaces-dir' '--no-pivot' '--pause-command' '--pause-image' '--pause-image-auth-file' '--pids-limit' '--pinns-path' '--profile' '--profile-port' '--read-only' '--registries-conf' '--registry' '--root' '--runroot' '--runtimes' '--seccomp-profile' '--selinux' '--signature-policy' '--storage-driver' '--storage-opt' '--stream-address' '--stream-enable-tls' '--stream-port' '--stream-tls-ca' '--stream-tls-cert' '--stream-tls-key' '--uid-mappings' '--version-file' '--version-file-persist' '--help' '--version') _describe 'global options' opts return diff --git a/docs/crio.8.md b/docs/crio.8.md index 9231216007c..d6038195682 100644 --- a/docs/crio.8.md +++ b/docs/crio.8.md @@ -36,6 +36,7 @@ crio [--default-sysctls]=[value] [--default-transport]=[value] [--default-ulimits]=[value] +[--drop-infra-ctr] [--enable-metrics] [--gid-mappings]=[value] [--global-auth-file]=[value] @@ -170,6 +171,8 @@ crio [GLOBAL OPTIONS] command [COMMAND OPTIONS] [ARGUMENTS...] **--default-ulimits**="": Ulimits to apply to containers by default (name=soft:hard) (default: []) (default: []) +**--drop-infra-ctr**: Determines whether pods are created without an infra container (when the pod is not using a pod level PID namespace). 
Requires ManageNSLifecycle to be true (default: false) + **--enable-metrics**: Enable metrics endpoint for the server on localhost:9090 **--gid-mappings**="": Specify the GID mappings to use for the user namespace (default: "") diff --git a/docs/crio.conf.5.md b/docs/crio.conf.5.md index 5728afa8e8b..81e7e730795 100644 --- a/docs/crio.conf.5.md +++ b/docs/crio.conf.5.md @@ -222,6 +222,10 @@ the container runtime configuration. **manage_ns_lifecycle**=true Determines whether we pin and remove namespaces and manage their lifecycle. +**drop_infra_ctr**=false + Determines whether we drop the infra container when a pod does not have a private PID namespace, and does not use a kernel separating runtime (like kata). + Requires **manage_ns_lifecycle** to be true. + **namespaces_dir**="/var/run" The directory where the state of the managed namespaces gets tracked. Only used when manage_ns_lifecycle is true diff --git a/internal/criocli/criocli.go b/internal/criocli/criocli.go index e022dbfa725..30a735f6988 100644 --- a/internal/criocli/criocli.go +++ b/internal/criocli/criocli.go @@ -248,6 +248,9 @@ func mergeConfig(config *libconfig.Config, ctx *cli.Context) error { if ctx.IsSet("manage-ns-lifecycle") { config.ManageNSLifecycle = ctx.Bool("manage-ns-lifecycle") } + if ctx.IsSet("drop-infra-ctr") { + config.DropInfraCtr = ctx.Bool("drop-infra-ctr") + } if ctx.IsSet("namespaces-dir") { config.NamespacesDir = ctx.String("namespaces-dir") } @@ -725,6 +728,11 @@ func getCrioFlags(defConf *libconfig.Config) []cli.Flag { Usage: fmt.Sprintf("Determines whether we pin and remove IPC, network and UTS namespaces and manage their lifecycle (default: %v)", defConf.ManageNSLifecycle), EnvVars: []string{"CONTAINER_MANAGE_NS_LIFECYCLE"}, }, + &cli.BoolFlag{ + Name: "drop-infra-ctr", + Usage: fmt.Sprintf("Determines whether pods are created without an infra container (when the pod is not using a pod level PID namespace). Requires ManageNSLifecycle to be true (default: %v)", defConf.DropInfraCtr), + EnvVars: []string{"CONTAINER_DROP_INFRA_CTR"}, + }, &cli.StringFlag{ Name: "pinns-path", Usage: fmt.Sprintf("The path to find the pinns binary, which is needed to manage namespace lifecycle. Will be searched for in $PATH if empty (default: %q)", defConf.PinnsPath), diff --git a/internal/lib/container_server.go b/internal/lib/container_server.go index 780d69e09d4..364c6ef2951 100644 --- a/internal/lib/container_server.go +++ b/internal/lib/container_server.go @@ -16,6 +16,7 @@ import ( "github.com/cri-o/cri-o/internal/lib/sandbox" "github.com/cri-o/cri-o/internal/oci" "github.com/cri-o/cri-o/internal/storage" + crioann "github.com/cri-o/cri-o/pkg/annotations" libconfig "github.com/cri-o/cri-o/pkg/config" json "github.com/json-iterator/go" rspec "github.com/opencontainers/runtime-spec/specs-go" @@ -31,10 +32,6 @@ import ( // `io.container.manager`. const ContainerManagerCRIO = "cri-o" -// UsernsMode is the user namespace mode to use -// TODO: move to the annotations pkg. 
-const UsernsModeAnnotation = "io.kubernetes.cri-o.userns-mode" - // ContainerServer implements the ImageServer type ContainerServer struct { runtime *oci.Runtime @@ -203,7 +200,7 @@ func (c *ContainerServer) LoadSandbox(id string) (retErr error) { return errors.Wrap(err, "parsing created timestamp annotation") } - sb, err := sandbox.New(id, m.Annotations[annotations.Namespace], name, m.Annotations[annotations.KubeName], filepath.Dir(m.Annotations[annotations.LogPath]), labels, kubeAnnotations, processLabel, mountLabel, &metadata, m.Annotations[annotations.ShmPath], m.Annotations[annotations.CgroupParent], privileged, m.Annotations[annotations.RuntimeHandler], m.Annotations[annotations.ResolvPath], m.Annotations[annotations.HostName], portMappings, hostNetwork, created, m.Annotations[UsernsModeAnnotation]) + sb, err := sandbox.New(id, m.Annotations[annotations.Namespace], name, m.Annotations[annotations.KubeName], filepath.Dir(m.Annotations[annotations.LogPath]), labels, kubeAnnotations, processLabel, mountLabel, &metadata, m.Annotations[annotations.ShmPath], m.Annotations[annotations.CgroupParent], privileged, m.Annotations[annotations.RuntimeHandler], m.Annotations[annotations.ResolvPath], m.Annotations[annotations.HostName], portMappings, hostNetwork, created, m.Annotations[crioann.UsernsModeAnnotation]) if err != nil { return err } @@ -262,7 +259,9 @@ func (c *ContainerServer) LoadSandbox(id string) (retErr error) { return err } - cname, err := c.ReserveContainerName(m.Annotations[annotations.ContainerID], m.Annotations[annotations.ContainerName]) + cID := m.Annotations[annotations.ContainerID] + + cname, err := c.ReserveContainerName(cID, m.Annotations[annotations.ContainerName]) if err != nil { return err } @@ -272,21 +271,34 @@ func (c *ContainerServer) LoadSandbox(id string) (retErr error) { } }() - scontainer, err := oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], labels, m.Annotations, kubeAnnotations, m.Annotations[annotations.Image], "", "", nil, id, false, false, false, sb.RuntimeHandler(), sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) - if err != nil { - return err + var scontainer *oci.Container + + // We should not rely on whether the server currently has DropInfraCtr set, but rather + // on whether it did when this sandbox was created. 
+ wasSpoofed := false + if spoofed, ok := m.Annotations[crioann.SpoofedContainer]; ok && spoofed == "true" { + wasSpoofed = true } - scontainer.SetSpec(&m) - scontainer.SetMountPoint(m.Annotations[annotations.MountPoint]) - if m.Annotations[annotations.Volumes] != "" { - containerVolumes := []oci.ContainerVolume{} - if err = json.Unmarshal([]byte(m.Annotations[annotations.Volumes]), &containerVolumes); err != nil { - return fmt.Errorf("failed to unmarshal container volumes: %v", err) + if !wasSpoofed { + scontainer, err = oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], labels, m.Annotations, kubeAnnotations, m.Annotations[annotations.Image], "", "", nil, id, false, false, false, sb.RuntimeHandler(), sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) + if err != nil { + return err } - for _, cv := range containerVolumes { - scontainer.AddVolume(cv) + scontainer.SetSpec(&m) + scontainer.SetMountPoint(m.Annotations[annotations.MountPoint]) + + if m.Annotations[annotations.Volumes] != "" { + containerVolumes := []oci.ContainerVolume{} + if err = json.Unmarshal([]byte(m.Annotations[annotations.Volumes]), &containerVolumes); err != nil { + return fmt.Errorf("failed to unmarshal container volumes: %v", err) + } + for _, cv := range containerVolumes { + scontainer.AddVolume(cv) + } } + } else { + scontainer = oci.NewSpoofedContainer(cID, cname, labels, created, sandboxPath) } if err := c.ContainerStateFromDisk(scontainer); err != nil { @@ -296,14 +308,37 @@ func (c *ContainerServer) LoadSandbox(id string) (retErr error) { // We write back the state because it is possible that crio did not have a chance to // read the exit file and persist exit code into the state on reboot. if err := c.ContainerStateToDisk(scontainer); err != nil { - return fmt.Errorf("failed to write container state to disk %q: %v", scontainer.ID(), err) + return fmt.Errorf("failed to write container %q state to disk: %v", scontainer.ID(), err) } - sb.SetCreated() - if err := label.ReserveLabel(processLabel); err != nil { + if err := sb.SetInfraContainer(scontainer); err != nil { return err } - if err := sb.SetInfraContainer(scontainer); err != nil { + + // We add an NS only if we can load a permanent one. + // Otherwise, the sandbox will live in the host namespace. 
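The spoofed check above reads only the persisted OCI spec, because the server's live `DropInfraCtr` setting may have changed across a restart. A minimal, self-contained sketch of that decision follows (the `wasSpoofedOnDisk` helper name is illustrative, not part of the diff, which inlines the same logic); the hunk resumes with the namespace-join loop right after this sketch.

```go
package main

import (
	"fmt"

	rspec "github.com/opencontainers/runtime-spec/specs-go"
)

// Mirrors the annotation key added in pkg/annotations/annotations.go.
const spoofedContainer = "io.kubernetes.cri-o.Spoofed"

// wasSpoofedOnDisk reports whether the sandbox was created without a real
// infra container, judging solely by the spec written at creation time.
func wasSpoofedOnDisk(m *rspec.Spec) bool {
	// A missing key, or any value other than "true", means a real infra
	// container was created for this sandbox.
	v, ok := m.Annotations[spoofedContainer]
	return ok && v == "true"
}

func main() {
	m := &rspec.Spec{Annotations: map[string]string{spoofedContainer: "true"}}
	fmt.Println(wasSpoofedOnDisk(m)) // true
}
```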
+ if c.config.ManageNSLifecycle || wasSpoofed { + namespacesToJoin := []struct { + rspecNS rspec.LinuxNamespaceType + joinFunc func(string) error + }{ + {rspecNS: rspec.NetworkNamespace, joinFunc: sb.NetNsJoin}, + {rspecNS: rspec.IPCNamespace, joinFunc: sb.IpcNsJoin}, + {rspecNS: rspec.UTSNamespace, joinFunc: sb.UtsNsJoin}, + {rspecNS: rspec.UserNamespace, joinFunc: sb.UserNsJoin}, + } + for _, namespaceToJoin := range namespacesToJoin { + path, err := configNsPath(&m, namespaceToJoin.rspecNS) + if err == nil { + if nsErr := namespaceToJoin.joinFunc(path); nsErr != nil { + return nsErr + } + } + } + } + + sb.SetCreated() + if err := label.ReserveLabel(processLabel); err != nil { return err } @@ -468,9 +503,6 @@ func (c *ContainerServer) ContainerStateFromDisk(ctr *oci.Container) error { // ContainerStateToDisk writes the container's state information to a JSON file // on disk func (c *ContainerServer) ContainerStateToDisk(ctr *oci.Container) error { - if ctr == nil { - return nil - } if err := c.Runtime().UpdateContainerStatus(ctr); err != nil { logrus.Warnf("error updating the container status %q: %v", ctr.ID(), err) } diff --git a/internal/lib/sandbox/namespaces.go b/internal/lib/sandbox/namespaces.go index 6f2b5790f14..c18c9538cd0 100644 --- a/internal/lib/sandbox/namespaces.go +++ b/internal/lib/sandbox/namespaces.go @@ -69,19 +69,21 @@ func (m *ManagedNamespace) Path() string { // CreateManagedNamespaces calls pinnsPath on all the managed namespaces for the sandbox. // It returns a slice of ManagedNamespaces it created. -func (s *Sandbox) CreateManagedNamespaces(managedNamespaces []NSType, idMappings *idtools.IDMappings, cfg *config.Config) ([]*ManagedNamespace, error) { - return s.CreateNamespacesWithFunc(managedNamespaces, idMappings, cfg, pinNamespaces) +func (s *Sandbox) CreateManagedNamespaces(managedNamespaces []NSType, idMappings *idtools.IDMappings, sysctls map[string]string, cfg *config.Config) ([]*ManagedNamespace, error) { + return s.CreateNamespacesWithFunc(managedNamespaces, idMappings, sysctls, cfg, pinNamespaces) } +type namespacePinner func([]NSType, *config.Config, *idtools.IDMappings, map[string]string) ([]NamespaceIface, error) + // CreateManagedNamespacesWithFunc is mainly added for testing purposes. There's no point in actually calling the pinns binary // in unit tests, so this function allows the actual pin func to be abstracted out. 
Every other caller should use CreateManagedNamespaces -func (s *Sandbox) CreateNamespacesWithFunc(managedNamespaces []NSType, idMappings *idtools.IDMappings, cfg *config.Config, pinFunc func([]NSType, *config.Config, *idtools.IDMappings) ([]NamespaceIface, error)) (mns []*ManagedNamespace, retErr error) { +func (s *Sandbox) CreateNamespacesWithFunc(managedNamespaces []NSType, idMappings *idtools.IDMappings, sysctls map[string]string, cfg *config.Config, pinFunc namespacePinner) (mns []*ManagedNamespace, retErr error) { typesAndPaths := make([]*ManagedNamespace, 0, 4) if len(managedNamespaces) == 0 { return typesAndPaths, nil } - namespaces, err := pinFunc(managedNamespaces, cfg, idMappings) + namespaces, err := pinFunc(managedNamespaces, cfg, idMappings, sysctls) if err != nil { return nil, err } @@ -305,7 +307,7 @@ func (s *Sandbox) nsPath(ns NamespaceIface, nsType NSType) string { // if the infra container is nil, pid is returned negative func infraPid(infra *oci.Container) int { pid := -1 - if infra != nil { + if infra != nil && !infra.Spoofed() { var err error pid, err = infra.Pid() // There are some cases where ErrNotInitialized is expected. diff --git a/internal/lib/sandbox/namespaces_linux.go b/internal/lib/sandbox/namespaces_linux.go index 39367454889..6c1f53a27e6 100644 --- a/internal/lib/sandbox/namespaces_linux.go +++ b/internal/lib/sandbox/namespaces_linux.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "path/filepath" + "strings" "sync" nspkg "github.com/containernetworking/plugins/pkg/ns" @@ -64,7 +65,7 @@ func getMappingsForPinns(mappings []idtools.IDMap) string { // Creates a new persistent namespace and returns an object // representing that namespace, without switching to it -func pinNamespaces(nsTypes []NSType, cfg *config.Config, idMappings *idtools.IDMappings) ([]NamespaceIface, error) { +func pinNamespaces(nsTypes []NSType, cfg *config.Config, idMappings *idtools.IDMappings, sysctls map[string]string) ([]NamespaceIface, error) { typeToArg := map[NSType]string{ IPCNS: "-i", UTSNS: "-u", @@ -77,6 +78,11 @@ func pinNamespaces(nsTypes []NSType, cfg *config.Config, idMappings *idtools.IDM "-d", cfg.NamespacesDir, "-f", pinnedNamespace, } + + if len(sysctls) != 0 { + pinnsArgs = append(pinnsArgs, "-s", getSysctlForPinns(sysctls)) + } + type namespaceInfo struct { path string nsType NSType @@ -152,6 +158,16 @@ func pinNamespaces(nsTypes []NSType, cfg *config.Config, idMappings *idtools.IDM return returnedNamespaces, nil } +func getSysctlForPinns(sysctls map[string]string) string { + // this assumes there's no sysctl with a `+` in it + const pinnsSysctlDelim = "+" + g := new(bytes.Buffer) + for key, value := range sysctls { + fmt.Fprintf(g, "'%s=%s'%s", key, value, pinnsSysctlDelim) + } + return strings.TrimSuffix(g.String(), pinnsSysctlDelim) +} + // getNamespace takes a path, checks if it is a namespace, and if so // returns a Namespace func getNamespace(nsPath string) (*Namespace, error) { diff --git a/internal/lib/sandbox/namespaces_test.go b/internal/lib/sandbox/namespaces_test.go index 5a7d6a5aa89..60c78dc0e2e 100644 --- a/internal/lib/sandbox/namespaces_test.go +++ b/internal/lib/sandbox/namespaces_test.go @@ -41,7 +41,7 @@ type pinNamespacesFunctor struct { // pinNamespaces is a spoof of namespaces_linux.go:pinNamespaces. 
// it calls ifaceModifyFunc() to customize the behavior of this functor -func (p *pinNamespacesFunctor) pinNamespaces(nsTypes []sandbox.NSType, cfg *config.Config, mappings *idtools.IDMappings) ([]sandbox.NamespaceIface, error) { +func (p *pinNamespacesFunctor) pinNamespaces(nsTypes []sandbox.NSType, cfg *config.Config, mappings *idtools.IDMappings, sysctls map[string]string) ([]sandbox.NamespaceIface, error) { ifaces := make([]sandbox.NamespaceIface, 0) for _, nsType := range nsTypes { if mappings == nil && nsType == sandbox.USERNS { @@ -98,7 +98,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { managedNamespaces := make([]sandbox.NSType, 0) // When - ns, err := testSandbox.CreateManagedNamespaces(managedNamespaces, nil, nil) + ns, err := testSandbox.CreateManagedNamespaces(managedNamespaces, idMappings, nil, nil) // Then Expect(err).To(BeNil()) @@ -116,7 +116,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { managedNamespaces := []sandbox.NSType{"invalid"} // When - _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, nil, nil, withRemoval.pinNamespaces) + _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, nil, withRemoval.pinNamespaces) // Then Expect(err).To(Not(BeNil())) @@ -129,7 +129,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { } successful := newGenericFunctor() // When - createdNamespaces, err := testSandbox.CreateNamespacesWithFunc(allManagedNamespaces, idMappings, nil, successful.pinNamespaces) + createdNamespaces, err := testSandbox.CreateNamespacesWithFunc(allManagedNamespaces, idMappings, nil, nil, successful.pinNamespaces) // Then Expect(err).To(BeNil()) @@ -161,7 +161,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { }, } - createdNamespaces, err := testSandbox.CreateNamespacesWithFunc(allManagedNamespaces, idMappings, nil, withTmpDir.pinNamespaces) + createdNamespaces, err := testSandbox.CreateNamespacesWithFunc(allManagedNamespaces, idMappings, nil, nil, withTmpDir.pinNamespaces) Expect(err).To(BeNil()) for _, ns := range createdNamespaces { @@ -245,7 +245,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { successful := newGenericFunctor() // When - _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, successful.pinNamespaces) + _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, nil, successful.pinNamespaces) Expect(err).To(BeNil()) err = testSandbox.NetNsJoin("/proc/self/ns/net") @@ -258,7 +258,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { successful := newGenericFunctor() // When - _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, nil, nil, successful.pinNamespaces) + _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, nil, successful.pinNamespaces) Expect(err).To(BeNil()) err = testSandbox.IpcNsJoin("/proc/self/ns/ipc") @@ -271,7 +271,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { successful := newGenericFunctor() // When - _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, nil, nil, successful.pinNamespaces) + _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, nil, successful.pinNamespaces) Expect(err).To(BeNil()) err = testSandbox.UtsNsJoin("/proc/self/ns/uts") @@ -283,7 +283,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { managedNamespaces := []sandbox.NSType{"user"} successful := newGenericFunctor() // When - _, err := 
testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, successful.pinNamespaces) + _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, nil, successful.pinNamespaces) Expect(err).To(BeNil()) err = testSandbox.UserNsJoin("/proc/self/ns/user") @@ -371,7 +371,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { }, } - _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, getPath.pinNamespaces) + _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, nil, getPath.pinNamespaces) Expect(err).To(BeNil()) // When @@ -390,7 +390,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { }, } - _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, getPath.pinNamespaces) + _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, nil, getPath.pinNamespaces) Expect(err).To(BeNil()) // When @@ -409,7 +409,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { }, } - _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, getPath.pinNamespaces) + _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, nil, getPath.pinNamespaces) Expect(err).To(BeNil()) // When @@ -428,7 +428,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { }, } - _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, getPath.pinNamespaces) + _, err := testSandbox.CreateNamespacesWithFunc(managedNamespaces, idMappings, nil, nil, getPath.pinNamespaces) Expect(err).To(BeNil()) // When @@ -480,7 +480,7 @@ var _ = t.Describe("SandboxManagedNamespaces", func() { }, } // When - _, err := testSandbox.CreateNamespacesWithFunc(allManagedNamespaces, idMappings, nil, getPath.pinNamespaces) + _, err := testSandbox.CreateNamespacesWithFunc(allManagedNamespaces, idMappings, nil, nil, getPath.pinNamespaces) Expect(err).To(BeNil()) // When nsPaths := testSandbox.NamespacePaths() diff --git a/internal/lib/sandbox/sandbox.go b/internal/lib/sandbox/sandbox.go index 86ec286b60a..c2cef1f582e 100644 --- a/internal/lib/sandbox/sandbox.go +++ b/internal/lib/sandbox/sandbox.go @@ -382,15 +382,20 @@ func (s *Sandbox) Created() bool { } // Ready returns whether the sandbox should be marked as ready to the kubelet -// if there is no infra container, it is always considered ready -// takeLock should be set if we need to take the lock to get the infra container's state +// if there is no infra container, it is never considered ready. +// `takeLock` should be set if we need to take the lock to get the infra container's state. +// If the infra container is spoofed, the pod is considered ready when it has been created, but not stopped. func (s *Sandbox) Ready(takeLock bool) bool { podInfraContainer := s.InfraContainer() if podInfraContainer == nil { - // Assume the sandbox is ready, unless it has an infra container that - // isn't running - return true + return false + } + if podInfraContainer.Spoofed() { + return s.created && !s.stopped } + // Assume the sandbox is ready, unless it has an infra container that + // isn't running var cState *oci.ContainerState if takeLock { cState = podInfraContainer.State() @@ -417,3 +422,10 @@ func (s *Sandbox) UnmountShm() error { return nil } + +// NeedsInfra is a function that returns whether the sandbox will need an infra container. +// If the server is configured to drop the infra container, and the Pid option on the sandbox +// is node or container level, the infra container is not needed +func (s *Sandbox) NeedsInfra(serverDropsInfra bool) bool { + return !serverDropsInfra || s.nsOpts.GetPid() == pb.NamespaceMode_POD +}
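To make the two-sided condition concrete, here is a quick truth table for the logic above (an illustrative standalone copy; the real method hangs off *Sandbox and reads the stored namespace options):

```go
package main

import (
	"fmt"

	pb "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)

// needsInfra mirrors Sandbox.NeedsInfra: infra is kept when the server never
// drops it, or when the pod requests a pod-level PID namespace that a real
// infra process must hold open.
func needsInfra(serverDropsInfra bool, pid pb.NamespaceMode) bool {
	return !serverDropsInfra || pid == pb.NamespaceMode_POD
}

func main() {
	fmt.Println(needsInfra(false, pb.NamespaceMode_NODE))     // true: dropping disabled
	fmt.Println(needsInfra(true, pb.NamespaceMode_POD))       // true: pod-level PID namespace
	fmt.Println(needsInfra(true, pb.NamespaceMode_CONTAINER)) // false: infra can be dropped
}
```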
diff --git a/internal/lib/sandbox/sandbox_test.go b/internal/lib/sandbox/sandbox_test.go index 37735c6593a..da213d324ff 100644 --- a/internal/lib/sandbox/sandbox_test.go +++ b/internal/lib/sandbox/sandbox_test.go @@ -257,4 +257,61 @@ var _ = t.Describe("Sandbox", func() { Expect(err).NotTo(BeNil()) }) }) + t.Describe("NeedsInfra", func() { + It("should not need when managing NS and NS mode NODE", func() { + // Given + manageNS := true + newNamespaceOption := &pb.NamespaceOption{ + Pid: pb.NamespaceMode_NODE, + } + + // When + testSandbox.SetNamespaceOptions(newNamespaceOption) + + // Then + Expect(testSandbox.NeedsInfra(manageNS)).To(Equal(false)) + }) + + It("should not need when managing NS and NS mode CONTAINER", func() { + // Given + manageNS := true + newNamespaceOption := &pb.NamespaceOption{ + Pid: pb.NamespaceMode_CONTAINER, + } + + // When + testSandbox.SetNamespaceOptions(newNamespaceOption) + + // Then + Expect(testSandbox.NeedsInfra(manageNS)).To(Equal(false)) + }) + + It("should need when namespace mode POD", func() { + // Given + manageNS := false + newNamespaceOption := &pb.NamespaceOption{ + Pid: pb.NamespaceMode_POD, + } + + // When + testSandbox.SetNamespaceOptions(newNamespaceOption) + + // Then + Expect(testSandbox.NeedsInfra(manageNS)).To(Equal(true)) + }) + + It("should need when not managing NS", func() { + // Given + manageNS := false + newNamespaceOption := &pb.NamespaceOption{ + Pid: pb.NamespaceMode_CONTAINER, + } + + // When + testSandbox.SetNamespaceOptions(newNamespaceOption) + + // Then + Expect(testSandbox.NeedsInfra(manageNS)).To(Equal(true)) + }) + }) }) diff --git a/internal/oci/container.go b/internal/oci/container.go index 8fcdaae16dd..2e31d2a37a5 100644 --- a/internal/oci/container.go +++ b/internal/oci/container.go @@ -14,6 +14,7 @@ import ( "github.com/containers/libpod/v2/pkg/cgroups" "github.com/containers/storage/pkg/idtools" + ann "github.com/cri-o/cri-o/pkg/annotations" json "github.com/json-iterator/go" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" @@ -71,6 +72,7 @@ type Container struct { stdin bool stdinOnce bool created bool + spoofed bool } // ContainerVolume is a bind mount for the container. @@ -124,6 +126,24 @@ func NewContainer(id, name, bundlePath, logPath string, labels, crioAnnotations, return c, nil } +// NewSpoofedContainer creates a Container that is never created in the OCI runtime; it is only used for bookkeeping. +func NewSpoofedContainer(id, name string, labels map[string]string, created time.Time, dir string) *Container { + state := &ContainerState{} + state.Created = created + state.Started = created + c := &Container{ + id: id, + name: name, + labels: labels, + spoofed: true, + state: state, + dir: dir, + } + c.annotations = map[string]string{ + ann.SpoofedContainer: "true", + } + return c +} + // SetSpec loads the OCI spec in the container struct func (c *Container) SetSpec(s *specs.Spec) { c.spec = s @@ -234,14 +254,14 @@ func (c *Container) Name() string { // ID returns the id of the container. func (c *Container) ID() string { - if c == nil { - return "" - } return c.id } // CleanupConmonCgroup cleans up conmon's group when using cgroupfs. 
func (c *Container) CleanupConmonCgroup() { + if c.spoofed { + return + } path := c.ConmonCgroupfsPath() if path == "" { return @@ -505,3 +525,11 @@ func (c *Container) ShouldBeStopped() error { } return nil } + +// Spoofed returns whether this container is spoofed. +// A container should be spoofed when it doesn't have to exist in the container runtime, +// but does need to exist in the storage. The main use of this is when an infra container +// is not needed, but sandbox metadata should be stored with a spoofed infra container. +func (c *Container) Spoofed() bool { + return c.spoofed +} diff --git a/internal/oci/container_test.go b/internal/oci/container_test.go index 65239414bc6..51289f6811f 100644 --- a/internal/oci/container_test.go +++ b/internal/oci/container_test.go @@ -54,6 +54,7 @@ var _ = t.Describe("Container", func() { Expect(sut.GetStopSignal()).To(Equal("15")) Expect(sut.CreatedAt().UnixNano()). To(BeNumerically("<", time.Now().UnixNano())) + Expect(sut.Spoofed()).To(Equal(false)) }) It("should succeed to set the spec", func() { @@ -492,3 +493,20 @@ var _ = t.Describe("Container", func() { }) }) }) + +var _ = t.Describe("SpoofedContainer", func() { + It("should succeed to get the container fields", func() { + sut := oci.NewSpoofedContainer("id", "name", map[string]string{"key": "label"}, time.Now(), "dir") + // Given + // When + // Then + Expect(sut.ID()).To(Equal("id")) + Expect(sut.Name()).To(Equal("name")) + labels := sut.Labels() + Expect(labels["key"]).To(Equal("label")) + Expect(sut.Spoofed()).To(Equal(true)) + Expect(sut.CreatedAt().UnixNano()). + To(BeNumerically("<", time.Now().UnixNano())) + Expect(sut.Dir()).To(Equal("dir")) + }) +}) diff --git a/internal/oci/oci.go b/internal/oci/oci.go index b67ab1b4316..6e1828de8bc 100644 --- a/internal/oci/oci.go +++ b/internal/oci/oci.go @@ -189,11 +189,11 @@ func (r *Runtime) PrivilegedWithoutHostDevices(handler string) (bool, error) { return rh.PrivilegedWithoutHostDevices, nil } -// ContainerRuntimeType returns the type of runtime configured. +// RuntimeType returns the type of runtimeHandler // This is needed when callers need to do specific work for oci vs vm // containers, like monitor an oci container's conmon. 
-func (r *Runtime) ContainerRuntimeType(c *Container) (string, error) { - rh, err := r.getRuntimeHandler(c.runtimeHandler) +func (r *Runtime) RuntimeType(runtimeHandler string) (string, error) { + rh, err := r.getRuntimeHandler(runtimeHandler) if err != nil { return "", err } diff --git a/internal/oci/oci_linux.go b/internal/oci/oci_linux.go index a5fcc1d1005..199372349e9 100644 --- a/internal/oci/oci_linux.go +++ b/internal/oci/oci_linux.go @@ -20,6 +20,9 @@ import ( ) func (r *runtimeOCI) createContainerPlatform(c *Container, cgroupParent string, pid int) error { + if c.Spoofed() { + return nil + } // Move conmon to specified cgroup conmonCgroupfsPath, err := r.config.CgroupManager().MoveConmonToCgroup(c.id, cgroupParent, r.config.ConmonCgroup, pid) if err != nil { @@ -50,6 +53,10 @@ func (r *runtimeOCI) containerStats(ctr *Container, cgroup string) (*ContainerSt stats.Container = ctr.ID() stats.SystemNano = time.Now().UnixNano() + if ctr.Spoofed() { + return stats, nil + } + // technically, the CRI does not mandate a CgroupParent is given to a pod // this situation should never happen in production, but some test suites // (such as critest) assume we can call stats on a cgroupless container diff --git a/internal/oci/runtime_oci.go b/internal/oci/runtime_oci.go index d462e56cc4c..35e88752c73 100644 --- a/internal/oci/runtime_oci.go +++ b/internal/oci/runtime_oci.go @@ -77,6 +77,10 @@ type exitCodeInfo struct { // CreateContainer creates a container. func (r *runtimeOCI) CreateContainer(c *Container, cgroupParent string) (retErr error) { + if c.Spoofed() { + return nil + } + var stderrBuf bytes.Buffer parentPipe, childPipe, err := newPipe() childStartPipe, parentStartPipe, err := newPipe() @@ -238,6 +242,10 @@ func (r *runtimeOCI) StartContainer(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() + if c.Spoofed() { + return nil + } + if _, err := utils.ExecCmd( r.path, rootFlag, r.root, "start", c.id, ); err != nil { @@ -312,6 +320,10 @@ func parseLog(l []byte) (stdout, stderr []byte) { // ExecContainer prepares a streaming endpoint to execute a command in the container. func (r *runtimeOCI) ExecContainer(c *Container, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize) error { + if c.Spoofed() { + return nil + } + processFile, err := prepareProcessExec(c, cmd, tty) if err != nil { return err @@ -378,6 +390,10 @@ func (r *runtimeOCI) ExecContainer(c *Container, cmd []string, stdin io.Reader, // ExecSyncContainer execs a command in a container and returns it's stdout, stderr and return code. 
func (r *runtimeOCI) ExecSyncContainer(c *Container, command []string, timeout int64) (*ExecSyncResponse, error) { + if c.Spoofed() { + return nil, nil + } + pidFile, parentPipe, childPipe, err := prepareExec() if err != nil { return nil, &ExecSyncError{ @@ -547,6 +563,10 @@ func (r *runtimeOCI) ExecSyncContainer(c *Container, command []string, timeout i // UpdateContainer updates container resources func (r *runtimeOCI) UpdateContainer(c *Container, res *rspec.LinuxResources) error { + if c.Spoofed() { + return nil + } + cmd := exec.Command(r.path, rootFlag, r.root, "update", "--resources", "-", c.id) // nolint: gosec var stdout bytes.Buffer var stderr bytes.Buffer @@ -630,6 +650,12 @@ func (r *runtimeOCI) StopContainer(ctx context.Context, c *Container, timeout in return err } + if c.Spoofed() { + c.state.Status = ContainerStateStopped + c.state.Finished = time.Now() + return nil + } + // The initial container process either doesn't exist, or isn't ours. if err := c.verifyPid(); err != nil { c.state.Finished = time.Now() @@ -671,6 +697,10 @@ func (r *runtimeOCI) DeleteContainer(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() + if c.Spoofed() { + return nil + } + _, err := utils.ExecCmd(r.path, rootFlag, r.root, "delete", "--force", c.id) return err } @@ -702,6 +732,10 @@ func (r *runtimeOCI) UpdateContainerStatus(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() + if c.Spoofed() { + return nil + } + if c.state.ExitCode != nil && !c.state.Finished.IsZero() { logrus.Debugf("Skipping status update for: %+v", c.state) return nil @@ -811,6 +845,10 @@ func (r *runtimeOCI) PauseContainer(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() + if c.Spoofed() { + return nil + } + _, err := utils.ExecCmd(r.path, rootFlag, r.root, "pause", c.id) return err } @@ -820,6 +858,10 @@ func (r *runtimeOCI) UnpauseContainer(c *Container) error { c.opLock.Lock() defer c.opLock.Unlock() + if c.Spoofed() { + return nil + } + _, err := utils.ExecCmd(r.path, rootFlag, r.root, "resume", c.id) return err } @@ -832,7 +874,6 @@ func (r *runtimeOCI) WaitContainerStateStopped(ctx context.Context, c *Container func (r *runtimeOCI) ContainerStats(c *Container, cgroup string) (*ContainerStats, error) { c.opLock.Lock() defer c.opLock.Unlock() - return r.containerStats(c, cgroup) } @@ -841,6 +882,10 @@ func (r *runtimeOCI) SignalContainer(c *Container, sig syscall.Signal) error { c.opLock.Lock() defer c.opLock.Unlock() + if c.Spoofed() { + return nil + } + if unix.SignalName(sig) == "" { return errors.Errorf("unable to find signal %s", sig.String()) } @@ -853,6 +898,10 @@ func (r *runtimeOCI) SignalContainer(c *Container, sig syscall.Signal) error { // AttachContainer attaches IO to a running container. func (r *runtimeOCI) AttachContainer(c *Container, inputStream io.Reader, outputStream, errorStream io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize) error { + if c.Spoofed() { + return nil + } + controlPath := filepath.Join(c.BundlePath(), "ctl") controlFile, err := os.OpenFile(controlPath, os.O_WRONLY, 0) if err != nil { @@ -1012,6 +1061,10 @@ func (r *runtimeOCI) PortForwardContainer(ctx context.Context, c *Container, net // ReopenContainerLog reopens the log file of a container. 
func (r *runtimeOCI) ReopenContainerLog(c *Container) error { + if c.Spoofed() { + return nil + } + controlPath := filepath.Join(c.BundlePath(), "ctl") controlFile, err := os.OpenFile(controlPath, os.O_WRONLY, 0) if err != nil { diff --git a/pinns/Makefile b/pinns/Makefile index 166d2a9259b..762a0791826 100644 --- a/pinns/Makefile +++ b/pinns/Makefile @@ -1,4 +1,4 @@ -src = $(wildcard *.c) +src = $(wildcard src/*.c) obj = $(src:.c=.o) STRIP ?= strip @@ -12,6 +12,9 @@ all: ../bin/pinns $(CC) -o $@ $^ $(CFLAGS) $(LIBS) $(STRIP) -s $@ +%.o: %.c $(HEADERS) + $(CC) $(CFLAGS) -O3 -o $@ -c $< + ../bin: mkdir -p $@ diff --git a/pinns/pinns.c b/pinns/src/pinns.c similarity index 95% rename from pinns/pinns.c rename to pinns/src/pinns.c index e334b5b5f97..aa910cd6e27 100644 --- a/pinns/pinns.c +++ b/pinns/src/pinns.c @@ -16,6 +16,7 @@ #include #include "utils.h" +#include "sysctl.h" static int bind_ns(const char *pin_path, const char *filename, const char *ns_name, pid_t pid); static int directory_exists_or_create(const char* path); @@ -42,6 +43,7 @@ int main(int argc, char **argv) { bool bind_ipc = false; bool bind_user = false; bool bind_cgroup = false; + char *sysctls = NULL; static const struct option long_options[] = { {"help", no_argument, NULL, 'h'}, @@ -54,9 +56,10 @@ int main(int argc, char **argv) { {"filename", required_argument, NULL, 'f'}, {"uid-mapping", optional_argument, NULL, UID_MAPPING}, {"gid-mapping", optional_argument, NULL, GID_MAPPING}, + {"sysctl", optional_argument, NULL, 's'}, }; - while ((c = getopt_long(argc, argv, "pchuUind:f:", long_options, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "pchuUind:f:s:", long_options, NULL)) != -1) { switch (c) { case 'u': unshare_flags |= CLONE_NEWUTS; @@ -83,12 +86,15 @@ int main(int argc, char **argv) { unshare_flags |= CLONE_NEWCGROUP; bind_cgroup = true; num_unshares++; + break; #endif pexit("unsharing cgroups is not supported by this pinns version"); - break; case 'd': pin_path = optarg; break; + case 's': + sysctls = optarg; + break; case 'f': filename = optarg; break; @@ -174,6 +180,10 @@ int main(int argc, char **argv) { pexit("Cannot write gid mappings"); } + if (sysctls && configure_sysctls(sysctls) < 0) { + pexit("Failed to configure sysctls after unshare"); + } + if (bind_user) { if (bind_ns(pin_path, filename, "user", pid) < 0) { return EXIT_FAILURE; diff --git a/pinns/src/sysctl.c b/pinns/src/sysctl.c new file mode 100644 index 00000000000..058afb9a7e7 --- /dev/null +++ b/pinns/src/sysctl.c @@ -0,0 +1,120 @@ +#define _GNU_SOURCE + +#include +#include + +#include "sysctl.h" +#include "utils.h" + +static int separate_sysctl_key_value (char* sysctl_key_value, char** sysctl_key, char** sysctl_value); +static int write_sysctl_to_file (char * sysctl_key, char* sysctl_value); +const char *sysctl_delim = "+"; + +int configure_sysctls (char * const sysctls) +{ + char* sysctl = strtok(sysctls, sysctl_delim); + char* key = NULL; + char* value = NULL; + while (sysctl) + { + if (separate_sysctl_key_value (sysctl, &key, &value) < 0) + return -1; + + if (write_sysctl_to_file (key, value) < 0) + return -1; + sysctl = strtok (NULL, sysctl_delim); + } + + return 0; +} + +// key_value should be in the form `'k=v'` +static int separate_sysctl_key_value (char* key_value, char** key, char** value) +{ + // begin by stripping the `'`, we now have `k=v'` + bool quote_stripped = false; + if (*key_value == '\'') + { + key_value++; + quote_stripped = true; + } + + // now find the `=` and convert it to a delimiter + char * equals_token = 
strchr (key_value, '='); + if (!equals_token) + { + nwarnf ("sysctl must be in the form of 'key=value'; '=' missing from %s", key_value); + return -1; + } + + // if the location of the equals sign is the beginning of the string + // key is empty + if (equals_token == key_value) + { + nwarnf ("sysctl must be in the form of 'key=value'; key is empty"); + return -1; + } + + // we now have `k\0v'` + *equals_token = '\0'; + + // key is now `k` + *key = key_value; + + // equals_token is now `v'` + ++equals_token; + + // if we stripped the beginning single quote + // we should find and strip the ending, as well as anything after + if (quote_stripped) + { + char* ending_char = strchr (equals_token, '\''); + if (ending_char) + *ending_char = '\0'; + } + + // value is now `v` + *value = equals_token; + if (!strlen (*value)) + { + nwarnf ("sysctl must be in the form of 'key=value'; value is empty"); + return -1; + } + return 0; +} + +static int write_sysctl_to_file (char * sysctl_key, char* sysctl_value) +{ + if (!sysctl_key || !sysctl_value) + { + pwarn ("sysctl key or value not initialized"); + return -1; + } + + // replace periods with / to create the sysctl path + for (char* it = sysctl_key; *it; it++) + if (*it == '.') + *it = '/'; + + _cleanup_close_ int dirfd = open ("/proc/sys", O_DIRECTORY | O_PATH | O_CLOEXEC); + if (UNLIKELY (dirfd < 0)) + { + pwarn ("failed to open /proc/sys"); + return -1; + } + + _cleanup_close_ int fd = openat (dirfd, sysctl_key, O_WRONLY); + if (UNLIKELY (fd < 0)) + { + pwarnf ("failed to open /proc/sys/%s", sysctl_key); + return -1; + } + + int ret = TEMP_FAILURE_RETRY (write (fd, sysctl_value, strlen (sysctl_value))); + if (UNLIKELY (ret < 0)) + { + pwarnf ("failed to write to /proc/sys/%s", sysctl_key); + return -1; + } + return 0; +} diff --git a/pinns/src/sysctl.h b/pinns/src/sysctl.h new file mode 100644 index 00000000000..9f90d4e3aef --- /dev/null +++ b/pinns/src/sysctl.h @@ -0,0 +1,7 @@ +#pragma once +#if !defined(SYSCTL_H) +#define SYSCTL_H + +int configure_sysctls (char * const sysctls); + +#endif // SYSCTL_H diff --git a/pinns/utils.h b/pinns/src/utils.h similarity index 97% rename from pinns/utils.h rename to pinns/src/utils.h index 9b95d39dc2a..c213320bfd4 100644 --- a/pinns/utils.h +++ b/pinns/src/utils.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -93,4 +94,7 @@ static inline void fclosep(FILE **fp) { #define _cleanup_close_ _cleanup_(closep) #define _cleanup_fclose_ _cleanup_(fclosep) +# define LIKELY(x) __builtin_expect((x),1) +# define UNLIKELY(x) __builtin_expect((x),0) + #endif /* !defined(UTILS_H) */ diff --git a/pkg/annotations/annotations.go b/pkg/annotations/annotations.go new file mode 100644 index 00000000000..6aff72e72f8 --- /dev/null +++ b/pkg/annotations/annotations.go @@ -0,0 +1,9 @@ +package annotations + +const ( + // UsernsMode is the user namespace mode to use + UsernsModeAnnotation = "io.kubernetes.cri-o.userns-mode" + + // SpoofedContainer indicates a container was spoofed in the runtime + SpoofedContainer = "io.kubernetes.cri-o.Spoofed" +) diff --git a/pkg/config/config.go b/pkg/config/config.go index 305023e3140..ad77046afa2 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -284,6 +284,10 @@ type RuntimeConfig struct { // and manage their lifecycle ManageNSLifecycle bool `toml:"manage_ns_lifecycle"` + // DropInfraCtr determines whether the infra container is dropped when appropriate. + // Requires ManageNSLifecycle to be true. 
+ DropInfraCtr bool `toml:"drop_infra_ctr"` + // ReadOnly run all pods/containers in read-only mode. // This mode will mount tmpfs on /run, /tmp and /var/tmp, if those are not mountpoints // Will also set the readonly flag in the OCI Runtime Spec. In this mode containers @@ -770,10 +774,13 @@ func (c *RuntimeConfig) Validate(systemContext *types.SystemContext, onExecution } if c.UIDMappings != "" && c.ManageNSLifecycle { - return fmt.Errorf("cannot use UIDMappings with ManageNSLifecycle") + return errors.New("cannot use UIDMappings with ManageNSLifecycle") } if c.GIDMappings != "" && c.ManageNSLifecycle { - return fmt.Errorf("cannot use GIDMappings with ManageNSLifecycle") + return errors.New("cannot use GIDMappings with ManageNSLifecycle") + } + if c.DropInfraCtr && !c.ManageNSLifecycle { + return errors.New("cannot drop infra without ManageNSLifecycle") } if c.LogSizeMax >= 0 && c.LogSizeMax < OCIBufSize { diff --git a/pkg/config/template.go b/pkg/config/template.go index 6e7c2bfcf0f..bbb88f92255 100644 --- a/pkg/config/template.go +++ b/pkg/config/template.go @@ -257,6 +257,12 @@ ctr_stop_timeout = {{ .CtrStopTimeout }} # and manage their lifecycle manage_ns_lifecycle = {{ .ManageNSLifecycle }} +# drop_infra_ctr determines whether CRI-O drops the infra container +# when a pod does not have a private PID namespace, and does not use +# a kernel separating runtime (like kata). +# It requires manage_ns_lifecycle to be true. +drop_infra_ctr = {{ .DropInfraCtr }} + # The directory where the state of the managed namespaces gets tracked. # Only used when manage_ns_lifecycle is true. namespaces_dir = "{{ .NamespacesDir }}" diff --git a/server/container_portforward.go b/server/container_portforward.go index 18acafe7027..2a45e93637d 100644 --- a/server/container_portforward.go +++ b/server/container_portforward.go @@ -7,7 +7,6 @@ import ( "github.com/containers/storage/pkg/pools" "github.com/cri-o/cri-o/internal/log" - "github.com/cri-o/cri-o/internal/oci" "github.com/pkg/errors" "golang.org/x/net/context" pb "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" @@ -49,23 +48,19 @@ func (s StreamService) PortForward(podSandboxID string, port int32, stream io.Re return fmt.Errorf("could not find sandbox %s", podSandboxID) } - c := sb.InfraContainer() - if err := s.runtimeServer.Runtime().UpdateContainerStatus(c); err != nil { - return err + if !sb.Ready(true) { + return fmt.Errorf("sandbox %s is not running", podSandboxID) } - cState := c.State() - if !(cState.Status == oci.ContainerStateRunning || cState.Status == oci.ContainerStateCreated) { - return fmt.Errorf("container is not created or running") - } - - emptyStreamOnError = false - - if sb.NetNsPath() == "" { + netNsPath := sb.NetNsPath() + if netNsPath == "" { return errors.Errorf( "network namespace path of sandbox %s is empty", sb.ID(), ) } - return s.runtimeServer.Runtime().PortForwardContainer(ctx, c, sb.NetNsPath(), port, stream) + // defer responsibility of emptying stream to PortForwardContainer + emptyStreamOnError = false + + return s.runtimeServer.Runtime().PortForwardContainer(ctx, sb.InfraContainer(), netNsPath, port, stream) } diff --git a/server/sandbox_remove.go b/server/sandbox_remove.go index 441cff24ab2..70f5b667d75 100644 --- a/server/sandbox_remove.go +++ b/server/sandbox_remove.go @@ -32,9 +32,7 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR podInfraContainer := sb.InfraContainer() containers := sb.Containers().List() - if podInfraContainer != nil { - containers = append(containers, 
podInfraContainer) - } + containers = append(containers, podInfraContainer) // Delete all the containers in the sandbox for _, c := range containers { @@ -77,10 +75,11 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR } } - if podInfraContainer != nil { - s.removeInfraContainer(podInfraContainer) - podInfraContainer.CleanupConmonCgroup() + s.removeInfraContainer(podInfraContainer) + podInfraContainer.CleanupConmonCgroup() + // StorageRuntimeServer won't know about this container, as it wasn't created in storage + if !podInfraContainer.Spoofed() { if err := s.StorageRuntimeServer().StopContainer(sb.ID()); err != nil && !errors.Is(err, storage.ErrContainerUnknown) { log.Warnf(ctx, "failed to stop sandbox container in pod sandbox %s: %v", sb.ID(), err) } @@ -99,11 +98,9 @@ func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxR } } - if podInfraContainer != nil { - s.ReleaseContainerName(podInfraContainer.Name()) - if err := s.CtrIDIndex().Delete(podInfraContainer.ID()); err != nil { - return nil, fmt.Errorf("failed to delete infra container %s in pod sandbox %s from index: %v", podInfraContainer.ID(), sb.ID(), err) - } + s.ReleaseContainerName(podInfraContainer.Name()) + if err := s.CtrIDIndex().Delete(podInfraContainer.ID()); err != nil { + return nil, fmt.Errorf("failed to delete infra container %s in pod sandbox %s from index: %v", podInfraContainer.ID(), sb.ID(), err) } s.ReleasePodName(sb.Name()) diff --git a/server/sandbox_run_linux.go b/server/sandbox_run_linux.go index 49e03269fda..a5ea55a5099 100644 --- a/server/sandbox_run_linux.go +++ b/server/sandbox_run_linux.go @@ -23,6 +23,7 @@ import ( libsandbox "github.com/cri-o/cri-o/internal/lib/sandbox" "github.com/cri-o/cri-o/internal/log" oci "github.com/cri-o/cri-o/internal/oci" + ann "github.com/cri-o/cri-o/pkg/annotations" libconfig "github.com/cri-o/cri-o/pkg/config" "github.com/cri-o/cri-o/pkg/sandbox" "github.com/cri-o/cri-o/utils" @@ -270,6 +271,7 @@ func (s *Server) getSandboxIDMappings(sb *libsandbox.Sandbox) (*idtools.IDMappin return mappings, nil } +// nolint:gocyclo func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, retErr error) { s.updateLock.RLock() defer s.updateLock.RUnlock() @@ -305,7 +307,7 @@ func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest kubeAnnotations := sbox.Config().GetAnnotations() - usernsMode := kubeAnnotations[lib.UsernsModeAnnotation] + usernsMode := kubeAnnotations[ann.UsernsModeAnnotation] idMappingsOptions, err := s.configureSandboxIDMappings(usernsMode, sbox.Config().GetLinux().GetSecurityContext()) if err != nil { @@ -660,12 +662,7 @@ func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest } // Add default sysctls given in crio.conf - s.configureGeneratorForSysctls(ctx, g, hostNetwork, hostIPC) - // extract linux sysctls from annotations and pass down to oci runtime - // Will override any duplicate default systcl from crio.conf - for key, value := range sbox.Config().GetLinux().GetSysctls() { - g.AddLinuxSysctl(key, value) - } + sysctls := s.configureGeneratorForSysctls(ctx, g, hostNetwork, hostIPC, req.GetConfig().GetLinux().GetSysctls()) // Set OOM score adjust of the infra container to be very low // so it doesn't get killed. 
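As context for the change just above: the map now returned by configureGeneratorForSysctls is what eventually reaches pinns through its new -s flag. A hedged sketch of the encoding side of that round trip (equivalent in effect to getSysctlForPinns earlier in this diff; the encodeSysctls name and the strings.Join formulation are illustrative, not the diff's exact code):

```go
package main

import (
	"fmt"
	"strings"
)

// encodeSysctls joins 'key=value' pairs with "+", matching the delimiter that
// pinns' sysctl.c splits on. This assumes no sysctl name or value contains "+".
func encodeSysctls(sysctls map[string]string) string {
	parts := make([]string, 0, len(sysctls))
	for k, v := range sysctls {
		parts = append(parts, fmt.Sprintf("'%s=%s'", k, v))
	}
	return strings.Join(parts, "+")
}

func main() {
	// pinns later rewrites the dots to slashes and writes under /proc/sys.
	fmt.Println("pinns -s", encodeSysctls(map[string]string{"net.ipv4.ip_forward": "1"}))
	// Output: pinns -s 'net.ipv4.ip_forward=1'
}
```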
@@ -674,7 +671,7 @@ func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest g.SetLinuxResourcesCPUShares(PodInfraCPUshares) // set up namespaces - cleanupFuncs, err := s.configureGeneratorForSandboxNamespaces(hostNetwork, hostIPC, hostPID, sandboxIDMappings, sb, g) + cleanupFuncs, err := s.configureGeneratorForSandboxNamespaces(hostNetwork, hostIPC, hostPID, sandboxIDMappings, sysctls, sb, g) // We want to cleanup after ourselves if we are managing any namespaces and fail in this function. defer func() { if retErr != nil { @@ -723,31 +720,6 @@ func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest g.AddAnnotation(annotations.HostnamePath, hostnamePath) sb.AddHostnamePath(hostnamePath) - container, err := oci.NewContainer(sbox.ID(), containerName, podContainer.RunDir, logPath, labels, g.Config.Annotations, kubeAnnotations, s.config.PauseImage, "", "", nil, sbox.ID(), false, false, false, runtimeHandler, podContainer.Dir, created, podContainer.Config.Config.StopSignal) - if err != nil { - return nil, err - } - - runtimeType, err := s.Runtime().ContainerRuntimeType(container) - if err != nil { - return nil, err - } - // If using kata runtime, the process label should be set to container_kvm_t - // Keep in mind that kata does *not* apply any process label to containers within the VM - // Note: the requirement here is that the name used for the runtime class has "kata" in it - // or the runtime_type is set to "vm" - if runtimeType == libconfig.RuntimeTypeVM || strings.Contains(strings.ToLower(runtimeHandler), "kata") { - processLabel, err = selinux.KVMLabel(processLabel) - if err != nil { - return nil, err - } - g.SetProcessSelinuxLabel(processLabel) - } - - container.SetMountPoint(mountPoint) - - container.SetIDMappings(sandboxIDMappings) - if sandboxIDMappings != nil { if securityContext.GetNamespaceOptions().GetIpc() == pb.NamespaceMode_NODE { g.RemoveMount("/dev/mqueue") @@ -780,6 +752,12 @@ func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest } g.AddMount(proc) } + rootPair := s.defaultIDMappings.RootPair() + for _, path := range pathsToChown { + if err := os.Chown(path, rootPair.UID, rootPair.GID); err != nil { + return nil, errors.Wrapf(err, "cannot chown %s to %d:%d", path, rootPair.UID, rootPair.GID) + } + } } g.SetRootPath(mountPoint) @@ -787,7 +765,55 @@ func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest makeOCIConfigurationRootless(&g) } - container.SetSpec(g.Config) + sb.SetNamespaceOptions(securityContext.GetNamespaceOptions()) + + spp := securityContext.GetSeccompProfilePath() + g.AddAnnotation(annotations.SeccompProfilePath, spp) + sb.SetSeccompProfilePath(spp) + if !privileged { + if err := s.setupSeccomp(ctx, &g, spp); err != nil { + return nil, err + } + } + + runtimeType, err := s.Runtime().RuntimeType(runtimeHandler) + if err != nil { + return nil, err + } + + // A container is kernel separated if we're using shimv2, or we're using a kata v1 binary + podIsKernelSeparated := runtimeType == libconfig.RuntimeTypeVM || + strings.Contains(strings.ToLower(runtimeHandler), "kata") || + (runtimeHandler == "" && strings.Contains(strings.ToLower(s.config.DefaultRuntime), "kata")) + + var container *oci.Container + // In the case of kernel separated containers, we need the infra container to create the VM for the pod + if sb.NeedsInfra(s.config.DropInfraCtr) || podIsKernelSeparated { + log.Debugf(ctx, "keeping infra container for pod %s", sbox.ID()) + container, err = 
oci.NewContainer(sbox.ID(), containerName, podContainer.RunDir, logPath, labels, g.Config.Annotations, kubeAnnotations, s.config.PauseImage, "", "", nil, sbox.ID(), false, false, false, runtimeHandler, podContainer.Dir, created, podContainer.Config.Config.StopSignal) + if err != nil { + return nil, err + } + // If using a kernel separated container runtime, the process label should be set to container_kvm_t + // Keep in mind that kata does *not* apply any process label to containers within the VM + if podIsKernelSeparated { + processLabel, err = selinux.KVMLabel(processLabel) + if err != nil { + return nil, err + } + g.SetProcessSelinuxLabel(processLabel) + } + + container.SetMountPoint(mountPoint) + + container.SetIDMappings(sandboxIDMappings) + + container.SetSpec(g.Config) + } else { + log.Debugf(ctx, "dropping infra container for pod %s", sbox.ID()) + container = oci.NewSpoofedContainer(sbox.ID(), containerName, labels, created, podContainer.RunDir) + g.AddAnnotation(ann.SpoofedContainer, "true") + } if err := sb.SetInfraContainer(container); err != nil { return nil, err @@ -826,19 +852,8 @@ func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest g.AddAnnotation(fmt.Sprintf("%s.%d", annotations.IP, idx), ip) } sb.AddIPs(ips) - sb.SetNamespaceOptions(securityContext.GetNamespaceOptions()) - - spp := securityContext.GetSeccompProfilePath() - g.AddAnnotation(annotations.SeccompProfilePath, spp) - sb.SetSeccompProfilePath(spp) - if !privileged { - if err := s.setupSeccomp(ctx, &g, spp); err != nil { - return nil, err - } - } - err = g.SaveToFile(filepath.Join(podContainer.Dir, "config.json"), saveOptions) - if err != nil { + if err = g.SaveToFile(filepath.Join(podContainer.Dir, "config.json"), saveOptions); err != nil { return nil, fmt.Errorf("failed to save template configuration for pod sandbox %s(%s): %v", sb.Name(), sbox.ID(), err) } if err = g.SaveToFile(filepath.Join(podContainer.RunDir, "config.json"), saveOptions); err != nil { @@ -964,26 +979,36 @@ func PauseCommand(cfg *libconfig.Config, image *v1.Image) ([]string, error) { return []string{cfg.PauseCommand}, nil } -func (s *Server) configureGeneratorForSysctls(ctx context.Context, g generate.Generator, hostNetwork, hostIPC bool) { - sysctls, err := s.config.RuntimeConfig.Sysctls() +func (s *Server) configureGeneratorForSysctls(ctx context.Context, g generate.Generator, hostNetwork, hostIPC bool, sysctls map[string]string) map[string]string { + sysctlsToReturn := make(map[string]string) + defaultSysctls, err := s.config.RuntimeConfig.Sysctls() if err != nil { log.Warnf(ctx, "sysctls invalid: %v", err) } - for _, sysctl := range sysctls { + for _, sysctl := range defaultSysctls { if err := sysctl.Validate(hostNetwork, hostIPC); err != nil { log.Warnf(ctx, "skipping invalid sysctl %s: %v", sysctl, err) continue } g.AddLinuxSysctl(sysctl.Key(), sysctl.Value()) + sysctlsToReturn[sysctl.Key()] = sysctl.Value() + } + + // extract linux sysctls from annotations and pass down to oci runtime + // Will override any duplicate default sysctl from crio.conf + for key, value := range sysctls { + g.AddLinuxSysctl(key, value) + sysctlsToReturn[key] = value } + return sysctlsToReturn } // configureGeneratorForSandboxNamespaces set the linux namespaces for the generator, based on whether the pod is sharing namespaces with the host, // as well as whether CRI-O should be managing the namespace lifecycle. // it returns a slice of cleanup funcs, all of which are the respective NamespaceRemove() for the sandbox. 
diff --git a/server/sandbox_status.go b/server/sandbox_status.go
index 21e481bd46f..b79eeabc34b 100644
--- a/server/sandbox_status.go
+++ b/server/sandbox_status.go
@@ -73,10 +73,13 @@ func toPodIPs(ips []string) (result []*pb.PodIP) {
 }
 
 func createSandboxInfo(c *oci.Container) (map[string]string, error) {
+    if c.Spoofed() {
+        return map[string]string{"info": "{}"}, nil
+    }
     info := struct {
         Image       string    `json:"image"`
         Pid         int       `json:"pid"`
-        RuntimeSpec spec.Spec `json:"runtimeSpec"`
+        RuntimeSpec spec.Spec `json:"runtimeSpec,omitempty"`
     }{
         c.Image(),
         c.State().Pid,
diff --git a/server/sandbox_stop_linux.go b/server/sandbox_stop_linux.go
index a99626d2440..022429104a7 100644
--- a/server/sandbox_stop_linux.go
+++ b/server/sandbox_stop_linux.go
@@ -52,9 +52,7 @@ func (s *Server) stopPodSandbox(ctx context.Context, req *pb.StopPodSandboxReque
     podInfraContainer := sb.InfraContainer()
     containers := sb.Containers().List()
-    if podInfraContainer != nil {
-        containers = append(containers, podInfraContainer)
-    }
+    containers = append(containers, podInfraContainer)
 
     const maxWorkers = 128
     var waitGroup errgroup.Group
diff --git a/test/cgroups.bats b/test/cgroups.bats
index 8b18498f037..16ee5fbd42a 100644
--- a/test/cgroups.bats
+++ b/test/cgroups.bats
@@ -28,7 +28,7 @@ function teardown() {
 }
 
 @test "conmon custom cgroup" {
-    CONTAINER_CGROUP_MANAGER="systemd" CONTAINER_CONMON_CGROUP="customcrioconmon.slice" start_crio
+    CONTAINER_CGROUP_MANAGER="systemd" CONTAINER_DROP_INFRA_CTR=false CONTAINER_MANAGE_NS_LIFECYCLE=false CONTAINER_CONMON_CGROUP="customcrioconmon.slice" start_crio
 
     python -c 'import json,sys;obj=json.load(sys.stdin);obj["linux"]["cgroup_parent"] = "Burstablecriotest123.slice"; json.dump(obj, sys.stdout)' \
         < "$TESTDATA"/sandbox_config.json > "$TESTDIR"/sandbox_config_slice.json
diff --git a/test/ctr_userns.bats b/test/ctr_userns.bats
index 42cca58fdec..5fa5bdd3817 100644
--- a/test/ctr_userns.bats
+++ b/test/ctr_userns.bats
@@ -3,6 +3,8 @@
 load helpers
 
 function setup() {
+    export CONTAINER_MANAGE_NS_LIFECYCLE=false
+    export CONTAINER_DROP_INFRA_CTR=false
     setup_test
 }
 
@@ -16,7 +18,6 @@ function teardown() {
     fi
     export CONTAINER_UID_MAPPINGS="0:100000:100000"
     export CONTAINER_GID_MAPPINGS="0:200000:100000"
-    export CONTAINER_MANAGE_NS_LIFECYCLE=false
 
     # Workaround for https://github.com/opencontainers/runc/pull/1562
     # Remove once the fix hits the CI
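The status and stop changes above lean on the spoofed container acting as an always-present placeholder: lookups no longer need nil checks, and code paths branch on Spoofed() instead. A toy illustration of that pattern (the type and values are invented for the example, not CRI-O's oci.Container):

package main

import "fmt"

// infraContainer is a stand-in type showing the placeholder pattern: the
// infra slot is always populated, and callers branch on Spoofed() rather
// than on nil, so status never dereferences a PID or runtime spec that was
// never created, and stop can append the infra container unconditionally.
type infraContainer struct{ spoofed bool }

func (c *infraContainer) Spoofed() bool { return c.spoofed }

func sandboxInfo(c *infraContainer) string {
    if c.Spoofed() {
        return "{}" // no process and no spec to report
    }
    return `{"image":"pause","pid":1234}` // placeholder for the real info
}

func main() {
    fmt.Println(sandboxInfo(&infraContainer{spoofed: true}))
    fmt.Println(sandboxInfo(&infraContainer{spoofed: false}))
}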
diff --git a/test/drop_infra.bats b/test/drop_infra.bats
new file mode 100644
index 00000000000..796621cb122
--- /dev/null
+++ b/test/drop_infra.bats
@@ -0,0 +1,34 @@
+#!/usr/bin/env bats
+
+load helpers
+
+function setup() {
+    setup_test
+    CONTAINER_MANAGE_NS_LIFECYCLE=true CONTAINER_DROP_INFRA_CTR=true start_crio
+}
+
+function teardown() {
+    cleanup_test
+}
+
+@test "test infra ctr dropped" {
+    python -c 'import json,sys;obj=json.load(sys.stdin);obj["linux"]["security_context"]["namespace_options"]["pid"] = 1; json.dump(obj, sys.stdout)' \
+        < "$TESTDATA"/sandbox_config.json > "$TESTDIR"/sandbox_no_infra.json
+    pod_id=$(crictl runp "$TESTDIR"/sandbox_no_infra.json)
+
+    run "$CONTAINER_RUNTIME" list
+    echo "$output"
+    [ "$status" -eq 0 ]
+    [[ ! "$output" = *"$pod_id"* ]]
+}
+
+@test "test infra ctr not dropped" {
+    python -c 'import json,sys;obj=json.load(sys.stdin);obj["linux"]["security_context"]["namespace_options"]["pid"] = 0; json.dump(obj, sys.stdout)' \
+        < "$TESTDATA"/sandbox_config.json > "$TESTDIR"/sandbox_no_infra.json
+    pod_id=$(crictl runp "$TESTDIR"/sandbox_no_infra.json)
+
+    run "$CONTAINER_RUNTIME" list
+    echo "$output"
+    [ "$status" -eq 0 ]
+    [[ "$output" = *"$pod_id"* ]]
+}
diff --git a/test/helpers.bash b/test/helpers.bash
index 34ffcc41296..5a1b6a43f0f 100644
--- a/test/helpers.bash
+++ b/test/helpers.bash
@@ -31,17 +31,21 @@ CONMON_BINARY=${CONMON_BINARY:-$(command -v conmon)}
 CONTAINER_CONMON_CGROUP=${CONTAINER_CONMON_CGROUP:-pod}
 # Path of the default seccomp profile.
 CONTAINER_SECCOMP_PROFILE=${CONTAINER_SECCOMP_PROFILE:-${CRIO_ROOT}/vendor/github.com/seccomp/containers-golang/seccomp.json}
-# Runtime
-CONTAINER_DEFAULT_RUNTIME=${CONTAINER_DEFAULT_RUNTIME:-runc}
-RUNTIME_NAME=${RUNTIME_NAME:-runc}
-CONTAINER_RUNTIME=${CONTAINER_RUNTIME:-runc}
 CONTAINER_UID_MAPPINGS=${CONTAINER_UID_MAPPINGS:-}
 CONTAINER_GID_MAPPINGS=${CONTAINER_GID_MAPPINGS:-}
 OVERRIDE_OPTIONS=${OVERRIDE_OPTIONS:-}
+# Runtime
+CONTAINER_RUNTIME=${CONTAINER_RUNTIME:-runc}
+CONTAINER_DEFAULT_RUNTIME=${CONTAINER_DEFAULT_RUNTIME:-runc}
+RUNTIME_NAME=${RUNTIME_NAME:-runc}
 RUNTIME_PATH=$(command -v "$CONTAINER_RUNTIME" || true)
 RUNTIME_BINARY=${RUNTIME_PATH:-$(command -v runc)}
 RUNTIME_ROOT=${RUNTIME_ROOT:-/run/runc}
 RUNTIME_TYPE=${RUNTIME_TYPE:-oci}
+if [[ $CONTAINER_RUNTIME == "kata-runtime" ]]; then
+    export RUNTIME_NAME="$CONTAINER_RUNTIME"
+    export CONTAINER_DEFAULT_RUNTIME="$RUNTIME_NAME"
+fi
 # Path of the apparmor_parser binary.
 APPARMOR_PARSER_BINARY=${APPARMOR_PARSER_BINARY:-/sbin/apparmor_parser}
 # Path of the apparmor profile for test.
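The two new tests toggle linux.security_context.namespace_options.pid in the sandbox config to select the PID namespace mode; going by the CRI NamespaceMode numbering, 0 is POD and 1 is CONTAINER. A compact restatement of the invariant they encode (constants assumed from the CRI API, helper name invented):

package main

import "fmt"

// NamespaceMode values as numbered in the CRI API; only the two modes the
// tests above exercise are listed.
const (
    namespaceModePod       = 0 // pod-level PID namespace
    namespaceModeContainer = 1 // per-container PID namespace
)

// infraListedInRuntime captures the invariant the two tests check: with
// drop_infra_ctr enabled, the low-level runtime's list shows a pod's infra
// container only when a pod-level PID namespace forces one to be kept.
func infraListedInRuntime(dropInfraCtr bool, pidMode int) bool {
    return !dropInfraCtr || pidMode == namespaceModePod
}

func main() {
    fmt.Println(infraListedInRuntime(true, namespaceModeContainer)) // false: dropped
    fmt.Println(infraListedInRuntime(true, namespaceModePod))       // true: kept
}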
diff --git a/test/network.bats b/test/network.bats
index 9f76ab2e1f0..9ac197b056a 100644
--- a/test/network.bats
+++ b/test/network.bats
@@ -127,9 +127,12 @@ function teardown() {
 }
 
 @test "Clean up network if pod sandbox fails" {
+    # TODO FIXME find a way for sandbox setup to fail if manage ns is true
     cp $(which conmon) "$TESTDIR"/conmon
-    CONMON_BINARY="$TESTDIR"/conmon \
-        start_crio "" "prepare_plugin_test_args_network_conf"
+    CONTAINER_MANAGE_NS_LIFECYCLE=false \
+        CONTAINER_DROP_INFRA_CTR=false \
+        CONMON_BINARY="$TESTDIR"/conmon \
+        start_crio "" "prepare_plugin_test_args_network_conf"
 
     # make conmon non-executable to cause the sandbox setup to fail after
     # networking has been configured
diff --git a/test/pod.bats b/test/pod.bats
index 54e078a3709..23eb7d8da6b 100644
--- a/test/pod.bats
+++ b/test/pod.bats
@@ -305,7 +305,8 @@ function teardown() {
     wrong_cgroup_parent_config=$(cat "$TESTDATA"/sandbox_config.json | python -c 'import json,sys;obj=json.load(sys.stdin);obj["linux"]["cgroup_parent"] = "podsandbox1.slice:container:infra"; json.dump(obj, sys.stdout)')
     echo "$wrong_cgroup_parent_config" > "$TESTDIR"/sandbox_wrong_cgroup_parent.json
 
-    start_crio
+    # kubelet is technically responsible for creating this cgroup. It is created in cri-o if there's an infra container
+    CONTAINER_DROP_INFRA_CTR=false start_crio
     run crictl runp "$TESTDIR"/sandbox_wrong_cgroup_parent.json
     echo "$output"
     [ "$status" -eq 1 ]
@@ -321,7 +322,8 @@ function teardown() {
     cgroup_parent_config=$(cat "$TESTDATA"/sandbox_config.json | python -c 'import json,sys;obj=json.load(sys.stdin);obj["linux"]["cgroup_parent"] = "Burstable-pod_integration_tests-123.slice"; json.dump(obj, sys.stdout)')
     echo "$cgroup_parent_config" > "$TESTDIR"/sandbox_systemd_cgroup_parent.json
 
-    start_crio
+    # kubelet is technically responsible for creating this cgroup. It is created in cri-o if there's an infra container
+    CONTAINER_DROP_INFRA_CTR=false start_crio
     run crictl runp "$TESTDIR"/sandbox_systemd_cgroup_parent.json
     echo "$output"
     [ "$status" -eq 0 ]
@@ -364,7 +366,7 @@ function teardown() {
 }
 
 @test "pod pause image matches configured image in crio.conf" {
-    start_crio
+    CONTAINER_DROP_INFRA_CTR=false start_crio
 
     run crictl runp "$TESTDATA"/sandbox_config.json
     echo "$output"
diff --git a/test/restore.bats b/test/restore.bats
index c6a95742d96..53b5980ae17 100644
--- a/test/restore.bats
+++ b/test/restore.bats
@@ -200,7 +200,8 @@ function teardown() {
 }
 
 @test "crio restore with bad state" {
-    start_crio
+    # this test makes no sense without an infra container
+    CONTAINER_DROP_INFRA_CTR=false start_crio
     run crictl runp "$TESTDATA"/sandbox_config.json
     echo "$output"
     [ "$status" -eq 0 ]
@@ -305,7 +306,7 @@ function teardown() {
 }
 
 @test "crio restore first not managing then managing" {
-    CONTAINER_MANAGE_NS_LIFECYCLE=false start_crio
+    CONTAINER_MANAGE_NS_LIFECYCLE=false CONTAINER_DROP_INFRA_CTR=false start_crio
     run crictl runp "$TESTDATA"/sandbox_config.json
     echo "$output"
     [ "$status" -eq 0 ]
@@ -378,7 +379,7 @@ function teardown() {
 }
 
 @test "crio restore first managing then not managing" {
-    CONTAINER_MANAGE_NS_LIFECYCLE=true start_crio
+    CONTAINER_MANAGE_NS_LIFECYCLE=true CONTAINER_DROP_INFRA_CTR=true start_crio
     run crictl runp "$TESTDATA"/sandbox_config.json
     echo "$output"
     [ "$status" -eq 0 ]
@@ -412,7 +413,7 @@ function teardown() {
 
     stop_crio
 
-    CONTAINER_MANAGE_NS_LIFECYCLE=false start_crio
+    CONTAINER_MANAGE_NS_LIFECYCLE=false CONTAINER_DROP_INFRA_CTR=false start_crio
     run crictl pods --quiet
     echo "$output"
     [ "$status" -eq 0 ]
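The restore tests above now pin CONTAINER_DROP_INFRA_CTR to the same truth value as CONTAINER_MANAGE_NS_LIFECYCLE, matching the flag's documented requirement that ManageNSLifecycle be true whenever infra containers are dropped. A hypothetical validation helper making that dependency explicit (this is not CRI-O's actual config validation code):

package main

import (
    "errors"
    "fmt"
)

// validateInfraConfig sketches the assumed dependency: drop_infra_ctr only
// makes sense when manage_ns_lifecycle is also enabled, because without an
// infra process the pod's namespaces must be held open by the pinns-managed
// bind mounts instead.
func validateInfraConfig(dropInfraCtr, manageNSLifecycle bool) error {
    if dropInfraCtr && !manageNSLifecycle {
        return errors.New("drop_infra_ctr requires manage_ns_lifecycle to be true")
    }
    return nil
}

func main() {
    fmt.Println(validateInfraConfig(true, false)) // error: invalid combination
    fmt.Println(validateInfraConfig(true, true))  // <nil>
}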