From c9b4eb84e7bc997406bfbc77f6d8bd35b7db6741 Mon Sep 17 00:00:00 2001 From: Yujie Xia Date: Sat, 29 Jan 2022 09:11:52 +0800 Subject: [PATCH 1/2] Fix vm containers couldn't restore after CRI-O restart Signed-off-by: Yujie Xia --- internal/oci/oci.go | 11 +++- internal/oci/runtime_vm.go | 116 ++++++++++++++++++++++++++++++++++++- 2 files changed, 122 insertions(+), 5 deletions(-) diff --git a/internal/oci/oci.go b/internal/oci/oci.go index eef99e8c9f5..34c1b1c5885 100644 --- a/internal/oci/oci.go +++ b/internal/oci/oci.go @@ -241,10 +241,17 @@ func (r *Runtime) RuntimeImpl(c *Container) (RuntimeImpl, error) { r.runtimeImplMapMutex.RLock() impl, ok := r.runtimeImplMap[c.ID()] r.runtimeImplMapMutex.RUnlock() - if !ok { - return r.newRuntimeImpl(c) + if ok { + return impl, nil } + impl, err := r.newRuntimeImpl(c) + if err != nil { + return nil, err + } + r.runtimeImplMapMutex.Lock() + r.runtimeImplMap[c.ID()] = impl + r.runtimeImplMapMutex.Unlock() return impl, nil } diff --git a/internal/oci/runtime_vm.go b/internal/oci/runtime_vm.go index 3a48f55b76d..71dbc108e70 100644 --- a/internal/oci/runtime_vm.go +++ b/internal/oci/runtime_vm.go @@ -3,6 +3,7 @@ package oci import ( "bytes" "io" + "io/ioutil" "os" "path/filepath" "strconv" @@ -13,6 +14,7 @@ import ( cgroups "github.com/containerd/cgroups/stats/v1" tasktypes "github.com/containerd/containerd/api/types/task" + ctrio "github.com/containerd/containerd/cio" "github.com/containerd/containerd/namespaces" client "github.com/containerd/containerd/runtime/v2/shim" "github.com/containerd/containerd/runtime/v2/task" @@ -89,6 +91,52 @@ func newRuntimeVM(path, root, configPath string) RuntimeImpl { } } +func (r *runtimeVM) createContainerIO(ctx context.Context, c *Container, cioOpts ...cio.ContainerIOOpts) (_ *cio.ContainerIO, retErr error) { + // Create IO fifos + containerIO, err := cio.NewContainerIO(c.ID(), cioOpts...) + if err != nil { + return nil, err + } + + defer func() { + if retErr != nil { + containerIO.Close() + } + }() + + f, err := os.OpenFile(c.LogPath(), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0o600) + if err != nil { + return nil, err + } + + var stdoutCh, stderrCh <-chan struct{} + wc := cioutil.NewSerialWriteCloser(f) + stdout, stdoutCh := cio.NewCRILogger(c.LogPath(), wc, cio.Stdout, -1) + stderr, stderrCh := cio.NewCRILogger(c.LogPath(), wc, cio.Stderr, -1) + + go func() { + if stdoutCh != nil { + <-stdoutCh + } + if stderrCh != nil { + <-stderrCh + } + log.Debugf(ctx, "Finish redirecting log file %q, closing it", c.LogPath()) + f.Close() + }() + + containerIO.AddOutput(c.LogPath(), stdout, stderr) + containerIO.Pipe() + + r.Lock() + r.ctrs[c.ID()] = containerInfo{ + cio: containerIO, + } + r.Unlock() + + return containerIO, nil +} + // CreateContainer creates a container. func (r *runtimeVM) CreateContainer(ctx context.Context, c *Container, cgroupParent string) (retErr error) { log.Debugf(ctx, "RuntimeVM.CreateContainer() start") @@ -657,6 +705,50 @@ func (r *runtimeVM) UpdateContainerStatus(ctx context.Context, c *Container) err return r.updateContainerStatus(ctx, c) } +func (r *runtimeVM) restoreContainerIO(ctx context.Context, c *Container, state *task.StateResponse) error { + r.Lock() + _, ok := r.ctrs[c.ID()] + if ok { + r.Unlock() + return nil + } + r.Unlock() + + cioCfg := ctrio.Config{ + Terminal: state.Terminal, + Stdin: state.Stdin, + Stdout: state.Stdout, + Stderr: state.Stderr, + } + // The existing fifos is created by NewFIFOSetInDir. stdin, stdout, stderr should exist + // in a same temporary directory under r.fifoDir. crio is responsible for removing these + // files after container io is closed. + var iofiles []string + if cioCfg.Stdin != "" { + iofiles = append(iofiles, cioCfg.Stdin) + } + if cioCfg.Stdout != "" { + iofiles = append(iofiles, cioCfg.Stdout) + } + if cioCfg.Stderr != "" { + iofiles = append(iofiles, cioCfg.Stderr) + } + closer := func() error { + for _, f := range iofiles { + if err := os.Remove(f); err != nil { + return err + } + } + // Also try to remove the parent dir if it is empty. + for _, f := range iofiles { + _ = os.Remove(filepath.Dir(f)) + } + return nil + } + _, err := r.createContainerIO(ctx, c, cio.WithFIFOs(ctrio.NewFIFOSet(cioCfg, closer))) + return err +} + // updateContainerStatus is a UpdateContainerStatus helper, which actually does the container's // status refresh. // It does **not** Lock the container, thus it's the caller responsibility to do so, when needed. @@ -664,10 +756,24 @@ func (r *runtimeVM) updateContainerStatus(ctx context.Context, c *Container) err log.Debugf(ctx, "RuntimeVM.updateContainerStatus() start") defer log.Debugf(ctx, "RuntimeVM.updateContainerStatus() end") - // This can happen on restore, for example if we switch the runtime type - // for a container from "oci" to "vm" for the same runtime. + // This can happen on restore. We need to read shim address from the bundle path. + // And then connect to the existing gRPC server with this address. if r.task == nil { - return errors.New("runtime not correctly setup") + addressPath := filepath.Join(c.BundlePath(), "address") + data, err := ioutil.ReadFile(addressPath) + if err != nil { + log.Warnf(ctx, "Failed to read shim address: %v", err) + return errors.New("runtime not correctly setup") + } + address := strings.TrimSpace(string(data)) + conn, err := client.Connect(address, client.AnonDialer) + if err != nil { + return err + } + options := ttrpc.WithOnClose(func() { conn.Close() }) + cl := ttrpc.NewClient(conn, options) + r.client = cl + r.task = task.NewTaskClient(cl) } response, err := r.task.State(r.ctx, &task.StateRequest{ @@ -680,6 +786,10 @@ func (r *runtimeVM) updateContainerStatus(ctx context.Context, c *Container) err return errdefs.ErrNotFound } + if err = r.restoreContainerIO(ctx, c, response); err != nil { + return errors.Wrapf(err, "failed to restore container io") + } + status := c.state.Status switch response.Status { case tasktypes.StatusCreated: From e19f812f97db113f586ad8dff3e76dd9933bb699 Mon Sep 17 00:00:00 2001 From: Yujie Xia Date: Sat, 29 Jan 2022 09:19:46 +0800 Subject: [PATCH 2/2] Reuse createContainerIO in CreateContainer Signed-off-by: Yujie Xia --- internal/oci/runtime_vm.go | 222 +++++++++++++++---------------------- 1 file changed, 92 insertions(+), 130 deletions(-) diff --git a/internal/oci/runtime_vm.go b/internal/oci/runtime_vm.go index 71dbc108e70..001328c61c0 100644 --- a/internal/oci/runtime_vm.go +++ b/internal/oci/runtime_vm.go @@ -91,52 +91,6 @@ func newRuntimeVM(path, root, configPath string) RuntimeImpl { } } -func (r *runtimeVM) createContainerIO(ctx context.Context, c *Container, cioOpts ...cio.ContainerIOOpts) (_ *cio.ContainerIO, retErr error) { - // Create IO fifos - containerIO, err := cio.NewContainerIO(c.ID(), cioOpts...) - if err != nil { - return nil, err - } - - defer func() { - if retErr != nil { - containerIO.Close() - } - }() - - f, err := os.OpenFile(c.LogPath(), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0o600) - if err != nil { - return nil, err - } - - var stdoutCh, stderrCh <-chan struct{} - wc := cioutil.NewSerialWriteCloser(f) - stdout, stdoutCh := cio.NewCRILogger(c.LogPath(), wc, cio.Stdout, -1) - stderr, stderrCh := cio.NewCRILogger(c.LogPath(), wc, cio.Stderr, -1) - - go func() { - if stdoutCh != nil { - <-stdoutCh - } - if stderrCh != nil { - <-stderrCh - } - log.Debugf(ctx, "Finish redirecting log file %q, closing it", c.LogPath()) - f.Close() - }() - - containerIO.AddOutput(c.LogPath(), stdout, stderr) - containerIO.Pipe() - - r.Lock() - r.ctrs[c.ID()] = containerInfo{ - cio: containerIO, - } - r.Unlock() - - return containerIO, nil -} - // CreateContainer creates a container. func (r *runtimeVM) CreateContainer(ctx context.Context, c *Container, cgroupParent string) (retErr error) { log.Debugf(ctx, "RuntimeVM.CreateContainer() start") @@ -169,52 +123,14 @@ func (r *runtimeVM) CreateContainer(ctx context.Context, c *Container, cgroupPar return err } - // Create IO fifos - containerIO, err := cio.NewContainerIO(c.ID(), - cio.WithNewFIFOs(r.fifoDir, c.terminal, c.stdin)) + containerIO, err := r.createContainerIO(ctx, c, cio.WithNewFIFOs(r.fifoDir, c.terminal, c.stdin)) if err != nil { return err } defer func() { if retErr != nil { - containerIO.Close() - } - }() - - f, err := os.OpenFile(c.LogPath(), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0o600) - if err != nil { - return err - } - - var stdoutCh, stderrCh <-chan struct{} - wc := cioutil.NewSerialWriteCloser(f) - stdout, stdoutCh := cio.NewCRILogger(c.LogPath(), wc, cio.Stdout, -1) - stderr, stderrCh := cio.NewCRILogger(c.LogPath(), wc, cio.Stderr, -1) - - go func() { - if stdoutCh != nil { - <-stdoutCh - } - if stderrCh != nil { - <-stderrCh - } - log.Debugf(ctx, "Finish redirecting log file %q, closing it", c.LogPath()) - f.Close() - }() - - containerIO.AddOutput(c.LogPath(), stdout, stderr) - containerIO.Pipe() - - r.Lock() - r.ctrs[c.ID()] = containerInfo{ - cio: containerIO, - } - r.Unlock() - - defer func() { - if retErr != nil { - log.Warnf(ctx, "Cleaning up container %s: %v", c.ID(), err) + log.Warnf(ctx, "Cleaning up container %s: %v", c.ID(), retErr) if cleanupErr := r.deleteContainer(c, true); cleanupErr != nil { log.Infof(ctx, "DeleteContainer failed for container %s: %v", c.ID(), cleanupErr) } @@ -705,50 +621,6 @@ func (r *runtimeVM) UpdateContainerStatus(ctx context.Context, c *Container) err return r.updateContainerStatus(ctx, c) } -func (r *runtimeVM) restoreContainerIO(ctx context.Context, c *Container, state *task.StateResponse) error { - r.Lock() - _, ok := r.ctrs[c.ID()] - if ok { - r.Unlock() - return nil - } - r.Unlock() - - cioCfg := ctrio.Config{ - Terminal: state.Terminal, - Stdin: state.Stdin, - Stdout: state.Stdout, - Stderr: state.Stderr, - } - // The existing fifos is created by NewFIFOSetInDir. stdin, stdout, stderr should exist - // in a same temporary directory under r.fifoDir. crio is responsible for removing these - // files after container io is closed. - var iofiles []string - if cioCfg.Stdin != "" { - iofiles = append(iofiles, cioCfg.Stdin) - } - if cioCfg.Stdout != "" { - iofiles = append(iofiles, cioCfg.Stdout) - } - if cioCfg.Stderr != "" { - iofiles = append(iofiles, cioCfg.Stderr) - } - closer := func() error { - for _, f := range iofiles { - if err := os.Remove(f); err != nil { - return err - } - } - // Also try to remove the parent dir if it is empty. - for _, f := range iofiles { - _ = os.Remove(filepath.Dir(f)) - } - return nil - } - _, err := r.createContainerIO(ctx, c, cio.WithFIFOs(ctrio.NewFIFOSet(cioCfg, closer))) - return err -} - // updateContainerStatus is a UpdateContainerStatus helper, which actually does the container's // status refresh. // It does **not** Lock the container, thus it's the caller responsibility to do so, when needed. @@ -823,6 +695,96 @@ func (r *runtimeVM) updateContainerStatus(ctx context.Context, c *Container) err return nil } +func (r *runtimeVM) restoreContainerIO(ctx context.Context, c *Container, state *task.StateResponse) error { + r.Lock() + _, ok := r.ctrs[c.ID()] + if ok { + r.Unlock() + return nil + } + r.Unlock() + + cioCfg := ctrio.Config{ + Terminal: state.Terminal, + Stdin: state.Stdin, + Stdout: state.Stdout, + Stderr: state.Stderr, + } + // The existing fifos is created by NewFIFOSetInDir. stdin, stdout, stderr should exist + // in a same temporary directory under r.fifoDir. crio is responsible for removing these + // files after container io is closed. + var iofiles []string + if cioCfg.Stdin != "" { + iofiles = append(iofiles, cioCfg.Stdin) + } + if cioCfg.Stdout != "" { + iofiles = append(iofiles, cioCfg.Stdout) + } + if cioCfg.Stderr != "" { + iofiles = append(iofiles, cioCfg.Stderr) + } + closer := func() error { + for _, f := range iofiles { + if err := os.Remove(f); err != nil { + return err + } + } + // Also try to remove the parent dir if it is empty. + for _, f := range iofiles { + _ = os.Remove(filepath.Dir(f)) + } + return nil + } + _, err := r.createContainerIO(ctx, c, cio.WithFIFOs(ctrio.NewFIFOSet(cioCfg, closer))) + return err +} + +func (r *runtimeVM) createContainerIO(ctx context.Context, c *Container, cioOpts ...cio.ContainerIOOpts) (_ *cio.ContainerIO, retErr error) { + // Create IO fifos + containerIO, err := cio.NewContainerIO(c.ID(), cioOpts...) + if err != nil { + return nil, err + } + + defer func() { + if retErr != nil { + containerIO.Close() + } + }() + + f, err := os.OpenFile(c.LogPath(), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0o600) + if err != nil { + return nil, err + } + + var stdoutCh, stderrCh <-chan struct{} + wc := cioutil.NewSerialWriteCloser(f) + stdout, stdoutCh := cio.NewCRILogger(c.LogPath(), wc, cio.Stdout, -1) + stderr, stderrCh := cio.NewCRILogger(c.LogPath(), wc, cio.Stderr, -1) + + go func() { + if stdoutCh != nil { + <-stdoutCh + } + if stderrCh != nil { + <-stderrCh + } + log.Debugf(ctx, "Finish redirecting log file %q, closing it", c.LogPath()) + f.Close() + }() + + containerIO.AddOutput(c.LogPath(), stdout, stderr) + containerIO.Pipe() + + r.Lock() + r.ctrs[c.ID()] = containerInfo{ + cio: containerIO, + } + r.Unlock() + + return containerIO, nil +} + // PauseContainer pauses a container. func (r *runtimeVM) PauseContainer(ctx context.Context, c *Container) error { log.Debugf(ctx, "RuntimeVM.PauseContainer() start")