Merged
1 change: 1 addition & 0 deletions completions/bash/crio
@@ -62,6 +62,7 @@ h
 --image-volumes
 --infra-ctr-cpuset
 --insecure-registry
+--internal-repair
 --internal-wipe
 --irqbalance-config-file
 --irqbalance-config-restore-file
1 change: 1 addition & 0 deletions completions/fish/crio.fish
@@ -98,6 +98,7 @@ complete -c crio -n '__fish_crio_no_subcommand' -f -l insecure-registry -r -d 'E
 be enabled for testing purposes**. For increased security, users should add
 their CA to their system\'s list of trusted CAs instead of using
 \'--insecure-registry\'.'
+complete -c crio -n '__fish_crio_no_subcommand' -f -l internal-repair -d 'If true, CRI-O will check if the container and image storage was corrupted after a sudden restart, and attempt to repair the storage if it was.'
 complete -c crio -n '__fish_crio_no_subcommand' -f -l internal-wipe -d 'Whether CRI-O should wipe containers after a reboot and images after an upgrade when the server starts. If set to false, one must run `crio wipe` to wipe the containers and images in these situations. This option is deprecated, and will be removed in the future.'
 complete -c crio -n '__fish_crio_no_subcommand' -f -l irqbalance-config-file -r -d 'The irqbalance service config file which is used by CRI-O.'
 complete -c crio -n '__fish_crio_no_subcommand' -f -l irqbalance-config-restore-file -r -d 'Determines if CRI-O should attempt to restore the irqbalance config at startup with the mask in this file. Use the \'disable\' value to disable the restore flow entirely.'
1 change: 1 addition & 0 deletions completions/zsh/_crio
@@ -69,6 +69,7 @@ it later with **--config**. Global options will modify the output.'
 '--image-volumes'
 '--infra-ctr-cpuset'
 '--insecure-registry'
+'--internal-repair'
 '--internal-wipe'
 '--irqbalance-config-file'
 '--irqbalance-config-restore-file'
3 changes: 3 additions & 0 deletions docs/crio.8.md
@@ -60,6 +60,7 @@ crio
 [--image-volumes]=[value]
 [--infra-ctr-cpuset]=[value]
 [--insecure-registry]=[value]
+[--internal-repair]
 [--internal-wipe]
 [--irqbalance-config-file]=[value]
 [--irqbalance-config-restore-file]=[value]
@@ -289,6 +290,8 @@ crio [GLOBAL OPTIONS] command [COMMAND OPTIONS] [ARGUMENTS...]
 their CA to their system's list of trusted CAs instead of using
 '--insecure-registry'.

+**--internal-repair**: If true, CRI-O will check if the container and image storage was corrupted after a sudden restart, and attempt to repair the storage if it was.
+
 **--internal-wipe**: Whether CRI-O should wipe containers after a reboot and images after an upgrade when the server starts. If set to false, one must run `crio wipe` to wipe the containers and images in these situations. This option is deprecated, and will be removed in the future.

 **--irqbalance-config-file**="": The irqbalance service config file which is used by CRI-O. (default: "/etc/sysconfig/irqbalance")
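As a quick usage sketch (invocations are illustrative, not from this diff; the flag and the `CONTAINER_INTERNAL_REPAIR` variable come from the `criocli` change below):

```console
# Enable the storage check/repair on unclean restarts
$ crio --internal-repair
# Equivalent via the environment variable
$ CONTAINER_INTERNAL_REPAIR=true crio
```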
4 changes: 4 additions & 0 deletions docs/crio.conf.5.md
@@ -59,6 +59,10 @@ CRI-O reads its storage defaults from the containers-storage.conf(5) file locate
 Whether CRI-O should wipe containers after a reboot and images after an upgrade when the server starts.
 If set to false, one must run `crio wipe` to wipe the containers and images in these situations.

+**internal_repair**=false
+InternalRepair is whether CRI-O should check if the container and image storage was corrupted after a sudden restart.
+If it was, CRI-O also attempts to repair the storage.
+
 **clean_shutdown_file**="/var/lib/crio/clean.shutdown"
 Location for CRI-O to lay down the clean shutdown file.
 It is used to check whether crio had time to sync before shutting down.
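A minimal sketch of how the new option might look in a configuration drop-in; the `[crio]` table matches where the template change below emits `internal_repair`, but treat the exact drop-in path as an assumption:

```toml
# Hypothetical drop-in, e.g. /etc/crio/crio.conf.d/10-internal-repair.conf
[crio]
# Check the container/image storage for corruption after a sudden restart
# and attempt a repair before the server continues starting up.
internal_repair = true
```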
9 changes: 9 additions & 0 deletions internal/criocli/criocli.go
@@ -327,6 +327,9 @@ func mergeConfig(config *libconfig.Config, ctx *cli.Context) error {
 	if ctx.IsSet("internal-wipe") {
 		config.InternalWipe = ctx.Bool("internal-wipe")
 	}
+	if ctx.IsSet("internal-repair") {
+		config.InternalRepair = ctx.Bool("internal-repair")
+	}
 	if ctx.IsSet("enable-metrics") {
 		config.EnableMetrics = ctx.Bool("enable-metrics")
 	}
@@ -1078,6 +1081,12 @@ func getCrioFlags(defConf *libconfig.Config) []cli.Flag {
 			Value:   defConf.InternalWipe,
 			EnvVars: []string{"CONTAINER_INTERNAL_WIPE"},
 		},
+		&cli.BoolFlag{
+			Name:    "internal-repair",
+			Usage:   "If true, CRI-O will check if the container and image storage was corrupted after a sudden restart, and attempt to repair the storage if it was.",
+			EnvVars: []string{"CONTAINER_INTERNAL_REPAIR"},
+			Value:   defConf.InternalRepair,
+		},
 		&cli.StringFlag{
 			Name:  "infra-ctr-cpuset",
 			Usage: "CPU set to run infra containers, if not specified CRI-O will use all online CPUs to run infra containers.",
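The merge step above only overrides the config value when the user explicitly set the flag; urfave/cli v2 treats `EnvVars` as another way of setting a flag, so `ctx.IsSet` covers both the command line and `CONTAINER_INTERNAL_REPAIR`. A minimal, self-contained sketch of that pattern (the `config` struct here is a stand-in, not CRI-O's):

```go
package main

import (
	"fmt"
	"os"

	"github.com/urfave/cli/v2"
)

// Stand-in for CRI-O's config struct; only the field we care about.
type config struct {
	InternalRepair bool
}

func main() {
	cfg := &config{InternalRepair: false} // default, as in DefaultConfig()

	app := &cli.App{
		Flags: []cli.Flag{
			&cli.BoolFlag{
				Name:    "internal-repair",
				EnvVars: []string{"CONTAINER_INTERNAL_REPAIR"},
				Value:   cfg.InternalRepair,
			},
		},
		Action: func(ctx *cli.Context) error {
			// Same pattern as mergeConfig: only override the config value
			// when the flag (or its env var) was explicitly set.
			if ctx.IsSet("internal-repair") {
				cfg.InternalRepair = ctx.Bool("internal-repair")
			}
			fmt.Println("InternalRepair:", cfg.InternalRepair)
			return nil
		},
	}
	if err := app.Run(os.Args); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
```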
41 changes: 3 additions & 38 deletions internal/criocli/wipe.go
@@ -2,13 +2,12 @@ package criocli

 import (
 	"errors"
-	"fmt"
 	"os"

 	cstorage "github.com/containers/storage"
+	"github.com/cri-o/cri-o/internal/lib"
 	"github.com/cri-o/cri-o/internal/storage"
 	"github.com/cri-o/cri-o/internal/version"
-	crioconf "github.com/cri-o/cri-o/pkg/config"
 	json "github.com/json-iterator/go"
 	"github.com/sirupsen/logrus"
 	"github.com/urfave/cli/v2"
@@ -63,8 +62,8 @@ func crioWipe(c *cli.Context) error {
 	// Then, check whether crio has shutdown with time to sync.
 	// Note: this is only needed if the node rebooted.
 	// If there wasn't time to sync, we should clear the storage directory
-	if shouldWipeContainers && shutdownWasUnclean(config) {
-		return handleCleanShutdown(config, store)
+	if shouldWipeContainers && lib.ShutdownWasUnclean(config) {
+		return lib.HandleUncleanShutdown(config, store)
 	}

 	// If crio is configured to wipe internally (and `--force` wasn't set)
@@ -97,40 +96,6 @@ func crioWipe(c *cli.Context) error {
 	return nil
 }

-func shutdownWasUnclean(config *crioconf.Config) bool {
-	// CleanShutdownFile not configured, skip
-	if config.CleanShutdownFile == "" {
-		return false
-	}
-	// CleanShutdownFile isn't supported, skip
-	if _, err := os.Stat(config.CleanShutdownSupportedFileName()); err != nil {
-		return false
-	}
-	// CleanShutdownFile is present, indicating clean shutdown
-	if _, err := os.Stat(config.CleanShutdownFile); err == nil {
-		return false
-	}
-	return true
-}
-
-func handleCleanShutdown(config *crioconf.Config, store cstorage.Store) error {
-	logrus.Infof("File %s not found. Wiping storage directory %s because of suspected dirty shutdown", config.CleanShutdownFile, store.GraphRoot())
-	// If we do not do this, we may leak other resources that are not directly in the graphroot.
-	// Erroring here should not be fatal though, it's a best effort cleanup
-	if err := store.Wipe(); err != nil {
-		logrus.Infof("Failed to wipe storage cleanly: %v", err)
-	}
-	// unmount storage or else we will fail with EBUSY
-	if _, err := store.Shutdown(false); err != nil {
-		return fmt.Errorf("failed to shutdown storage before wiping: %w", err)
-	}
-	// totally remove storage, whatever is left (possibly orphaned layers)
-	if err := os.RemoveAll(store.GraphRoot()); err != nil {
-		return fmt.Errorf("failed to remove storage directory: %w", err)
-	}
-	return nil
-}
-
 type ContainerStore struct {
 	store cstorage.Store
 }
55 changes: 55 additions & 0 deletions internal/lib/container_server.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"os"
 	"path/filepath"
 	"sync"
 	"time"
@@ -102,6 +103,26 @@ func New(ctx context.Context, configIface libconfig.Iface) (*ContainerServer, er
 		return nil, fmt.Errorf("cannot create container server: interface is nil")
 	}

+	if config.InternalRepair && ShutdownWasUnclean(config) {
+		checkOptions := cstorage.CheckEverything()
+		report, err := store.Check(checkOptions)
+		if err != nil {
+			err = HandleUncleanShutdown(config, store)
+			if err != nil {
+				return nil, err
+			}
+		}
+		options := cstorage.RepairOptions{
+			RemoveContainers: true,
+		}
+		if errs := store.Repair(report, &options); len(errs) > 0 {
+			err = HandleUncleanShutdown(config, store)
+			if err != nil {
+				return nil, err
+			}
+		}
+	}
+
 	imageService, err := storage.GetImageService(ctx, store, config)
 	if err != nil {
 		return nil, err
@@ -776,3 +797,37 @@ func (c *ContainerServer) UpdateContainerLinuxResources(ctr *oci.Container, reso

 	c.state.containers.Add(ctr.ID(), ctr)
 }
+
+func ShutdownWasUnclean(config *libconfig.Config) bool {
+	// CleanShutdownFile not configured, skip
+	if config.CleanShutdownFile == "" {
+		return false
+	}
+	// CleanShutdownFile isn't supported, skip
+	if _, err := os.Stat(config.CleanShutdownSupportedFileName()); err != nil {
+		return false
+	}
+	// CleanShutdownFile is present, indicating clean shutdown
+	if _, err := os.Stat(config.CleanShutdownFile); err == nil {
+		return false
+	}
+	return true
+}
+
+func HandleUncleanShutdown(config *libconfig.Config, store cstorage.Store) error {
+	logrus.Infof("File %s not found. Wiping storage directory %s because of suspected dirty shutdown", config.CleanShutdownFile, store.GraphRoot())
+	// If we do not do this, we may leak other resources that are not directly in the graphroot.
+	// Erroring here should not be fatal though, it's a best effort cleanup
+	if err := store.Wipe(); err != nil {
+		logrus.Infof("Failed to wipe storage cleanly: %v", err)
+	}
+	// unmount storage or else we will fail with EBUSY
+	if _, err := store.Shutdown(false); err != nil {
+		return fmt.Errorf("failed to shutdown storage before wiping: %w", err)
+	}
+	// totally remove storage, whatever is left (possibly orphaned layers)
+	if err := os.RemoveAll(store.GraphRoot()); err != nil {
+		return fmt.Errorf("failed to remove storage directory: %w", err)
+	}
+	return nil
+}
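Taken on its own, the check/repair sequence introduced in `New` can be exercised directly against a containers/storage store. A minimal sketch assuming only the API the diff itself uses (`Check`, `CheckEverything`, `Repair`, `RepairOptions`); the store options below are illustrative defaults, not CRI-O's configuration:

```go
package main

import (
	"log"

	cstorage "github.com/containers/storage"
)

func main() {
	store, err := cstorage.GetStore(cstorage.StoreOptions{
		GraphRoot:       "/var/lib/containers/storage", // assumption: common default root
		RunRoot:         "/run/containers/storage",     // assumption: common default run root
		GraphDriverName: "overlay",
	})
	if err != nil {
		log.Fatalf("open store: %v", err)
	}
	defer store.Shutdown(false)

	// Check everything: layer contents, image metadata, container layers.
	report, err := store.Check(cstorage.CheckEverything())
	if err != nil {
		log.Fatalf("storage check failed outright: %v", err)
	}

	// Repair what the report flagged; RemoveContainers permits dropping
	// containers whose layers are gone, mirroring the CRI-O change above.
	if errs := store.Repair(report, &cstorage.RepairOptions{RemoveContainers: true}); len(errs) > 0 {
		for _, e := range errs {
			log.Printf("repair: %v", e)
		}
	}
}
```

Note the fallback in `New`: when the check or the repair itself fails, CRI-O falls back to `HandleUncleanShutdown`, i.e. wiping the storage directory entirely, rather than serving from a store it could not bring back to consistency.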
4 changes: 4 additions & 0 deletions pkg/config/config.go
@@ -168,6 +168,9 @@ type RootConfig struct {
 	// If set to false, one must use the external command `crio wipe` to wipe the containers and images in these situations.
 	// The option InternalWipe is deprecated, and will be removed in a future release.
 	InternalWipe bool `toml:"internal_wipe"`
+
+	// InternalRepair is whether CRI-O should check and repair the container and image storage after a sudden restart.
+	InternalRepair bool `toml:"internal_repair"`
 }

 // GetStore returns the container storage for a given configuration
@@ -810,6 +813,7 @@ func DefaultConfig() (*Config, error) {
 			VersionFile:       CrioVersionPathTmp,
 			CleanShutdownFile: CrioCleanShutdownFile,
 			InternalWipe:      true,
+			InternalRepair:    false,
 		},
 		APIConfig: APIConfig{
 			Listen: CrioSocketPath,
11 changes: 11 additions & 0 deletions pkg/config/template.go
@@ -145,6 +145,11 @@ func initCrioTemplateConfig(c *Config) ([]*templateConfigValue, error) {
 			group:          crioRootConfig,
 			isDefaultValue: simpleEqual(dc.InternalWipe, c.InternalWipe),
 		},
+		{
+			templateString: templateStringCrioInternalRepair,
+			group:          crioRootConfig,
+			isDefaultValue: simpleEqual(dc.InternalRepair, c.InternalRepair),
+		},
 		{
 			templateString: templateStringCrioCleanShutdownFile,
 			group:          crioRootConfig,
@@ -764,6 +769,12 @@ const templateStringCrioInternalWipe = `# InternalWipe is whether CRI-O should w

 `

+const templateStringCrioInternalRepair = `# InternalRepair is whether CRI-O should check if the container and image storage was corrupted after a sudden restart.
+# If it was, CRI-O also attempts to repair the storage.
+{{ $.Comment }}internal_repair = {{ .InternalRepair }}
+
+`
+
 const templateStringCrioAPI = `# The crio.api table contains settings for the kubelet/gRPC interface.
 [crio.api]
23 changes: 23 additions & 0 deletions test/crio-wipe.bats
@@ -320,3 +320,26 @@ function start_crio_with_stopped_pod() {
 	# make sure network resources were cleaned up
 	run ! ls "$CNI_RESULTS_DIR"/*"$pod_id"*
 }
+
+@test "clean up image if corrupted on server restore" {
+	setup_crio
+	touch "$CONTAINER_CLEAN_SHUTDOWN_FILE.supported"
+
+	# Remove a random layer
+	layer=$(find "$TESTDIR/crio/overlay" -maxdepth 1 -regextype sed -regex '.*/[a-f0-9\-]\{64\}.*' | sort -R | head -n 1)
+	rm -fr "$layer"
+
+	# Since the clean shutdown supported file exists but the clean
+	# shutdown file itself is absent, the restart looks unclean and
+	# triggers the c/storage check/repair.
+	CONTAINER_INTERNAL_REPAIR=true start_crio_no_setup
+
+	# One of the layers was removed, so the image it belonged to is
+	# corrupted and the repair should have removed it.
+	num_images=${#IMAGES[@]}
+
+	# We start with $num_images images and the repair removes one, leaving
+	# $num_images - 1. `crictl images` adds one row for the table header,
+	# so the total line count comes back to exactly $num_images.
+	[[ $(crictl images | wc -l) == "$num_images" ]]
+}