Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cmd/crio/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,11 +151,12 @@ func main() {

app.Commands = criocli.DefaultCommands
app.Commands = append(app.Commands,
criocli.CheckCommand,
criocli.ConfigCommand,
criocli.PublishCommand,
criocli.StatusCommand,
criocli.VersionCommand,
criocli.WipeCommand,
criocli.StatusCommand,
)

slices.SortFunc(app.Commands, func(a, b *cli.Command) int {
Expand Down
3 changes: 2 additions & 1 deletion completions/bash/crio
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ _cli_bash_autocomplete() {
local cur opts base
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
opts="complete
opts="check
complete
completion
config
man
Expand Down
27 changes: 26 additions & 1 deletion completions/fish/crio.fish
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

function __fish_crio_no_subcommand --description 'Test if there has been any subcommand yet'
for i in (commandline -opc)
if contains -- $i complete completion help h config man markdown md status config c containers container cs s info i version wipe help h
if contains -- $i check complete completion help h config man markdown md status config c containers container cs s info i version wipe help h
return 1
end
end
Expand Down Expand Up @@ -172,6 +172,31 @@ complete -c crio -n '__fish_crio_no_subcommand' -f -l help -s h -d 'show help'
complete -c crio -n '__fish_crio_no_subcommand' -f -l version -s v -d 'print the version'
complete -c crio -n '__fish_crio_no_subcommand' -f -l help -s h -d 'show help'
complete -c crio -n '__fish_crio_no_subcommand' -f -l version -s v -d 'print the version'
complete -c crio -n '__fish_seen_subcommand_from check' -f -l help -s h -d 'show help'
complete -r -c crio -n '__fish_crio_no_subcommand' -a 'check' -d 'Check CRI-O storage directory for errors.

This command can also repair damaged containers, images and layers.

By default, the data integrity of the storage directory is verified,
which can be an I/O and CPU-intensive operation. The --quick option
can be used to reduce the number of checks run.

When using the --repair option, especially with the --force option,
CRI-O and any currently running containers should be stopped if
possible to ensure no concurrent access to the storage directory
occurs.

The --wipe option can be used to automatically attempt to remove
containers and images on a repair failure. This option, combined
with the --force option, can be used to entirely remove the storage
directory content in case of irrecoverable errors. This should be
used as a last resort, and similarly to the --repair option, it\'s
best if CRI-O and any currently running containers are stopped.'
complete -c crio -n '__fish_seen_subcommand_from check' -f -l age -s a -r -d 'Maximum allowed age for unreferenced layers'
complete -c crio -n '__fish_seen_subcommand_from check' -f -l force -s f -d 'Remove damaged containers'
complete -c crio -n '__fish_seen_subcommand_from check' -f -l repair -s r -d 'Remove damaged images and layers'
complete -c crio -n '__fish_seen_subcommand_from check' -f -l quick -s q -d 'Perform only quick checks'
complete -c crio -n '__fish_seen_subcommand_from check' -f -l wipe -s w -d 'Wipe storage directory on repair failure'
complete -c crio -n '__fish_seen_subcommand_from complete completion' -f -l help -s h -d 'show help'
complete -r -c crio -n '__fish_crio_no_subcommand' -a 'complete completion' -d 'Generate bash, fish or zsh completions.'
complete -c crio -n '__fish_seen_subcommand_from complete completion' -f -l help -s h -d 'show help'
Expand Down
19 changes: 19 additions & 0 deletions completions/zsh/_crio
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,25 @@ _cli_zsh_autocomplete() {

local -a cmds
cmds=(
"check:Check CRI-O storage directory for errors.

This command can also repair damaged containers, images and layers.

By default, the data integrity of the storage directory is verified,
which can be an I/O and CPU-intensive operation. The --quick option
can be used to reduce the number of checks run.

When using the --repair option, especially with the --force option,
CRI-O and any currently running containers should be stopped if
possible to ensure no concurrent access to the storage directory
occurs.

The --wipe option can be used to automatically attempt to remove
containers and images on a repair failure. This option, combined
with the --force option, can be used to entirely remove the storage
directory content in case of irrecoverable errors. This should be
used as a last resort, and similarly to the --repair option, it's
best if CRI-O and any currently running containers are stopped."
'complete:Generate bash, fish or zsh completions.'
'completion:Generate bash, fish or zsh completions.'
'config:Outputs a commented version of the configuration file that could be used
Expand Down
1 change: 1 addition & 0 deletions contrib/test/ci/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
loop: "{{ ['cgroups.bats'] | product(kata_skip_cgroups_tests) \
+ ['command.bats'] | product(kata_skip_command_tests) \
+ ['reload_config.bats'] | product(kata_skip_reload_config) \
+ ['crio-check.bats'] | product(kata_skip_crio_check_tests) \
+ ['crio-wipe.bats'] | product(kata_skip_crio_wipe_tests) \
+ ['ctr.bats'] | product(kata_skip_ctr_tests) \
+ ['devices.bats'] | product(kata_skip_devices_tests) \
Expand Down
3 changes: 3 additions & 0 deletions contrib/test/ci/vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,15 @@ kata_skip_command_tests:
- 'test "log max boundary testing"'
kata_skip_reload_config:
- 'test "reload config should remove pinned images when an empty list is provided"'
kata_skip_crio_check_tests:
- 'test "storage directory check should wipe everything on repair errors"'
kata_skip_crio_wipe_tests:
- 'test "clear neither when remove persist"'
- "test \"don't clear containers on a forced restart of crio\""
- "test \"don't clear containers if clean shutdown supported file not present\""
- "test \"internal_wipe don't clear containers on a forced restart of crio\""
- 'test "internal_wipe eventually cleans network on forced restart of crio if network is slow to come up"'
- 'test "recover from badly corrupted storage directory"'
kata_skip_ctr_tests:
- 'test "ctr logging"'
- 'test "ctr journald logging"'
Expand Down
32 changes: 32 additions & 0 deletions docs/crio.8.md
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,38 @@ crio [GLOBAL OPTIONS] command [COMMAND OPTIONS] [ARGUMENTS...]

# COMMANDS

## check

Check CRI-O storage directory for errors.

This command can also repair damaged containers, images and layers.

By default, the data integrity of the storage directory is verified,
which can be an I/O and CPU-intensive operation. The --quick option
can be used to reduce the number of checks run.

When using the --repair option, especially with the --force option,
CRI-O and any currently running containers should be stopped if
possible to ensure no concurrent access to the storage directory
occurs.

The --wipe option can be used to automatically attempt to remove
containers and images on a repair failure. This option, combined
with the --force option, can be used to entirely remove the storage
directory content in case of irrecoverable errors. This should be
used as a last resort, and similarly to the --repair option, it's
best if CRI-O and any currently running containers are stopped.

**--age, -a**="": Maximum allowed age for unreferenced layers (default: "24h")

**--force, -f**: Remove damaged containers

**--quick, -q**: Perform only quick checks

**--repair, -r**: Remove damaged images and layers

**--wipe, -w**: Wipe storage directory on repair failure

## complete, completion

Generate bash, fish or zsh completions.
Expand Down
2 changes: 1 addition & 1 deletion docs/crio.conf.5.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ CRI-O reads its storage defaults from the containers-storage.conf(5) file locate
Whether CRI-O should wipe containers after a reboot and images after an upgrade when the server starts.
If set to false, one must run `crio wipe` to wipe the containers and images in these situations.

**internal_repair**=false
**internal_repair**=true
InternalRepair is whether CRI-O should check if the container and image storage was corrupted after a sudden restart.
If it was, CRI-O also attempts to repair the storage.

Expand Down
195 changes: 195 additions & 0 deletions internal/criocli/check.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
package criocli

import (
"fmt"

"github.com/containers/storage"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"

"github.com/cri-o/cri-o/internal/lib"
"github.com/cri-o/cri-o/utils"
)

// checkErrors maps an identifier to the list of errors reported for it.
// crioCheck uses it as the common type for every category of the storage
// check report (layers, images, containers, ...) so they can be walked alike.
type checkErrors map[string][]error

// CheckCommand is the `check` subcommand. It runs crioCheck and accepts the
// age, force, repair, quick and wipe flags declared below.
var CheckCommand = &cli.Command{
Name: "check",
// The long help text lives in usageText.
Usage: usageText,
Action: crioCheck,
Flags: []cli.Flag{
// Parsed as a duration by crioCheck; an empty value leaves the check option unset.
&cli.StringFlag{
Name: "age",
Aliases: []string{"a"},
Value: "24h",
Usage: "Maximum allowed age for unreferenced layers",
},
// Only consulted by crioCheck when `repair` is set.
&cli.BoolFlag{
Name: "force",
Aliases: []string{"f"},
Usage: "Remove damaged containers",
},
&cli.BoolFlag{
Name: "repair",
Aliases: []string{"r"},
Usage: "Remove damaged images and layers",
},
&cli.BoolFlag{
Name: "quick",
Aliases: []string{"q"},
Usage: "Perform only quick checks",
},
// Only consulted by crioCheck when a repair attempt reported errors.
&cli.BoolFlag{
Name: "wipe",
Aliases: []string{"w"},
Usage: "Wipe storage directory on repair failure",
},
},
}

// crioCheck implements the `crio check` command.
//
// It opens the configured storage, runs a storage check (every check by
// default, a reduced set with --quick) and logs each reported problem at debug
// level. Without --repair it only reports: when problems were found, an error
// carrying the per-category counts is returned. With --repair it asks the
// store to repair the reported damage (also removing damaged containers when
// --force is set). If the repair itself reports errors they are logged, and
// then either the first one is returned or, with --wipe, the storage
// directory is removed instead.
func crioCheck(c *cli.Context) error {
	cfg, err := GetConfigFromContext(c)
	if err != nil {
		return fmt.Errorf("unable to load configuration: %w", err)
	}

	st, err := cfg.GetStore()
	if err != nil {
		return fmt.Errorf("unable to open storage: %w", err)
	}
	// Shut the store down on every exit path; a failure here is only logged.
	defer func() {
		_, shutdownErr := st.Shutdown(true)
		if shutdownErr != nil {
			logrus.Errorf("Unable to shutdown storage: %v", shutdownErr)
		}
	}()

	root := st.GraphRoot()
	logrus.Infof("Checking storage directory %s for errors", root)

	opts := storage.CheckEverything()
	if c.Bool("quick") {
		// This is not the same as the "quick" check that CRI-O performs during its start-up
		// following an unclean shutdown, as this one would set the `LayerDigests` option,
		// which is I/O and CPU intensive, whereas the other one does not.
		opts = storage.CheckMost()
	}

	// Maximum age of unreferenced layers. An empty flag value leaves the
	// option as provided by the selected check preset.
	if rawAge := c.String("age"); rawAge != "" {
		maxAge, parseErr := utils.ParseDuration(rawAge)
		if parseErr != nil {
			return fmt.Errorf("unable to parse age duration: %w", parseErr)
		}
		opts.LayerUnreferencedMaximumAge = &maxAge
	}

	report, err := st.Check(opts)
	if err != nil {
		return fmt.Errorf("unable to check storage: %w", err)
	}

	// Dump every individual finding, grouped by the kind of object it concerns.
	categories := map[string]checkErrors{
		"layer":           report.Layers,
		"read-only layer": report.ROLayers,
		"image":           report.Images,
		"read-only image": report.ROImages,
		"container":       report.Containers,
	}
	for kind, findings := range categories {
		for id, problems := range findings {
			for i := range problems {
				logrus.Debugf("%s: %s: %v", kind, id, problems[i])
			}
		}
	}

	damaged := lib.CheckReportHasErrors(report)
	logrus.Debugf("Storage directory %s has errors: %t", root, damaged)

	// Report-only mode: nothing is modified, the outcome is just summarised.
	if !c.Bool("repair") {
		if !damaged {
			return nil
		}
		logrus.Warnf("Errors found while checking storage directory %s for errors", root)
		return fmt.Errorf(
			"%d layer errors, %d read-only layer errors, %d image errors, %d read-only image errors, %d container errors",
			len(report.Layers),
			len(report.ROLayers),
			len(report.Images),
			len(report.ROImages),
			len(report.Containers),
		)
	}

	force := c.Bool("force")
	if force {
		logrus.Warn("The `force` option has been set, repair will attempt to remove damaged containers")
	}
	logrus.Infof("Attempting to repair storage directory %s", root)

	repairOpts := &storage.RepairOptions{
		RemoveContainers: force,
	}
	if repairErrs := st.Repair(report, repairOpts); len(repairErrs) > 0 {
		for _, repairErr := range repairErrs {
			logrus.Error(repairErr)
		}

		// Every repair error has been logged above; without `wipe` only the
		// first one is handed back to the caller.
		if !c.Bool("wipe") {
			return repairErrs[0]
		}

		// Depending on whether the `force` option is set or not, this will remove the
		// storage directory completely while ignoring any running containers. Otherwise,
		// this will fail if there are any containers currently running.
		if force {
			logrus.Warn("The `force` option has been set, storage directory will be forcefully removed")
		}
		logrus.Infof("Wiping storage directory %s", root)
		return lib.RemoveStorageDirectory(cfg, st, force)
	}

	// The repair reported no errors, but findings in these categories are
	// still surfaced to the caller as an error.
	roLayers, roImages := len(report.ROLayers), len(report.ROImages)
	readOnlyDamage := roLayers > 0 || roImages > 0

	if force {
		if !readOnlyDamage {
			return nil
		}
		// Any damaged containers would have been deleted at this point.
		return fmt.Errorf(
			"%d read-only layer errors, %d read-only image errors",
			roLayers,
			roImages,
		)
	}

	if containers := len(report.Containers); readOnlyDamage || containers > 0 {
		return fmt.Errorf(
			"%d read-only layer errors, %d read-only image errors, %d container errors",
			roLayers,
			roImages,
			containers,
		)
	}

	return nil
}

// usageText is the long, multi-paragraph help text assigned to the `Usage`
// field of CheckCommand.
//
// `Usage` is used because the `Description` field will not be rendered when
// the documentation is generated. The trade-off is that using `Usage` makes
// the formatting wrong when the command-line help is rendered, and shell
// completions might also be incorrect.
var usageText = `Check CRI-O storage directory for errors.

This command can also repair damaged containers, images and layers.

By default, the data integrity of the storage directory is verified,
which can be an I/O and CPU-intensive operation. The --quick option
can be used to reduce the number of checks run.

When using the --repair option, especially with the --force option,
CRI-O and any currently running containers should be stopped if
possible to ensure no concurrent access to the storage directory
occurs.

The --wipe option can be used to automatically attempt to remove
containers and images on a repair failure. This option, combined
with the --force option, can be used to entirely remove the storage
directory content in case of irrecoverable errors. This should be
used as a last resort, and similarly to the --repair option, it's
best if CRI-O and any currently running containers are stopped.`
8 changes: 7 additions & 1 deletion internal/criocli/wipe.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,13 @@ func crioWipe(c *cli.Context) error {
// Note: this is only needed if the node rebooted.
// If there wasn't time to sync, we should clear the storage directory
if shouldWipeContainers && lib.ShutdownWasUnclean(config) {
return lib.HandleUncleanShutdown(config, store)
logrus.Infof(
"File %s not found. Wiping storage directory %s because of suspected unclean shutdown",
config.CleanShutdownFile,
store.GraphRoot(),
)
// This will fail if there are any containers currently running.
return lib.RemoveStorageDirectory(config, store, false)
}

// If crio is configured to wipe internally (and `--force` wasn't set)
Expand Down
Loading