Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 158 additions & 0 deletions internal/resourcestore/resourcestore.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
package resourcestore

import (
"sync"
"time"

"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)

// sleepTimeBeforeCleanup is the default interval between cleanup passes.
// New passes it to NewWithTimeout as the store's timeout.
const sleepTimeBeforeCleanup = 1 * time.Minute

// ResourceStore is a structure that saves information about a recently created resource.
// Resources can be added (Put) and retrieved (Get) by name. A retrieval (Get) also removes
// the Resource from the store.
// The ResourceStore comes with a cleanup routine that sleeps for `timeout`, then loops through
// the resources, removing those that are already marked as stale and marking the others as stale.
// Thus, it takes between `timeout` and `2*timeout` for unrequested resources to be cleaned up.
// Another routine can request a watcher for a resource by calling WatcherForResource.
// All watchers will be notified when the resource is added to the store with Put.
type ResourceStore struct {
	// resources holds the stored entries, keyed by the name passed to Put or WatcherForResource.
	resources map[string]*Resource
	// timeout is how long the cleanup routine sleeps between passes over resources.
	timeout time.Duration
	// Mutex is embedded; the methods of ResourceStore hold it while they read or modify resources.
	sync.Mutex
}

// Resource contains the actual resource itself (which must implement the IdentifiableCreatable interface),
// as well as stores function pointers that pertain to how that resource should be cleaned up,
// and keeps track of other requests that are watching for the successful creation of this resource.
type Resource struct {
	// resource is the stored object; Get calls SetCreated() and ID() on it.
	resource IdentifiableCreatable
	// cleanupFuncs are called by the cleanup routine when it reaps a stale Resource.
	cleanupFuncs []func()
	// watchers holds one channel per WatcherForResource call; Put sends on each of them.
	watchers []chan struct{}
	// stale is set by the cleanup routine on its first pass over the Resource;
	// a Resource that is already stale on a later pass is removed from the store.
	stale bool
	// name is the key the Resource was stored under by Put; it is used when logging a cleanup.
	name string
}

// IdentifiableCreatable are the qualities needed by the caller of the resource.
// Once a resource is retrieved, SetCreated() will be called, indicating to the server
// that resource is ready to be listed and operated upon, and ID() will be used to identify the
// newly created resource to the server.
type IdentifiableCreatable interface {
	// ID returns the identifier that Get hands back to its caller.
	ID() string
	// SetCreated is called by Get when the resource is retrieved from the store.
	SetCreated()
}

// New returns a ResourceStore that uses the default cleanup interval
// (sleepTimeBeforeCleanup). It delegates to NewWithTimeout, which also
// starts the store's cleanup routine.
func New() *ResourceStore {
	store := NewWithTimeout(sleepTimeBeforeCleanup)
	return store
}

// NewWithTimeout builds a ResourceStore whose cleanup routine sleeps for the
// given timeout between passes, and starts that routine in its own goroutine.
// It exists mainly so tests can use a short timeout; most callers should use New.
func NewWithTimeout(timeout time.Duration) *ResourceStore {
	store := new(ResourceStore)
	store.resources = map[string]*Resource{}
	store.timeout = timeout

	// The cleanup routine loops for as long as the process lives.
	go store.cleanupStaleResources()

	return store
}

// cleanupStaleResources is responsible for cleaning up resources that haven't been gotten
// from the store.
// It loops forever, sleeping for the store's `timeout` before each pass.
// A resource will first be marked as stale before being cleaned up.
// This means a resource will stay in the store between `timeout` and `2*timeout`.
// When a resource is cleaned up, it's removed from the store and its cleanupFuncs are called,
// last registered first.
// Entries that only hold watchers (no resource has been Put yet) are never marked stale
// or reaped, so the watchers waiting on them are not lost.
func (rc *ResourceStore) cleanupStaleResources() {
	for {
		time.Sleep(rc.timeout)

		var resourcesToReap []*Resource
		rc.Lock()
		for name, r := range rc.resources {
			// An entry with neither a resource nor cleanup functions is a placeholder
			// created by WatcherForResource while the creation is still in progress.
			// Reaping it would drop its watchers, which would then never be notified by Put.
			if r.resource == nil && r.cleanupFuncs == nil {
				continue
			}
			if r.stale {
				resourcesToReap = append(resourcesToReap, r)
				delete(rc.resources, name)
				continue
			}
			r.stale = true
		}
		// no need to hold the lock when running the cleanup functions
		rc.Unlock()

		for _, r := range resourcesToReap {
			logrus.Infof("cleaning up stale resource %s", r.name)
			// Callers append cleanup functions in the order the resource was built up,
			// so undo the steps last-in, first-out.
			for i := len(r.cleanupFuncs) - 1; i >= 0; i-- {
				r.cleanupFuncs[i]()
			}
		}
	}
}

// Get attempts to look up a resource by its name.
// If it's found, it's removed from the store, and it is set as created.
// Get returns an empty ID if the resource is not found,
// and returns the value of the Resource's ID() method if it is.
// An entry that only holds watchers (WatcherForResource was called, but the resource
// has not been Put yet) counts as not found and is left in the store, so that its
// watchers are still notified by a later Put.
func (rc *ResourceStore) Get(name string) string {
	rc.Lock()
	defer rc.Unlock()

	r, ok := rc.resources[name]
	// r.resource is nil for a watcher-only placeholder; calling a method on it would panic.
	if !ok || r.resource == nil {
		return ""
	}
	delete(rc.resources, name)
	r.resource.SetCreated()
	return r.resource.ID()
}

// Put takes a unique resource name (retrieved from the client request, not generated by the server)
// a newly created resource, and functions to cleanup that newly created resource.
// It adds the Resource to the ResourceStore and notifies every watcher that was registered
// for `name` with WatcherForResource.
// If the server does not request the resource again (i.e. if Get is not called), the store's
// cleanup routine eventually removes it and runs cleanupFuncs.
// Put expects `name`s passed to it to be unique. If a duplicate `name` is detected, it returns an error.
func (rc *ResourceStore) Put(name string, resource IdentifiableCreatable, cleanupFuncs []func()) error {
	rc.Lock()
	defer rc.Unlock()

	r, ok := rc.resources[name]
	// if we don't already have a resource, create it
	if !ok {
		r = &Resource{}
		rc.resources[name] = r
	}
	// make sure the resource hasn't already been added to the store
	if r.resource != nil || r.cleanupFuncs != nil {
		return errors.Errorf("failed to add entry %s to ResourceStore; entry already exists", name)
	}

	r.resource = resource
	r.cleanupFuncs = cleanupFuncs
	r.name = name
	// The entry may be a watcher placeholder that the cleanup routine has already marked
	// as stale. The resource itself is brand new, so give it the full time in the store.
	r.stale = false

	// now the resource is created, notify the watchers
	for _, w := range r.watchers {
		// Never block while holding the lock: a watcher that already has a pending
		// notification does not need another one.
		select {
		case w <- struct{}{}:
		default:
		}
	}
	return nil
}

// WatcherForResource returns a watcher for the Resource called name.
// If the store has no entry for name yet, a placeholder entry holding only the watcher is
// created, so that a later Put can notify it.
// If the resource has already been added with Put, the returned watcher is notified immediately,
// because no further Put will happen for that entry.
// A watcher can be used for concurrent processes to wait for the resource to be created.
// This is useful for situations where clients retry requests quickly after they "fail" because
// they've taken too long. Adding a watcher allows the server to slow down the client, but still
// return the resource in a timely manner once it's actually created.
// The returned channel has a buffer of one, so a notification never blocks the sender.
func (rc *ResourceStore) WatcherForResource(name string) chan struct{} {
	rc.Lock()
	defer rc.Unlock()

	watcher := make(chan struct{}, 1)
	r, ok := rc.resources[name]
	if !ok {
		rc.resources[name] = &Resource{
			watchers: []chan struct{}{watcher},
			// record the name so the entry can be identified in logs
			name: name,
		}
		return watcher
	}
	if r.resource != nil {
		// The resource was stored between the caller's lookup and this call.
		// Put has already notified its watchers, so signal this one right away.
		watcher <- struct{}{}
		return watcher
	}
	r.watchers = append(r.watchers, watcher)
	return watcher
}
114 changes: 114 additions & 0 deletions internal/resourcestore/resourcestore_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package resourcestore_test

import (
"time"

"github.com/cri-o/cri-o/internal/resourcestore"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)

// Fixture values shared by the specs: the name resources are stored under,
// and the ID reported by the test entry.
var (
	testName = "name"
	testID = "id"
)

// entry is a minimal resource used by the specs; it is what gets passed to the store's Put.
type entry struct {
	id      string
	created bool
}

// ID reports the id the entry was constructed with.
func (ent *entry) ID() string {
	return ent.id
}

// SetCreated records that the entry has been marked as created.
func (ent *entry) SetCreated() {
	ent.created = true
}

// Specs covering the basic Put/Get contract of the ResourceStore.
var _ = t.Describe("ResourceStore", func() {
	// Fixtures, rebuilt before every spec
	var (
		store    *resourcestore.ResourceStore
		cleanups []func()
		res      *entry
	)
	BeforeEach(func() {
		res = &entry{id: testID}
		cleanups = []func(){}
		store = resourcestore.New()
	})
	It("Put should be able to get resource after adding", func() {
		// When the resource is added
		err := store.Put(testName, res, cleanups)
		Expect(err).To(BeNil())

		// Then the first Get returns its ID
		Expect(store.Get(testName)).To(Equal(res.id))

		// and a second Get finds nothing, because the first one removed the resource
		Expect(store.Get(testName)).To(BeEmpty())
	})
	It("Put should fail to readd resource", func() {
		// When the resource is added once
		firstErr := store.Put(testName, res, cleanups)
		Expect(firstErr).To(BeNil())

		// Then adding it again under the same name is rejected
		secondErr := store.Put(testName, res, cleanups)
		Expect(secondErr).NotTo(BeNil())
	})
	It("Get should call SetCreated", func() {
		// When the resource is added
		err := store.Put(testName, res, cleanups)
		Expect(err).To(BeNil())

		// Then retrieving it marks it as created
		Expect(store.Get(testName)).To(Equal(res.id))
		Expect(res.created).To(BeTrue())
	})
})

// Specs covering the cleanup routine of the ResourceStore, using a short timeout.
var _ = t.Describe("ResourceStore and timeout", func() {
	// Setup the test
	var (
		sut          *resourcestore.ResourceStore
		cleanupFuncs []func()
		e            *entry
	)
	BeforeEach(func() {
		cleanupFuncs = make([]func(), 0)
		e = &entry{
			id: testID,
		}
	})
	It("Put should call cleanup funcs after timeout", func() {
		// Given
		timeout := 2 * time.Second
		sut = resourcestore.NewWithTimeout(timeout)

		// The channel is buffered so the cleanup func can always complete its send,
		// even when the spec has already given up waiting. With an unbuffered channel the
		// sender that loses the race stays blocked forever.
		cleanedUp := make(chan struct{}, 1)
		cleanupFuncs = append(cleanupFuncs, func() {
			cleanedUp <- struct{}{}
		})
		// A resource is cleaned up within 2*timeout, so 3*timeout is a safe upper bound.
		deadline := time.After(timeout * 3)

		// When
		Expect(sut.Put(testName, e, cleanupFuncs)).To(BeNil())

		// Then
		didStoreCallTimeoutFunc := false
		select {
		case <-cleanedUp:
			didStoreCallTimeoutFunc = true
		case <-deadline:
		}
		Expect(didStoreCallTimeoutFunc).To(BeTrue())

		id := sut.Get(testName)
		Expect(id).To(BeEmpty())
	})
})
25 changes: 25 additions & 0 deletions internal/resourcestore/suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package resourcestore_test

import (
"testing"

. "github.com/cri-o/cri-o/test/framework"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)

// TestResourceStore is the `go test` entry point that runs the ResourceStore specs.
// Note that the parameter t shadows the package-level TestFramework variable t inside this function.
func TestResourceStore(t *testing.T) {
	// The fail handler is registered before the specs are run.
	RegisterFailHandler(Fail)
	RunFrameworkSpecs(t, "ResourceStore")
}

// t is the TestFramework shared by the specs of this package.
// It is declared here and only assigned in BeforeSuite.
// NOTE(review): the `var _ = t.Describe(...)` declarations of this package are evaluated
// during package initialization, i.e. before BeforeSuite assigns t; presumably Describe
// tolerates a nil receiver. Confirm against the test framework.
var t *TestFramework

// Create and set up the test framework in the suite's BeforeSuite hook.
var _ = BeforeSuite(func() {
	t = NewTestFramework(NilFunc, NilFunc)
	t.Setup()
})

// Tear the test framework down in the suite's AfterSuite hook.
var _ = AfterSuite(func() {
	t.Teardown()
})
17 changes: 12 additions & 5 deletions server/container_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq
cleanupFuncs := make([]func(), 0)
defer func() {
// no error, no need to cleanup
if retErr == nil {
if retErr == nil || isContextError(retErr) {
return
}
for i := len(cleanupFuncs) - 1; i >= 0; i-- {
Expand All @@ -488,7 +488,11 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq
}()

if _, err = s.ReserveContainerName(ctr.ID(), ctr.Name()); err != nil {
return nil, errors.Wrap(err, "Kubelet may be retrying requests that are timing out in CRI-O due to system load")
cachedID, resourceErr := s.getResourceOrWait(ctx, ctr.Name(), "container")
if resourceErr == nil {
return &pb.CreateContainerResponse{ContainerId: cachedID}, nil
}
return nil, errors.Wrapf(err, resourceErr.Error())
}

cleanupFuncs = append(cleanupFuncs, func() {
Expand Down Expand Up @@ -545,13 +549,16 @@ func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerReq
log.Warnf(ctx, "unable to write containers %s state to disk: %v", newContainer.ID(), err)
}

newContainer.SetCreated()

if ctx.Err() == context.Canceled || ctx.Err() == context.DeadlineExceeded {
if isContextError(ctx.Err()) {
if err := s.resourceStore.Put(ctr.Name(), newContainer, cleanupFuncs); err != nil {
log.Errorf(ctx, "createCtr: failed to save progress of container %s: %v", newContainer.ID(), err)
}
log.Infof(ctx, "createCtr: context was either canceled or the deadline was exceeded: %v", ctx.Err())
return nil, ctx.Err()
}

newContainer.SetCreated()

log.Infof(ctx, "Created container %s: %s", newContainer.ID(), newContainer.Description())
return &pb.CreateContainerResponse{
ContainerId: ctr.ID(),
Expand Down
8 changes: 6 additions & 2 deletions server/sandbox_network.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@ import (
// or an error
func (s *Server) networkStart(ctx context.Context, sb *sandbox.Sandbox) (podIPs []string, result cnitypes.Result, retErr error) {
overallStart := time.Now()
// give a network Start call 2 minutes, half of a RunPodSandbox request timeout limit
startCtx, startCancel := context.WithTimeout(ctx, 2*time.Minute)
// Give a network Start call a full 5 minutes, independent of the context of the request.
// This is to prevent the CNI plugin from taking an unbounded amount of time,
// but to still allow a long-running sandbox creation to be cached and reused,
// rather than failing and recreating it.
const startTimeout = 5 * time.Minute
startCtx, startCancel := context.WithTimeout(context.Background(), startTimeout)
defer startCancel()

if sb.HostNetwork() {
Expand Down
16 changes: 11 additions & 5 deletions server/sandbox_run_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
cleanupFuncs := make([]func(), 0)
defer func() {
// no error, no need to cleanup
if retErr == nil {
if retErr == nil || isContextError(retErr) {
return
}
for i := len(cleanupFuncs) - 1; i >= 0; i-- {
Expand All @@ -319,7 +319,11 @@ func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
}()

if _, err := s.ReservePodName(sbox.ID(), sbox.Name()); err != nil {
return nil, errors.Wrap(err, "Kubelet may be retrying requests that are timing out in CRI-O due to system load")
cachedID, resourceErr := s.getResourceOrWait(ctx, sbox.Name(), "sandbox")
if resourceErr == nil {
return &pb.RunPodSandboxResponse{PodSandboxId: cachedID}, nil
}
return nil, errors.Wrapf(err, resourceErr.Error())
}

cleanupFuncs = append(cleanupFuncs, func() {
Expand Down Expand Up @@ -951,12 +955,14 @@ func (s *Server) runPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest
}
sb.AddIPs(ips)

sb.SetCreated()

if ctx.Err() == context.Canceled || ctx.Err() == context.DeadlineExceeded {
if isContextError(ctx.Err()) {
if err := s.resourceStore.Put(sbox.Name(), sb, cleanupFuncs); err != nil {
log.Errorf(ctx, "runSandbox: failed to save progress of sandbox %s: %v", sbox.ID(), err)
}
log.Infof(ctx, "runSandbox: context was either canceled or the deadline was exceeded: %v", ctx.Err())
return nil, ctx.Err()
}
sb.SetCreated()

log.Infof(ctx, "Ran pod sandbox %s with infra container: %s", container.ID(), container.Description())
resp = &pb.RunPodSandboxResponse{PodSandboxId: sbox.ID()}
Expand Down
Loading