@@ -20,7 +20,9 @@ package kubenet
2020
2121import (
2222 "fmt"
23+ "io/ioutil"
2324 "net"
25+ "path/filepath"
2426 "strings"
2527 "sync"
2628 "syscall"
@@ -45,8 +47,9 @@ import (
4547 utilsets "k8s.io/kubernetes/pkg/util/sets"
4648 utilsysctl "k8s.io/kubernetes/pkg/util/sysctl"
4749
48- "k8s.io/kubernetes/pkg/kubelet/network/hostport"
4950 "strconv"
51+
52+ "k8s.io/kubernetes/pkg/kubelet/network/hostport"
5053)
5154
5255const (
@@ -67,6 +70,10 @@ const (
6770
6871 // ebtables Chain to store dedup rules
6972 dedupChain = utilebtables .Chain ("KUBE-DEDUP" )
73+
74+ // defaultIPAMDir is the default location for the checkpoint files stored by host-local ipam
75+ // https://github.com/containernetworking/cni/tree/master/plugins/ipam/host-local#backends
76+ defaultIPAMDir = "/var/lib/cni/networks"
7077)
7178
7279// CNI plugins required by kubenet in /opt/cni/bin or vendor directory
@@ -394,13 +401,13 @@ func (plugin *kubenetNetworkPlugin) setup(namespace string, name string, id kube
394401 plugin .podIPs [id ] = ip4 .String ()
395402
396403 // Open any hostports the pod's containers want
397- runningPods , err := plugin .getRunningPods ()
404+ activePods , err := plugin .getActivePods ()
398405 if err != nil {
399406 return err
400407 }
401408
402- newPod := & hostport.RunningPod {Pod : pod , IP : ip4 }
403- if err := plugin .hostportHandler .OpenPodHostportsAndSync (newPod , BridgeName , runningPods ); err != nil {
409+ newPod := & hostport.ActivePod {Pod : pod , IP : ip4 }
410+ if err := plugin .hostportHandler .OpenPodHostportsAndSync (newPod , BridgeName , activePods ); err != nil {
404411 return err
405412 }
406413
@@ -432,6 +439,16 @@ func (plugin *kubenetNetworkPlugin) SetUpPod(namespace string, name string, id k
432439 // Not a hard error or warning
433440 glog .V (4 ).Infof ("Failed to clean up %s/%s after SetUpPod failure: %v" , namespace , name , err )
434441 }
442+
443+ // TODO: Remove this hack once we've figured out how to retrieve the netns
444+ // of an exited container. Currently, restarting docker will leak a bunch of
445+ // ips. This will exhaust available ip space unless we cleanup old ips. At the
446+ // same time we don't want to try GC'ing them periodically as that could lead
447+ // to a performance regression in starting pods. So on each setup failure, try
448+ // GC on the assumption that the kubelet is going to retry pod creation, and
449+ // when it does, there will be ips.
450+ plugin .ipamGarbageCollection ()
451+
435452 return err
436453 }
437454
@@ -468,9 +485,9 @@ func (plugin *kubenetNetworkPlugin) teardown(namespace string, name string, id k
468485 }
469486 }
470487
471- runningPods , err := plugin .getRunningPods ()
488+ activePods , err := plugin .getActivePods ()
472489 if err == nil {
473- err = plugin .hostportHandler .SyncHostports (BridgeName , runningPods )
490+ err = plugin .hostportHandler .SyncHostports (BridgeName , activePods )
474491 }
475492 if err != nil {
476493 errList = append (errList , err )
@@ -571,15 +588,31 @@ func (plugin *kubenetNetworkPlugin) checkCNIPluginInDir(dir string) bool {
571588 return true
572589}
573590
574- // Returns a list of pods running on this node and each pod's IP address. Assumes
575- // PodSpecs retrieved from the runtime include the name and ID of containers in
576- // each pod.
577- func (plugin * kubenetNetworkPlugin ) getRunningPods () ([]* hostport.RunningPod , error ) {
578- pods , err := plugin .host .GetRuntime ().GetPods (false )
591+ // getNonExitedPods returns a list of pods that have at least one running container.
592+ func (plugin * kubenetNetworkPlugin ) getNonExitedPods () ([]* kubecontainer.Pod , error ) {
593+ ret := []* kubecontainer.Pod {}
594+ pods , err := plugin .host .GetRuntime ().GetPods (true )
579595 if err != nil {
580596 return nil , fmt .Errorf ("Failed to retrieve pods from runtime: %v" , err )
581597 }
582- runningPods := make ([]* hostport.RunningPod , 0 )
598+ for _ , p := range pods {
599+ if podIsExited (p ) {
600+ continue
601+ }
602+ ret = append (ret , p )
603+ }
604+ return ret , nil
605+ }
606+
607+ // Returns a list of pods running or ready to run on this node and each pod's IP address.
608+ // Assumes PodSpecs retrieved from the runtime include the name and ID of containers in
609+ // each pod.
610+ func (plugin * kubenetNetworkPlugin ) getActivePods () ([]* hostport.ActivePod , error ) {
611+ pods , err := plugin .getNonExitedPods ()
612+ if err != nil {
613+ return nil , err
614+ }
615+ activePods := make ([]* hostport.ActivePod , 0 )
583616 for _ , p := range pods {
584617 containerID , err := plugin .host .GetRuntime ().GetPodContainerID (p )
585618 if err != nil {
@@ -594,13 +627,94 @@ func (plugin *kubenetNetworkPlugin) getRunningPods() ([]*hostport.RunningPod, er
594627 continue
595628 }
596629 if pod , ok := plugin .host .GetPodByName (p .Namespace , p .Name ); ok {
597- runningPods = append (runningPods , & hostport.RunningPod {
630+ activePods = append (activePods , & hostport.ActivePod {
598631 Pod : pod ,
599632 IP : podIP ,
600633 })
601634 }
602635 }
603- return runningPods , nil
636+ return activePods , nil
637+ }
638+
639+ // ipamGarbageCollection will release unused IP.
640+ // kubenet uses the CNI bridge plugin, which stores allocated ips on file. Each
641+ // file created under defaultIPAMDir has the format: ip/container-hash. So this
642+ // routine looks for hashes that are not reported by the currently running docker,
643+ // and invokes DelNetwork on each one. Note that this will only work for the
644+ // current CNI bridge plugin, because we have no way of finding the NetNs.
645+ func (plugin * kubenetNetworkPlugin ) ipamGarbageCollection () {
646+ glog .V (2 ).Infof ("Starting IP garbage collection" )
647+
648+ ipamDir := filepath .Join (defaultIPAMDir , KubenetPluginName )
649+ files , err := ioutil .ReadDir (ipamDir )
650+ if err != nil {
651+ glog .Errorf ("Failed to list files in %q: %v" , ipamDir , err )
652+ return
653+ }
654+
655+ // gather containerIDs for allocated ips
656+ ipContainerIdMap := make (map [string ]string )
657+ for _ , file := range files {
658+ // skip non checkpoint file
659+ if ip := net .ParseIP (file .Name ()); ip == nil {
660+ continue
661+ }
662+
663+ content , err := ioutil .ReadFile (filepath .Join (ipamDir , file .Name ()))
664+ if err != nil {
665+ glog .Errorf ("Failed to read file %v: %v" , file , err )
666+ }
667+ ipContainerIdMap [file .Name ()] = strings .TrimSpace (string (content ))
668+ }
669+
670+ // gather infra container IDs of current running Pods
671+ runningContainerIDs := utilsets.String {}
672+ pods , err := plugin .getNonExitedPods ()
673+ if err != nil {
674+ glog .Errorf ("Failed to get pods: %v" , err )
675+ return
676+ }
677+ for _ , pod := range pods {
678+ containerID , err := plugin .host .GetRuntime ().GetPodContainerID (pod )
679+ if err != nil {
680+ glog .Warningf ("Failed to get infra containerID of %q/%q: %v" , pod .Namespace , pod .Name , err )
681+ continue
682+ }
683+
684+ runningContainerIDs .Insert (strings .TrimSpace (containerID .ID ))
685+ }
686+
687+ // release leaked ips
688+ for ip , containerID := range ipContainerIdMap {
689+ // if the container is not running, release IP
690+ if runningContainerIDs .Has (containerID ) {
691+ continue
692+ }
693+ // CNI requires all config to be presented, although only containerID is needed in this case
694+ rt := & libcni.RuntimeConf {
695+ ContainerID : containerID ,
696+ IfName : network .DefaultInterfaceName ,
697+ // TODO: How do we find the NetNs of an exited container? docker inspect
698+ // doesn't show us the pid, so we probably need to checkpoint
699+ NetNS : "" ,
700+ }
701+
702+ glog .V (2 ).Infof ("Releasing IP %q allocated to %q." , ip , containerID )
703+ // CNI bridge plugin should try to release IP and then return
704+ if err := plugin .cniConfig .DelNetwork (plugin .netConfig , rt ); err != nil {
705+ glog .Errorf ("Error while releasing IP: %v" , err )
706+ }
707+ }
708+ }
709+
710+ // podIsExited returns true if the pod is exited (all containers inside are exited).
711+ func podIsExited (p * kubecontainer.Pod ) bool {
712+ for _ , c := range p .Containers {
713+ if c .State != kubecontainer .ContainerStateExited {
714+ return false
715+ }
716+ }
717+ return true
604718}
605719
606720func (plugin * kubenetNetworkPlugin ) buildCNIRuntimeConf (ifName string , id kubecontainer.ContainerID ) (* libcni.RuntimeConf , error ) {
0 commit comments