Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit d6dbd50

Browse files
author
Kubernetes Submit Queue
authored
Merge pull request #37093 from simonswine/fix-tolerate-unready-endpoints-pods-terminating
Automatic merge from submit-queue (batch tested with PRs 39092, 39126, 37380, 37093, 39237). Endpoints with the TolerateUnready annotation should list Pods in the Terminating state. **What this PR does / why we need it**: We are using preStop lifecycle hooks to gracefully remove a node from a cluster. This hook is potentially long-running, and once the preStop hook has fired, DNS resolution for the soon-to-be-stopped Pod starts failing, which causes failures during the removal. **Special notes for your reviewer**: It would be great to backport this to 1.4 and 1.3. **Release note**: ```release-note Endpoints that tolerate unready Pods now also list Pods in the Terminating state ``` @bprashanth
2 parents 49fe0be + b44de1e commit d6dbd50

2 files changed

Lines changed: 102 additions & 11 deletions

File tree

pkg/controller/endpoint/endpoints_controller.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,13 @@ const (
5959
// An annotation on the Service denoting if the endpoints controller should
6060
// go ahead and create endpoints for unready pods. This annotation is
6161
// currently only used by StatefulSets, where we need the pod to be DNS
62-
// resolvable during initialization. In this situation we create a headless
63-
// service just for the StatefulSet, and clients shouldn't be using this Service
64-
// for anything so unready endpoints don't matter.
62+
// resolvable during initialization and termination. In this situation we
63+
// create a headless Service just for the StatefulSet, and clients shouldn't
64+
// be using this Service for anything so unready endpoints don't matter.
65+
// Endpoints of these Services retain their DNS records and continue
66+
// receiving traffic for the Service from the moment the kubelet starts all
67+
// containers in the pod and marks it "Running", till the kubelet stops all
68+
// containers and deletes the pod from the apiserver.
6569
TolerateUnreadyEndpointsAnnotation = "service.alpha.kubernetes.io/tolerate-unready-endpoints"
6670
)
6771

@@ -403,7 +407,7 @@ func (e *EndpointController) syncService(key string) error {
403407
glog.V(5).Infof("Failed to find an IP for pod %s/%s", pod.Namespace, pod.Name)
404408
continue
405409
}
406-
if pod.DeletionTimestamp != nil {
410+
if !tolerateUnreadyEndpoints && pod.DeletionTimestamp != nil {
407411
glog.V(5).Infof("Pod is being deleted %s/%s", pod.Namespace, pod.Name)
408412
continue
409413
}

test/e2e/service.go

Lines changed: 94 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,7 +1048,7 @@ var _ = framework.KubeDescribe("Services", func() {
10481048
})
10491049

10501050
It("should create endpoints for unready pods", func() {
1051-
serviceName := "never-ready"
1051+
serviceName := "tolerate-unready"
10521052
ns := f.Namespace.Name
10531053

10541054
t := NewServerTest(cs, ns, serviceName)
@@ -1060,22 +1060,49 @@ var _ = framework.KubeDescribe("Services", func() {
10601060
}
10611061
}()
10621062

1063-
service := t.BuildServiceSpec()
1064-
service.Annotations = map[string]string{endpoint.TolerateUnreadyEndpointsAnnotation: "true"}
1063+
t.name = "slow-terminating-unready-pod"
1064+
t.image = "gcr.io/google_containers/netexec:1.7"
1065+
port := 80
1066+
terminateSeconds := int64(600)
1067+
1068+
service := &v1.Service{
1069+
ObjectMeta: v1.ObjectMeta{
1070+
Name: t.ServiceName,
1071+
Namespace: t.Namespace,
1072+
Annotations: map[string]string{endpoint.TolerateUnreadyEndpointsAnnotation: "true"},
1073+
},
1074+
Spec: v1.ServiceSpec{
1075+
Selector: t.Labels,
1076+
Ports: []v1.ServicePort{{
1077+
Name: "http",
1078+
Port: int32(port),
1079+
TargetPort: intstr.FromInt(port),
1080+
}},
1081+
},
1082+
}
10651083
rcSpec := rcByNameContainer(t.name, 1, t.image, t.Labels, v1.Container{
1084+
Args: []string{fmt.Sprintf("--http-port=%d", port)},
10661085
Name: t.name,
10671086
Image: t.image,
1068-
Ports: []v1.ContainerPort{{ContainerPort: int32(80), Protocol: v1.ProtocolTCP}},
1087+
Ports: []v1.ContainerPort{{ContainerPort: int32(port), Protocol: v1.ProtocolTCP}},
10691088
ReadinessProbe: &v1.Probe{
10701089
Handler: v1.Handler{
10711090
Exec: &v1.ExecAction{
10721091
Command: []string{"/bin/false"},
10731092
},
10741093
},
10751094
},
1095+
Lifecycle: &v1.Lifecycle{
1096+
PreStop: &v1.Handler{
1097+
Exec: &v1.ExecAction{
1098+
Command: []string{"/bin/sleep", fmt.Sprintf("%d", terminateSeconds)},
1099+
},
1100+
},
1101+
},
10761102
}, nil)
1103+
rcSpec.Spec.Template.Spec.TerminationGracePeriodSeconds = &terminateSeconds
10771104

1078-
By(fmt.Sprintf("createing RC %v with selectors %v", rcSpec.Name, rcSpec.Spec.Selector))
1105+
By(fmt.Sprintf("creating RC %v with selectors %v", rcSpec.Name, rcSpec.Spec.Selector))
10791106
_, err := t.createRC(rcSpec)
10801107
framework.ExpectNoError(err)
10811108

@@ -1087,10 +1114,10 @@ var _ = framework.KubeDescribe("Services", func() {
10871114
framework.ExpectNoError(framework.VerifyPods(t.Client, t.Namespace, t.name, false, 1))
10881115

10891116
svcName := fmt.Sprintf("%v.%v", serviceName, f.Namespace.Name)
1090-
By("waiting for endpoints of Service with DNS name " + svcName)
1117+
By("Waiting for endpoints of Service with DNS name " + svcName)
10911118

10921119
execPodName := createExecPodOrFail(f.ClientSet, f.Namespace.Name, "execpod-")
1093-
cmd := fmt.Sprintf("wget -qO- %v", svcName)
1120+
cmd := fmt.Sprintf("wget -qO- http://%s:%d/", svcName, port)
10941121
var stdout string
10951122
if pollErr := wait.PollImmediate(framework.Poll, kubeProxyLagTimeout, func() (bool, error) {
10961123
var err error
@@ -1103,6 +1130,66 @@ var _ = framework.KubeDescribe("Services", func() {
11031130
}); pollErr != nil {
11041131
framework.Failf("expected un-ready endpoint for Service %v within %v, stdout: %v", t.name, kubeProxyLagTimeout, stdout)
11051132
}
1133+
1134+
By("Scaling down replication controler to zero")
1135+
framework.ScaleRC(f.ClientSet, f.InternalClientset, t.Namespace, rcSpec.Name, 0, false)
1136+
1137+
By("Update service to not tolerate unready services")
1138+
_, err = updateService(f.ClientSet, t.Namespace, t.ServiceName, func(s *v1.Service) {
1139+
s.ObjectMeta.Annotations[endpoint.TolerateUnreadyEndpointsAnnotation] = "false"
1140+
})
1141+
framework.ExpectNoError(err)
1142+
1143+
By("Check if pod is unreachable")
1144+
cmd = fmt.Sprintf("wget -qO- -T 2 http://%s:%d/; test \"$?\" -eq \"1\"", svcName, port)
1145+
if pollErr := wait.PollImmediate(framework.Poll, kubeProxyLagTimeout, func() (bool, error) {
1146+
var err error
1147+
stdout, err = framework.RunHostCmd(f.Namespace.Name, execPodName, cmd)
1148+
if err != nil {
1149+
framework.Logf("expected un-ready endpoint for Service %v, stdout: %v, err %v", t.name, stdout, err)
1150+
return false, nil
1151+
}
1152+
return true, nil
1153+
}); pollErr != nil {
1154+
framework.Failf("expected un-ready endpoint for Service %v within %v, stdout: %v", t.name, kubeProxyLagTimeout, stdout)
1155+
}
1156+
1157+
By("Update service to tolerate unready services again")
1158+
_, err = updateService(f.ClientSet, t.Namespace, t.ServiceName, func(s *v1.Service) {
1159+
s.ObjectMeta.Annotations[endpoint.TolerateUnreadyEndpointsAnnotation] = "true"
1160+
})
1161+
framework.ExpectNoError(err)
1162+
1163+
By("Check if terminating pod is available through service")
1164+
cmd = fmt.Sprintf("wget -qO- http://%s:%d/", svcName, port)
1165+
if pollErr := wait.PollImmediate(framework.Poll, kubeProxyLagTimeout, func() (bool, error) {
1166+
var err error
1167+
stdout, err = framework.RunHostCmd(f.Namespace.Name, execPodName, cmd)
1168+
if err != nil {
1169+
framework.Logf("expected un-ready endpoint for Service %v, stdout: %v, err %v", t.name, stdout, err)
1170+
return false, nil
1171+
}
1172+
return true, nil
1173+
}); pollErr != nil {
1174+
framework.Failf("expected un-ready endpoint for Service %v within %v, stdout: %v", t.name, kubeProxyLagTimeout, stdout)
1175+
}
1176+
1177+
By("Remove pods immediately")
1178+
label := labels.SelectorFromSet(labels.Set(t.Labels))
1179+
options := v1.ListOptions{LabelSelector: label.String()}
1180+
podClient := t.Client.Core().Pods(f.Namespace.Name)
1181+
pods, err := podClient.List(options)
1182+
if err != nil {
1183+
framework.Logf("warning: error retrieving pods: %s", err)
1184+
} else {
1185+
for _, pod := range pods.Items {
1186+
var gracePeriodSeconds int64 = 0
1187+
err := podClient.Delete(pod.Name, &v1.DeleteOptions{GracePeriodSeconds: &gracePeriodSeconds})
1188+
if err != nil {
1189+
framework.Logf("warning: error force deleting pod '%s': %s", pod.Name, err)
1190+
}
1191+
}
1192+
}
11061193
})
11071194

11081195
It("should only allow access from service loadbalancer source ranges [Slow]", func() {

0 commit comments

Comments
 (0)