Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 8a5445d

Browse files
committed
Randomize apiserver watch timeouts
1 parent d9d12fd commit 8a5445d

File tree

5 files changed

+131
-13
lines changed

5 files changed

+131
-13
lines changed

cmd/kube-apiserver/apiserver.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@ package main
2020

2121
import (
2222
"fmt"
23+
"math/rand"
2324
"os"
2425
"runtime"
26+
"time"
2527

2628
"github.com/GoogleCloudPlatform/kubernetes/cmd/kube-apiserver/app"
2729
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
@@ -32,6 +34,8 @@ import (
3234

3335
func main() {
3436
runtime.GOMAXPROCS(runtime.NumCPU())
37+
rand.Seed(time.Now().UTC().UnixNano())
38+
3539
s := app.NewAPIServer()
3640
s.AddFlags(pflag.CommandLine)
3741

cmd/kube-apiserver/app/server.go

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ import (
4646
"github.com/spf13/pflag"
4747
)
4848

49+
const (
50+
// Maximum duration before timing out read/write requests
51+
// Set to a value larger than the timeouts in each watch server.
52+
ReadWriteTimeout = time.Minute * 60
53+
)
54+
4955
// APIServer runs a kubernetes api server.
5056
type APIServer struct {
5157
InsecureBindAddress util.IP
@@ -393,8 +399,8 @@ func (s *APIServer) Run(_ []string) error {
393399
readOnlyServer := &http.Server{
394400
Addr: roLocation,
395401
Handler: apiserver.MaxInFlightLimit(sem, longRunningRE, apiserver.RecoverPanics(apiserver.ReadOnly(apiserver.RateLimit(rl, m.InsecureHandler)))),
396-
ReadTimeout: 5 * time.Minute,
397-
WriteTimeout: 5 * time.Minute,
402+
ReadTimeout: ReadWriteTimeout,
403+
WriteTimeout: ReadWriteTimeout,
398404
MaxHeaderBytes: 1 << 20,
399405
}
400406
glog.Infof("Serving read-only insecurely on %s", roLocation)
@@ -413,8 +419,8 @@ func (s *APIServer) Run(_ []string) error {
413419
secureServer := &http.Server{
414420
Addr: secureLocation,
415421
Handler: apiserver.MaxInFlightLimit(sem, longRunningRE, apiserver.RecoverPanics(m.Handler)),
416-
ReadTimeout: 5 * time.Minute,
417-
WriteTimeout: 5 * time.Minute,
422+
ReadTimeout: ReadWriteTimeout,
423+
WriteTimeout: ReadWriteTimeout,
418424
MaxHeaderBytes: 1 << 20,
419425
TLSConfig: &tls.Config{
420426
// Change default from SSLv3 to TLSv1.0 (because of POODLE vulnerability)
@@ -454,12 +460,11 @@ func (s *APIServer) Run(_ []string) error {
454460
}
455461
}()
456462
}
457-
458463
http := &http.Server{
459464
Addr: insecureLocation,
460465
Handler: apiserver.RecoverPanics(m.InsecureHandler),
461-
ReadTimeout: 5 * time.Minute,
462-
WriteTimeout: 5 * time.Minute,
466+
ReadTimeout: ReadWriteTimeout,
467+
WriteTimeout: ReadWriteTimeout,
463468
MaxHeaderBytes: 1 << 20,
464469
}
465470
glog.Infof("Serving insecurely on %s", insecureLocation)

pkg/apiserver/apiserver.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,14 @@ type APIGroupVersion struct {
132132
Context api.RequestContextMapper
133133
}
134134

135+
// TODO: Pipe these in through the apiserver cmd line
136+
const (
137+
// Minimum duration, in seconds, before a watch request is timed out (inclusive lower bound of the random range)
138+
MinTimeoutSecs = 300
139+
// Maximum duration, in seconds, before a watch request is timed out (exclusive upper bound of the random range)
140+
MaxTimeoutSecs = 600
141+
)
142+
135143
// InstallREST registers the REST handlers (storage, watch, proxy and redirect) into a restful Container.
136144
// It is expected that the provided path root prefix will serve all operations. Root MUST NOT end
137145
// in a slash. A restful WebService is created for the group and version.

pkg/apiserver/watch.go

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ limitations under the License.
1717
package apiserver
1818

1919
import (
20+
"math/rand"
2021
"net/http"
2122
"reflect"
2223
"regexp"
2324
"strings"
25+
"time"
2426

2527
"github.com/GoogleCloudPlatform/kubernetes/pkg/httplog"
2628
"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
@@ -32,19 +34,47 @@ import (
3234
"golang.org/x/net/websocket"
3335
)
3436

35-
var connectionUpgradeRegex = regexp.MustCompile("(^|.*,\\s*)upgrade($|\\s*,)")
37+
var (
38+
connectionUpgradeRegex = regexp.MustCompile("(^|.*,\\s*)upgrade($|\\s*,)")
39+
40+
// nothing will ever be sent down this channel
41+
neverExitWatch <-chan time.Time = make(chan time.Time)
42+
)
3643

3744
func isWebsocketRequest(req *http.Request) bool {
3845
return connectionUpgradeRegex.MatchString(strings.ToLower(req.Header.Get("Connection"))) && strings.ToLower(req.Header.Get("Upgrade")) == "websocket"
3946
}
4047

48+
// timeoutFactory abstracts watch timeout logic for testing
49+
type timeoutFactory interface {
50+
TimeoutCh() (<-chan time.Time, func() bool)
51+
}
52+
53+
// realTimeoutFactory implements timeoutFactory
54+
type realTimeoutFactory struct {
55+
timeout time.Duration
56+
}
57+
58+
// TimeoutCh returns a channel which will receive something when the watch times out,
59+
// and a cleanup function to call once the caller is done with the channel.
60+
func (w *realTimeoutFactory) TimeoutCh() (<-chan time.Time, func() bool) {
61+
if w.timeout == 0 {
62+
return neverExitWatch, func() bool { return false }
63+
}
64+
t := time.NewTimer(w.timeout)
65+
return t.C, t.Stop
66+
}
67+
4168
// serveWatch handles serving requests to the server
4269
func serveWatch(watcher watch.Interface, scope RequestScope, w http.ResponseWriter, req *restful.Request) {
70+
// Each watch gets a random timeout to avoid thundering herds. Rand is seeded once in the kube-apiserver main function.
71+
timeout := time.Duration(MinTimeoutSecs+rand.Intn(MaxTimeoutSecs-MinTimeoutSecs)) * time.Second
72+
4373
watchServer := &WatchServer{watcher, scope.Codec, func(obj runtime.Object) {
4474
if err := setSelfLink(obj, req, scope.Namer); err != nil {
4575
glog.V(5).Infof("Failed to set self link for object %v: %v", reflect.TypeOf(obj), err)
4676
}
47-
}}
77+
}, &realTimeoutFactory{timeout}}
4878
if isWebsocketRequest(req.Request) {
4979
websocket.Handler(watchServer.HandleWS).ServeHTTP(httplog.Unlogged(w), req.Request)
5080
} else {
@@ -57,6 +87,7 @@ type WatchServer struct {
5787
watching watch.Interface
5888
codec runtime.Codec
5989
fixup func(runtime.Object)
90+
t timeoutFactory
6091
}
6192

6293
// HandleWS implements a websocket handler.
@@ -100,6 +131,9 @@ func (w *WatchServer) HandleWS(ws *websocket.Conn) {
100131
func (self *WatchServer) ServeHTTP(w http.ResponseWriter, req *http.Request) {
101132
loggedW := httplog.LogOf(req, w)
102133
w = httplog.Unlogged(w)
134+
timeoutCh, cleanup := self.t.TimeoutCh()
135+
defer cleanup()
136+
defer self.watching.Stop()
103137

104138
cn, ok := w.(http.CloseNotifier)
105139
if !ok {
@@ -113,16 +147,15 @@ func (self *WatchServer) ServeHTTP(w http.ResponseWriter, req *http.Request) {
113147
http.NotFound(w, req)
114148
return
115149
}
116-
117150
w.Header().Set("Transfer-Encoding", "chunked")
118151
w.WriteHeader(http.StatusOK)
119152
flusher.Flush()
120-
121153
encoder := watchjson.NewEncoder(w, self.codec)
122154
for {
123155
select {
124156
case <-cn.CloseNotify():
125-
self.watching.Stop()
157+
return
158+
case <-timeoutCh:
126159
return
127160
case event, ok := <-self.watching.ResultChan():
128161
if !ok {
@@ -132,7 +165,6 @@ func (self *WatchServer) ServeHTTP(w http.ResponseWriter, req *http.Request) {
132165
self.fixup(event.Object)
133166
if err := encoder.Encode(&event); err != nil {
134167
// Client disconnect.
135-
self.watching.Stop()
136168
return
137169
}
138170
flusher.Flush()

pkg/apiserver/watch_test.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,13 @@ package apiserver
1818

1919
import (
2020
"encoding/json"
21+
"io"
2122
"net/http"
2223
"net/http/httptest"
2324
"net/url"
2425
"reflect"
2526
"testing"
27+
"time"
2628

2729
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
2830
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/rest"
@@ -278,4 +280,71 @@ func TestWatchProtocolSelection(t *testing.T) {
278280
t.Errorf("Unexpected response %#v", response)
279281
}
280282
}
283+
284+
}
285+
286+
type fakeTimeoutFactory struct {
287+
timeoutCh chan time.Time
288+
done chan struct{}
289+
}
290+
291+
func (t *fakeTimeoutFactory) TimeoutCh() (<-chan time.Time, func() bool) {
292+
return t.timeoutCh, func() bool {
293+
defer close(t.done)
294+
return true
295+
}
296+
}
297+
298+
func TestWatchHTTPTimeout(t *testing.T) {
299+
watcher := watch.NewFake()
300+
timeoutCh := make(chan time.Time)
301+
done := make(chan struct{})
302+
303+
// Setup a new watchserver
304+
watchServer := &WatchServer{
305+
watcher,
306+
version2ServerCodec,
307+
func(obj runtime.Object) {},
308+
&fakeTimeoutFactory{timeoutCh, done},
309+
}
310+
311+
s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
312+
watchServer.ServeHTTP(w, req)
313+
}))
314+
defer s.Close()
315+
316+
// Setup a client
317+
dest, _ := url.Parse(s.URL)
318+
dest.Path = "/api/version/watch/resource"
319+
dest.RawQuery = ""
320+
321+
req, _ := http.NewRequest("GET", dest.String(), nil)
322+
client := http.Client{}
323+
resp, err := client.Do(req)
324+
watcher.Add(&api.Pod{TypeMeta: api.TypeMeta{APIVersion: "v1beta3"}})
325+
326+
// Make sure we can actually watch an endpoint
327+
decoder := json.NewDecoder(resp.Body)
328+
var got watchJSON
329+
err = decoder.Decode(&got)
330+
if err != nil {
331+
t.Fatalf("Unexpected error: %v", err)
332+
}
333+
334+
// Timeout and check for leaks
335+
close(timeoutCh)
336+
select {
337+
case <-done:
338+
if !watcher.Stopped {
339+
t.Errorf("Leaked watch on timeout")
340+
}
341+
case <-time.After(100 * time.Millisecond):
342+
t.Errorf("Failed to stop watcher after 100ms of timeout signal")
343+
}
344+
345+
// Make sure we can't receive any more events through the timeout watch
346+
err = decoder.Decode(&got)
347+
if err != io.EOF {
348+
t.Errorf("Unexpected non-error")
349+
}
281350
}

0 commit comments

Comments
 (0)