Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 95b3337

Browse files
mikesmithgh (Mike Smith)
authored and committed
feat: Add leader election
Signed-off-by: Mike Smith <[email protected]>
1 parent 3b690b2 commit 95b3337

File tree

12 files changed

+154
-19
lines changed

12 files changed

+154
-19
lines changed

deploy/helm/metacontroller/templates/statefulset.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ metadata:
66
labels:
77
{{- include "metacontroller.labels" . | nindent 4 }}
88
spec:
9-
replicas: 1
9+
replicas: {{ .Values.replicas | default 1 }}
1010
serviceName: ''
1111
selector:
1212
matchLabels:

docs/src/guide/configuration.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,9 @@ in `manifests/metacontroller.yaml`):
2626
| `--events-qps` | Rate of events flowing per object (default - 1 event per 5 minutes, e.g. `--events-qps=0.0033`) |
2727
| `--events-burst` | Number of events allowed to send per object (default 25, e.g. `--events-burst=25`) |
2828
| `--pprof-address` | Enable pprof and bind to endpoint /debug/pprof, set to 0 to disable pprof serving (default 0, e.g. `--pprof-address=:6060`) |
29+
| `--leader-election` | Determines whether or not to use leader election when starting metacontroller (default `false`, e.g., `--leader-election`) |
30+
| `--leader-election-resource-lock` | Determines which resource lock to use for leader election (default `leases`, e.g., `--leader-election-resource-lock=leases`). Valid resource locks are `endpoints`, `configmaps`, `leases`, `endpointsleases`, or `configmapsleases`. See the client-go documentation [leaderelection/resourcelock](https://pkg.go.dev/k8s.io/client-go/tools/leaderelection/resourcelock#pkg-constants) for additional information. |
31+
| `--leader-election-namespace` | Determines the namespace in which the leader election resource will be created. If metacontroller is running in-cluster, the default leader election namespace is the same namespace as metacontroller. If metacontroller is running out-of-cluster, the default leader election namespace is undefined. If you are running metacontroller out-of-cluster with leader election enabled, you must specify the leader election namespace. (e.g., `--leader-election-namespace=metacontroller`) |
32+
| `--leader-election-id` | Determines the name of the resource that leader election will use for holding the leader lock. For example, if the leader election id is `metacontroller` and the leader election resource lock is `leases`, then a resource of kind `leases` with metadata.name `metacontroller` will hold the leader lock. (default metacontroller, e.g., `--leader-election-id=metacontroller`) |
2933

3034
Logging flags are being set by `controller-runtime`, more on the meaning of them can be found [here](https://sdk.operatorframework.io/docs/building-operators/golang/references/logging/#overview)

examples/leader-election/README.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
## Leader Election
2+
3+
Metacontroller leverages [controller-runtime's leader election](https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/leaderelection).
4+
This is used to ensure that multiple replicas of metacontroller can run with only one active pod, for active-passive high availability.
5+
6+
7+
### Enable leader election
8+
Once enabled, metacontroller will attempt to acquire a leader on startup.
9+
- Add the metacontroller command-line argument `--leader-election`
10+
```
11+
args:
12+
- --leader-election
13+
```
14+
- Increase `replicas` to desired count in [values.yaml](../../deploy/helm/values.yaml)
15+
```
16+
replicas: 2
17+
```
18+
- See [configuration.md](../../docs/src/guide/configuration.md) for additional configuration arguments.
19+
20+
### Disable leader election
21+
Once disabled, metacontroller will not attempt to acquire a leader on startup.
22+
- Omit the metacontroller command-line argument `--leader-election`, or set it to `false`.
23+
```
24+
args:
25+
- --leader-election=false
26+
```
27+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Override args for development mode.
2+
apiVersion: apps/v1
3+
kind: StatefulSet
4+
metadata:
5+
name: metacontroller
6+
namespace: metacontroller
7+
spec:
8+
template:
9+
spec:
10+
containers:
11+
- name: metacontroller
12+
args:
13+
- --zap-devel
14+
- --zap-log-level=5
15+
- --discovery-interval=5s
16+
- --leader-election
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
bases:
2+
- ../../../manifests/dev
3+
patches:
4+
- args.yaml
5+
- replicas.yaml
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Override args for development mode.
2+
apiVersion: apps/v1
3+
kind: StatefulSet
4+
metadata:
5+
name: metacontroller
6+
namespace: metacontroller
7+
spec:
8+
replicas: 2

examples/leader-election/test.sh

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/bin/bash
2+
3+
cleanup() {
4+
exit_code=$?
5+
set +e
6+
echo "Rollback metacontroller..."
7+
kubectl scale statefulset --replicas="$previous_replicas" -n metacontroller metacontroller
8+
kubectl rollout undo statefulset metacontroller -n metacontroller
9+
kubectl rollout status --watch --timeout=180s statefulset/metacontroller -n metacontroller
10+
exit $exit_code
11+
}
12+
trap cleanup EXIT
13+
14+
set -ex
15+
16+
success_msg='successfully acquired lease'
17+
attempt_msg='attempting to acquire leader lease'
18+
19+
previous_replicas=$(kubectl get statefulset metacontroller -n metacontroller -o=jsonpath='{.spec.replicas}')
20+
kubectl apply -k ./manifest
21+
kubectl rollout status --watch --timeout=180s statefulset/metacontroller -n metacontroller
22+
23+
# both pods must be ready before checking logs
24+
kubectl wait --timeout=180s --for=condition=ready pod metacontroller-0 -n metacontroller
25+
kubectl wait --timeout=180s --for=condition=ready pod metacontroller-1 -n metacontroller
26+
27+
maximum_attempts=36
28+
# wait for one pod to acquire the leader lease
29+
until [[ "$(kubectl logs metacontroller-0 -n metacontroller | grep "$success_msg" | wc -l)" -eq 1 ||
30+
"$(kubectl logs metacontroller-1 -n metacontroller | grep "$success_msg" | wc -l)" -eq 1 ]]; do
31+
sleep 5
32+
# timeout at 180s if no leader lease acquired
33+
((maximum_attempts--)) # this will exit with an error when equal to zero
34+
done
35+
36+
# determine which pods have attempted or successfully acquired the leader lease
37+
pod0_attempt=$(kubectl logs metacontroller-0 -n metacontroller | grep "$attempt_msg" | wc -l | xargs echo -n)
38+
pod0_success=$(kubectl logs metacontroller-0 -n metacontroller | grep "$success_msg" | wc -l | xargs echo -n)
39+
pod1_attempt=$(kubectl logs metacontroller-1 -n metacontroller | grep "$attempt_msg" | wc -l | xargs echo -n)
40+
pod1_success=$(kubectl logs metacontroller-1 -n metacontroller | grep "$success_msg" | wc -l | xargs echo -n)
41+
42+
echo
43+
echo "Leader election results:"
44+
echo "metacontroller-0 leader election attempt count: $pod0_attempt, acquired count: $pod0_success"
45+
echo "metacontroller-1 leader election attempt count: $pod1_attempt, acquired count: $pod1_success"
46+
echo
47+
48+
# only one pod should successfully acquire the leader lease
49+
if [[ $((pod0_success + pod1_success)) -eq 0 ]]; then
50+
exit 1
51+
fi
File renamed without changes.
File renamed without changes.

main.go

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ import (
2424
"sync"
2525
"time"
2626

27+
"k8s.io/client-go/tools/leaderelection/resourcelock"
28+
"sigs.k8s.io/controller-runtime/pkg/leaderelection"
29+
2730
"sigs.k8s.io/controller-runtime/pkg/log/zap"
2831

2932
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
@@ -37,16 +40,20 @@ import (
3740
)
3841

3942
var (
40-
discoveryInterval = flag.Duration("discovery-interval", 30*time.Second, "How often to refresh discovery cache to pick up newly-installed resources")
41-
informerRelist = flag.Duration("cache-flush-interval", 30*time.Minute, "How often to flush local caches and relist objects from the API server")
42-
metricsAddr = flag.String("metrics-address", ":9999", "The address to bind metrics endpoint - /metrics")
43-
clientGoQPS = flag.Float64("client-go-qps", 5, "Number of queries per second client-go is allowed to make (default 5)")
44-
clientGoBurst = flag.Int("client-go-burst", 10, "Allowed burst queries for client-go (default 10)")
45-
workers = flag.Int("workers", 5, "Number of sync workers to run (default 5)")
46-
eventsQPS = flag.Float64("events-qps", 1./300., "Rate of events flowing per object (default - 1 event per 5 minutes)")
47-
eventsBurst = flag.Int("events-burst", 25, "Number of events allowed to send per object (default 25)")
48-
pprofAddr = flag.String("pprof-address", "0", "Enable pprof and bind to endpoint - /debug/pprof, set to 0 to disable pprof serving")
49-
version = "No version provided"
43+
discoveryInterval = flag.Duration("discovery-interval", 30*time.Second, "How often to refresh discovery cache to pick up newly-installed resources")
44+
informerRelist = flag.Duration("cache-flush-interval", 30*time.Minute, "How often to flush local caches and relist objects from the API server")
45+
metricsAddr = flag.String("metrics-address", ":9999", "The address to bind metrics endpoint - /metrics")
46+
clientGoQPS = flag.Float64("client-go-qps", 5, "Number of queries per second client-go is allowed to make (default 5)")
47+
clientGoBurst = flag.Int("client-go-burst", 10, "Allowed burst queries for client-go (default 10)")
48+
workers = flag.Int("workers", 5, "Number of sync workers to run (default 5)")
49+
eventsQPS = flag.Float64("events-qps", 1./300., "Rate of events flowing per object (default - 1 event per 5 minutes)")
50+
eventsBurst = flag.Int("events-burst", 25, "Number of events allowed to send per object (default 25)")
51+
pprofAddr = flag.String("pprof-address", "0", "Enable pprof and bind to endpoint - /debug/pprof, set to 0 to disable pprof serving")
52+
leaderElection = flag.Bool("leader-election", false, "Determines whether or not to use leader election when starting metacontroller")
53+
leaderElectionResourceLock = flag.String("leader-election-resource-lock", resourcelock.LeasesResourceLock, "Determines which resource lock to use for leader election")
54+
leaderElectionNamespace = flag.String("leader-election-namespace", "", "Determines the namespace in which the leader election resource will be created")
55+
leaderElectionID = flag.String("leader-election-id", "metacontroller", "Determines the name of the resource that leader election will use for holding the leader lock")
56+
version = "No version provided"
5057
)
5158

5259
func main() {
@@ -65,6 +72,10 @@ func main() {
6572
"events-qps", *eventsQPS,
6673
"events-burst", *eventsBurst,
6774
"pprofAddr", *pprofAddr,
75+
"leader-election", *leaderElection,
76+
"leader-election-resource-lock", *leaderElectionResourceLock,
77+
"leader-election-namespace", *leaderElectionNamespace,
78+
"leader-election-id", *leaderElectionID,
6879
"version", version)
6980

7081
pprofStopChan := profile.EnablePprof(*pprofAddr)
@@ -87,6 +98,12 @@ func main() {
8798
QPS: float32(*eventsQPS),
8899
},
89100
MetricsEndpoint: *metricsAddr,
101+
LeaderElectionOptions: leaderelection.Options{
102+
LeaderElection: *leaderElection,
103+
LeaderElectionResourceLock: *leaderElectionResourceLock,
104+
LeaderElectionNamespace: *leaderElectionNamespace,
105+
LeaderElectionID: *leaderElectionID,
106+
},
90107
}
91108

92109
// Create a new manager with a stop function

0 commit comments

Comments (0)