
Commit 481f378

Mike Smith (mikesmithgh) authored and committed
feat: Add leader election
Signed-off-by: Mike Smith <[email protected]>
1 parent 3b690b2 commit 481f378

12 files changed: +155 -19 lines

deploy/helm/metacontroller/templates/statefulset.yaml

Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@ metadata:
   labels:
     {{- include "metacontroller.labels" . | nindent 4 }}
 spec:
-  replicas: 1
+  replicas: {{ .Values.replicas | default 1 }}
   serviceName: ''
   selector:
     matchLabels:

docs/src/guide/configuration.md

Lines changed: 4 additions & 0 deletions

@@ -26,5 +26,9 @@ in `manifests/metacontroller.yaml`):
 | `--events-qps` | Rate of events flowing per object (default - 1 event per 5 minutes, e.g. `--events-qps=0.0033`) |
 | `--events-burst` | Number of events allowed to send per object (default 25, e.g. `--events-burst=25`) |
 | `--pprof-address` | Enable pprof and bind to endpoint /debug/pprof, set to 0 to disable pprof serving (default 0, e.g. `--pprof-address=:6060`) |
+| `--leader-election` | Determines whether to use leader election when starting metacontroller (default `false`, e.g., `--leader-election`) |
+| `--leader-election-resource-lock` | Determines which resource lock to use for leader election (default `leases`, e.g., `--leader-election-resource-lock=leases`). Valid resource locks are `endpoints`, `configmaps`, `leases`, `endpointsleases`, or `configmapsleases`. See the client-go documentation for [leaderelection/resourcelock](https://pkg.go.dev/k8s.io/client-go/tools/leaderelection/resourcelock#pkg-constants) for additional information. |
+| `--leader-election-namespace` | Determines the namespace in which the leader election resource is created. When metacontroller runs in-cluster, this defaults to metacontroller's own namespace; when it runs out-of-cluster there is no default, so the namespace must be set explicitly if leader election is enabled. (e.g., `--leader-election-namespace=metacontroller`) |
+| `--leader-election-id` | Determines the name of the resource that holds the leader lock. For example, with a leader election ID of `metacontroller` and the `leases` resource lock, a resource of kind `leases` with `metadata.name` `metacontroller` holds the leader lock. (default `metacontroller`, e.g., `--leader-election-id=metacontroller`) |
 
 Logging flags are set by `controller-runtime`; more on their meaning can be found [here](https://sdk.operatorframework.io/docs/building-operators/golang/references/logging/#overview)
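The values accepted by `--leader-election-resource-lock` correspond to string constants exported by client-go's `resourcelock` package; the default wired up in `main.go` below is `resourcelock.LeasesResourceLock`. A rough, illustrative sketch of checking a requested lock type against that list (not part of this commit; only the `leases` value is referenced via its client-go constant, the rest are spelled out as plain strings):

```go
package main

import (
	"fmt"
	"os"

	"k8s.io/client-go/tools/leaderelection/resourcelock"
)

// validResourceLocks lists the values accepted by --leader-election-resource-lock.
var validResourceLocks = []string{
	"endpoints",
	"configmaps",
	resourcelock.LeasesResourceLock, // "leases", the default
	"endpointsleases",
	"configmapsleases",
}

func main() {
	// Stand-in for the value a user passes via --leader-election-resource-lock.
	requested := "leases"
	for _, lock := range validResourceLocks {
		if requested == lock {
			fmt.Printf("%q is a recognized resource lock\n", requested)
			return
		}
	}
	fmt.Fprintf(os.Stderr, "%q is not a recognized resource lock\n", requested)
	os.Exit(1)
}
```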

examples/leader-election/README.md

Lines changed: 27 additions & 0 deletions

@@ -0,0 +1,27 @@
+## Leader Election
+
+Metacontroller leverages [controller-runtime's leader election](https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/leaderelection).
+This ensures that when multiple replicas of metacontroller run, only one pod is active at a time, providing active-passive high availability.
+
+
+### Enable leader election
+When enabled, metacontroller attempts to acquire the leader lease on startup.
+- Add the metacontroller command-line argument `--leader-election`
+```
+args:
+  - --leader-election
+```
+- Increase `replicas` to the desired count in [values.yaml](../../deploy/helm/values.yaml)
+```
+replicas: 2
+```
+- See [configuration.md](../../docs/src/guide/configuration.md) for additional configuration arguments.
+
+### Disable leader election
+When disabled, metacontroller does not attempt to acquire the leader lease on startup.
+- Omit the metacontroller command-line argument `--leader-election` or set it to `false`.
+```
+args:
+  - --leader-election=false
+```
+
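With the defaults used in this example (the `leases` resource lock, leader election ID `metacontroller`, and the `metacontroller` namespace), the leader lock is an ordinary `coordination.k8s.io/v1` Lease, so the currently active replica can be read from the Lease's `holderIdentity`. A minimal out-of-cluster sketch using client-go, illustrative only and not part of this example:

```go
package main

import (
	"context"
	"flag"
	"fmt"
	"path/filepath"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/client-go/util/homedir"
)

func main() {
	kubeconfig := flag.String("kubeconfig", filepath.Join(homedir.HomeDir(), ".kube", "config"), "path to kubeconfig")
	flag.Parse()

	cfg, err := clientcmd.BuildConfigFromFlags("", *kubeconfig)
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(cfg)

	// With the default "leases" resource lock, the leader lock is a Lease
	// named after --leader-election-id in the leader election namespace.
	lease, err := client.CoordinationV1().Leases("metacontroller").Get(context.TODO(), "metacontroller", metav1.GetOptions{})
	if err != nil {
		panic(err)
	}
	if lease.Spec.HolderIdentity != nil {
		// The holder identity typically embeds the pod name of the active replica.
		fmt.Println("current leader:", *lease.Spec.HolderIdentity)
	}
}
```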
Lines changed: 16 additions & 0 deletions

@@ -0,0 +1,16 @@
+# Override args for development mode.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: metacontroller
+  namespace: metacontroller
+spec:
+  template:
+    spec:
+      containers:
+      - name: metacontroller
+        args:
+        - --zap-devel
+        - --zap-log-level=5
+        - --discovery-interval=5s
+        - --leader-election

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+bases:
+- ../../../manifests/dev
+patches:
+- args.yaml
+- replicas.yaml

Lines changed: 8 additions & 0 deletions

@@ -0,0 +1,8 @@
+# Override args for development mode.
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: metacontroller
+  namespace: metacontroller
+spec:
+  replicas: 2

examples/leader-election/test.sh

Lines changed: 52 additions & 0 deletions

@@ -0,0 +1,52 @@
+#!/bin/bash
+
+cleanup() {
+  exit_code=$?
+  set +e
+  echo "Rollback metacontroller..."
+  kubectl scale statefulset --replicas="$previous_replicas" -n metacontroller metacontroller
+  kubectl rollout undo statefulset metacontroller -n metacontroller
+  kubectl rollout status --watch --timeout=180s statefulset/metacontroller -n metacontroller
+  exit $exit_code
+}
+trap cleanup EXIT
+
+set -ex
+
+success_msg='successfully acquired lease'
+attempt_msg='attempting to acquire leader lease'
+
+previous_replicas=$(kubectl get statefulset metacontroller -n metacontroller -o=jsonpath='{.spec.replicas}')
+kubectl apply -k ./manifest
+kubectl rollout status --watch --timeout=180s statefulset/metacontroller -n metacontroller
+
+# both pods must be ready before checking logs
+kubectl wait --timeout=180s --for=condition=ready pod metacontroller-0 -n metacontroller
+kubectl wait --timeout=180s --for=condition=ready pod metacontroller-1 -n metacontroller
+
+maximum_attempts=36
+# wait for one pod to acquire the leader lease
+until [[ "$(kubectl logs metacontroller-0 -n metacontroller | grep "$success_msg" | wc -l)" -eq 1 ||
+  "$(kubectl logs metacontroller-1 -n metacontroller | grep "$success_msg" | wc -l)" -eq 1 ]]; do
+  sleep 5
+  # timeout at 180s if no leader lease acquired
+  ((maximum_attempts--)) # this will exit with an error when equal to zero
+done
+
+# determine which pods have attempted or successfully acquired the leader lease
+pod0_attempt=$(kubectl logs metacontroller-0 -n metacontroller | grep "$attempt_msg" | wc -l | xargs echo -n)
+pod0_success=$(kubectl logs metacontroller-0 -n metacontroller | grep "$success_msg" | wc -l | xargs echo -n)
+pod1_attempt=$(kubectl logs metacontroller-1 -n metacontroller | grep "$attempt_msg" | wc -l | xargs echo -n)
+pod1_success=$(kubectl logs metacontroller-1 -n metacontroller | grep "$success_msg" | wc -l | xargs echo -n)
+
+echo
+echo "Leader election results:"
+echo "metacontroller-0 leader election attempt count: $pod0_attempt, acquired count: $pod0_success"
+echo "metacontroller-1 leader election attempt count: $pod1_attempt, acquired count: $pod1_success"
+echo
+
+# at least one pod must have attempted the leader lease and one must have acquired it
+if [[ $((pod0_attempt + pod1_attempt)) -eq 0 ||
+  $((pod0_success + pod1_success)) -eq 0 ]]; then
+  exit 1
+fi
File renamed without changes.
File renamed without changes.

main.go

Lines changed: 27 additions & 10 deletions

@@ -24,6 +24,9 @@ import (
 	"sync"
 	"time"
 
+	"k8s.io/client-go/tools/leaderelection/resourcelock"
+	"sigs.k8s.io/controller-runtime/pkg/leaderelection"
+
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 
 	"sigs.k8s.io/controller-runtime/pkg/manager/signals"
@@ -37,16 +40,20 @@ import (
 )
 
 var (
-	discoveryInterval = flag.Duration("discovery-interval", 30*time.Second, "How often to refresh discovery cache to pick up newly-installed resources")
-	informerRelist    = flag.Duration("cache-flush-interval", 30*time.Minute, "How often to flush local caches and relist objects from the API server")
-	metricsAddr       = flag.String("metrics-address", ":9999", "The address to bind metrics endpoint - /metrics")
-	clientGoQPS       = flag.Float64("client-go-qps", 5, "Number of queries per second client-go is allowed to make (default 5)")
-	clientGoBurst     = flag.Int("client-go-burst", 10, "Allowed burst queries for client-go (default 10)")
-	workers           = flag.Int("workers", 5, "Number of sync workers to run (default 5)")
-	eventsQPS         = flag.Float64("events-qps", 1./300., "Rate of events flowing per object (default - 1 event per 5 minutes)")
-	eventsBurst       = flag.Int("events-burst", 25, "Number of events allowed to send per object (default 25)")
-	pprofAddr         = flag.String("pprof-address", "0", "Enable pprof and bind to endpoint - /debug/pprof, set to 0 to disable pprof serving")
-	version           = "No version provided"
+	discoveryInterval          = flag.Duration("discovery-interval", 30*time.Second, "How often to refresh discovery cache to pick up newly-installed resources")
+	informerRelist             = flag.Duration("cache-flush-interval", 30*time.Minute, "How often to flush local caches and relist objects from the API server")
+	metricsAddr                = flag.String("metrics-address", ":9999", "The address to bind metrics endpoint - /metrics")
+	clientGoQPS                = flag.Float64("client-go-qps", 5, "Number of queries per second client-go is allowed to make (default 5)")
+	clientGoBurst              = flag.Int("client-go-burst", 10, "Allowed burst queries for client-go (default 10)")
+	workers                    = flag.Int("workers", 5, "Number of sync workers to run (default 5)")
+	eventsQPS                  = flag.Float64("events-qps", 1./300., "Rate of events flowing per object (default - 1 event per 5 minutes)")
+	eventsBurst                = flag.Int("events-burst", 25, "Number of events allowed to send per object (default 25)")
+	pprofAddr                  = flag.String("pprof-address", "0", "Enable pprof and bind to endpoint - /debug/pprof, set to 0 to disable pprof serving")
+	leaderElection             = flag.Bool("leader-election", false, "Determines whether or not to use leader election when starting metacontroller")
+	leaderElectionResourceLock = flag.String("leader-election-resource-lock", resourcelock.LeasesResourceLock, "Determines which resource lock to use for leader election")
+	leaderElectionNamespace    = flag.String("leader-election-namespace", "", "Determines the namespace in which the leader election resource will be created")
+	leaderElectionID           = flag.String("leader-election-id", "metacontroller", "Determines the name of the resource that leader election will use for holding the leader lock")
+	version                    = "No version provided"
 )
 
 func main() {
@@ -65,6 +72,10 @@ func main() {
 		"events-qps", *eventsQPS,
 		"events-burst", *eventsBurst,
 		"pprofAddr", *pprofAddr,
+		"leader-election", *leaderElection,
+		"leader-election-resource-lock", *leaderElectionResourceLock,
+		"leader-election-namespace", *leaderElectionNamespace,
+		"leader-election-id", *leaderElectionID,
 		"version", version)
 
 	pprofStopChan := profile.EnablePprof(*pprofAddr)
@@ -87,6 +98,12 @@
 			QPS: float32(*eventsQPS),
 		},
 		MetricsEndpoint: *metricsAddr,
+		LeaderElectionOptions: leaderelection.Options{
+			LeaderElection:             *leaderElection,
+			LeaderElectionResourceLock: *leaderElectionResourceLock,
+			LeaderElectionNamespace:    *leaderElectionNamespace,
+			LeaderElectionID:           *leaderElectionID,
+		},
 	}
 
 	// Create a new manager with a stop function
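For context on where these settings land: metacontroller forwards the `leaderelection.Options` above through its own configuration struct, and controller-runtime exposes the same four fields on `manager.Options`. A minimal standalone sketch of a manager configured with equivalent values (illustrative only, not metacontroller's actual wiring):

```go
package main

import (
	"k8s.io/client-go/tools/leaderelection/resourcelock"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/manager"
)

func main() {
	// ctrl.GetConfigOrDie loads the in-cluster config or the local kubeconfig.
	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), manager.Options{
		// These four fields mirror the flags added in this commit.
		LeaderElection:             true,
		LeaderElectionResourceLock: resourcelock.LeasesResourceLock, // "leases"
		LeaderElectionNamespace:    "metacontroller",
		LeaderElectionID:           "metacontroller",
	})
	if err != nil {
		panic(err)
	}
	// Start blocks; runnables that require leader election only run on the
	// replica that acquires the lease.
	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
		panic(err)
	}
}
```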
