@@ -15,29 +15,174 @@ import (
15
15
// Listener represents a pubsub handler.
type Listener func(ctx context.Context, message []byte)

// ListenerWithErr represents a pubsub handler that can also receive error
// indications (e.g. ErrDroppedMessages) in addition to messages.
type ListenerWithErr func(ctx context.Context, message []byte, err error)

// ErrDroppedMessages is sent to ListenerWithErr if messages are dropped or
// might have been dropped.
var ErrDroppedMessages = xerrors.New("dropped messages")
25
+
18
26
// Pubsub is a generic interface for broadcasting and receiving messages.
// Implementors should assume high-availability with the backing implementation.
type Pubsub interface {
	// Subscribe registers listener to be called for each message published to
	// event. The returned cancel func unsubscribes it.
	Subscribe(event string, listener Listener) (cancel func(), err error)
	// SubscribeWithErr is like Subscribe, but the listener additionally
	// receives error indications such as ErrDroppedMessages.
	SubscribeWithErr(event string, listener ListenerWithErr) (cancel func(), err error)
	// Publish broadcasts message to all subscribers of event.
	Publish(event string, message []byte) error
	// Close shuts down the pubsub.
	Close() error
}
25
34
35
// msgOrErr either contains a message or an error.
type msgOrErr struct {
	msg []byte
	err error // non-nil marks this element as an error indication, not a message
}
40
+
41
// msgQueue implements a fixed length queue with the ability to replace elements
// after they are queued (but before they are dequeued).
//
// The purpose of this data structure is to build something that works a bit
// like a golang channel, but if the queue is full, then we can replace the
// last element with an error so that the subscriber can get notified that some
// messages were dropped, all without blocking.
type msgQueue struct {
	ctx    context.Context
	cond   *sync.Cond                 // guards the fields below and wakes the run goroutine
	q      [PubsubBufferSize]msgOrErr // ring buffer backing store
	front  int                        // index of the oldest queued element
	size   int                        // number of queued elements
	closed bool                       // set by close(); terminates run()
	// Exactly one of l / le is non-nil (enforced by newMsgQueue).
	l  Listener
	le ListenerWithErr
}
58
+
59
+ func newMsgQueue (ctx context.Context , l Listener , le ListenerWithErr ) * msgQueue {
60
+ if l == nil && le == nil {
61
+ panic ("l or le must be non-nil" )
62
+ }
63
+ q := & msgQueue {
64
+ ctx : ctx ,
65
+ cond : sync .NewCond (& sync.Mutex {}),
66
+ l : l ,
67
+ le : le ,
68
+ }
69
+ go q .run ()
70
+ return q
71
+ }
72
+
73
+ func (q * msgQueue ) run () {
74
+ for {
75
+ // wait until there is something on the queue or we are closed
76
+ q .cond .L .Lock ()
77
+ for q .size == 0 && ! q .closed {
78
+ q .cond .Wait ()
79
+ }
80
+ if q .closed {
81
+ q .cond .L .Unlock ()
82
+ return
83
+ }
84
+ item := q .q [q .front ]
85
+ q .front = (q .front + 1 ) % PubsubBufferSize
86
+ q .size --
87
+ q .cond .L .Unlock ()
88
+
89
+ // process item without holding lock
90
+ if item .err == nil {
91
+ // real message
92
+ if q .l != nil {
93
+ q .l (q .ctx , item .msg )
94
+ continue
95
+ }
96
+ if q .le != nil {
97
+ q .le (q .ctx , item .msg , nil )
98
+ continue
99
+ }
100
+ // unhittable
101
+ continue
102
+ }
103
+ // if the listener wants errors, send it.
104
+ if q .le != nil {
105
+ q .le (q .ctx , nil , item .err )
106
+ }
107
+ }
108
+ }
109
+
110
+ func (q * msgQueue ) enqueue (msg []byte ) {
111
+ q .cond .L .Lock ()
112
+ defer q .cond .L .Unlock ()
113
+
114
+ if q .size == PubsubBufferSize {
115
+ // queue is full, so we're going to drop the msg we got called with.
116
+ // We also need to record that messages are being dropped, which we
117
+ // do at the last message in the queue. This potentially makes us
118
+ // lose 2 messages instead of one, but it's more important at this
119
+ // point to warn the subscriber that they're losing messages so they
120
+ // can do something about it.
121
+ back := (q .front + PubsubBufferSize - 1 ) % PubsubBufferSize
122
+ q .q [back ].msg = nil
123
+ q .q [back ].err = ErrDroppedMessages
124
+ return
125
+ }
126
+ // queue is not full, insert the message
127
+ next := (q .front + q .size ) % PubsubBufferSize
128
+ q .q [next ].msg = msg
129
+ q .q [next ].err = nil
130
+ q .size ++
131
+ q .cond .Broadcast ()
132
+ }
133
+
134
+ func (q * msgQueue ) close () {
135
+ q .cond .L .Lock ()
136
+ defer q .cond .L .Unlock ()
137
+ defer q .cond .Broadcast ()
138
+ q .closed = true
139
+ }
140
+
141
+ // dropped records an error in the queue that messages might have been dropped
142
+ func (q * msgQueue ) dropped () {
143
+ q .cond .L .Lock ()
144
+ defer q .cond .L .Unlock ()
145
+
146
+ if q .size == PubsubBufferSize {
147
+ // queue is full, but we need to record that messages are being dropped,
148
+ // which we do at the last message in the queue. This potentially drops
149
+ // another message, but it's more important for the subscriber to know.
150
+ back := (q .front + PubsubBufferSize - 1 ) % PubsubBufferSize
151
+ q .q [back ].msg = nil
152
+ q .q [back ].err = ErrDroppedMessages
153
+ return
154
+ }
155
+ // queue is not full, insert the error
156
+ next := (q .front + q .size ) % PubsubBufferSize
157
+ q .q [next ].msg = nil
158
+ q .q [next ].err = ErrDroppedMessages
159
+ q .size ++
160
+ q .cond .Broadcast ()
161
+ }
162
+
26
163
// Pubsub implementation using PostgreSQL.
type pgPubsub struct {
	ctx        context.Context
	pgListener *pq.Listener // LISTEN/NOTIFY connection to PostgreSQL
	db         *sql.DB
	mut        sync.Mutex // guards queues
	// queues maps event name -> subscription ID -> that subscription's queue.
	queues map[string]map[uuid.UUID]*msgQueue
}
34
171
35
- // messageBufferSize is the maximum number of unhandled messages we will buffer
172
+ // PubsubBufferSize is the maximum number of unhandled messages we will buffer
36
173
// for a subscriber before dropping messages.
37
- const messageBufferSize = 2048
174
+ const PubsubBufferSize = 2048
38
175
39
176
// Subscribe calls the listener when an event matching the name is received.
40
177
func (p * pgPubsub ) Subscribe (event string , listener Listener ) (cancel func (), err error ) {
178
+ return p .subscribeQueue (event , newMsgQueue (p .ctx , listener , nil ))
179
+ }
180
+
181
+ func (p * pgPubsub ) SubscribeWithErr (event string , listener ListenerWithErr ) (cancel func (), err error ) {
182
+ return p .subscribeQueue (event , newMsgQueue (p .ctx , nil , listener ))
183
+ }
184
+
185
+ func (p * pgPubsub ) subscribeQueue (event string , newQ * msgQueue ) (cancel func (), err error ) {
41
186
p .mut .Lock ()
42
187
defer p .mut .Unlock ()
43
188
@@ -50,23 +195,20 @@ func (p *pgPubsub) Subscribe(event string, listener Listener) (cancel func(), er
50
195
return nil , xerrors .Errorf ("listen: %w" , err )
51
196
}
52
197
53
- var eventListeners map [uuid.UUID ]chan <- [] byte
198
+ var eventQs map [uuid.UUID ]* msgQueue
54
199
var ok bool
55
- if eventListeners , ok = p .listeners [event ]; ! ok {
56
- eventListeners = make (map [uuid.UUID ]chan <- [] byte )
57
- p .listeners [event ] = eventListeners
200
+ if eventQs , ok = p .queues [event ]; ! ok {
201
+ eventQs = make (map [uuid.UUID ]* msgQueue )
202
+ p .queues [event ] = eventQs
58
203
}
59
-
60
- ctx , cancelCallbacks := context .WithCancel (p .ctx )
61
- messages := make (chan []byte , messageBufferSize )
62
- go messagesToListener (ctx , messages , listener )
63
204
id := uuid .New ()
64
- eventListeners [id ] = messages
205
+ eventQs [id ] = newQ
65
206
return func () {
66
207
p .mut .Lock ()
67
208
defer p .mut .Unlock ()
68
- cancelCallbacks ()
69
- listeners := p .listeners [event ]
209
+ listeners := p .queues [event ]
210
+ q := listeners [id ]
211
+ q .close ()
70
212
delete (listeners , id )
71
213
72
214
if len (listeners ) == 0 {
@@ -109,6 +251,7 @@ func (p *pgPubsub) listen(ctx context.Context) {
109
251
}
110
252
// A nil notification can be dispatched on reconnect.
111
253
if notif == nil {
254
+ p .recordReconnect ()
112
255
continue
113
256
}
114
257
p .listenReceive (notif )
@@ -118,19 +261,22 @@ func (p *pgPubsub) listen(ctx context.Context) {
118
261
func (p * pgPubsub ) listenReceive (notif * pq.Notification ) {
119
262
p .mut .Lock ()
120
263
defer p .mut .Unlock ()
121
- listeners , ok := p .listeners [notif .Channel ]
264
+ queues , ok := p .queues [notif .Channel ]
122
265
if ! ok {
123
266
return
124
267
}
125
268
extra := []byte (notif .Extra )
126
- for _ , listener := range listeners {
127
- select {
128
- case listener <- extra :
129
- // ok!
130
- default :
131
- // bad news, we dropped the event because the listener isn't
132
- // keeping up
133
- // TODO (spike): figure out a way to communicate this to the Listener
269
+ for _ , q := range queues {
270
+ q .enqueue (extra )
271
+ }
272
+ }
273
+
274
+ func (p * pgPubsub ) recordReconnect () {
275
+ p .mut .Lock ()
276
+ defer p .mut .Unlock ()
277
+ for _ , listeners := range p .queues {
278
+ for _ , q := range listeners {
279
+ q .dropped ()
134
280
}
135
281
}
136
282
}
@@ -162,20 +308,9 @@ func NewPubsub(ctx context.Context, database *sql.DB, connectURL string) (Pubsub
162
308
ctx : ctx ,
163
309
db : database ,
164
310
pgListener : listener ,
165
- listeners : make (map [string ]map [uuid.UUID ]chan <- [] byte ),
311
+ queues : make (map [string ]map [uuid.UUID ]* msgQueue ),
166
312
}
167
313
go pgPubsub .listen (ctx )
168
314
169
315
return pgPubsub , nil
170
316
}
171
-
172
- func messagesToListener (ctx context.Context , messages <- chan []byte , listener Listener ) {
173
- for {
174
- select {
175
- case <- ctx .Done ():
176
- return
177
- case m := <- messages :
178
- listener (ctx , m )
179
- }
180
- }
181
- }
0 commit comments