10
10
import traceback
11
11
import threading
12
12
import logging
13
+ import Queue
13
14
from urlparse import urlparse
14
15
from amazon_kclpy import kcl
15
16
from docopt import docopt
@@ -114,6 +115,8 @@ def __init__(self, params):
114
115
self .buffer_size = 2
115
116
# determine log level
116
117
self .log_level = params .get ('level' )
118
+ # get log subscribers
119
+ self .log_subscribers = params .get ('log_subscribers' , [])
117
120
if self .log_level is None :
118
121
self .log_level = DEFAULT_KCL_LOG_LEVEL
119
122
if self .log_level > 0 :
@@ -140,12 +143,20 @@ def get_logger_for_level_in_log_line(self, line):
140
143
return getattr (self .logger , level_name .lower ())
141
144
return None
142
145
146
def notify_subscribers(self, line):
    """Forward a log line to every subscriber whose pattern matches it.

    Each entry of ``self.log_subscribers`` is expected to expose a ``regex``
    attribute and an ``update(line)`` method. The pattern is applied with
    ``re.match``, i.e. it is anchored at the start of the line.

    :param line: a single log line
    """
    # The generator is evaluated lazily, so every subscriber is matched and
    # notified in turn, in the order of the subscriber list.
    interested = (
        candidate for candidate in self.log_subscribers
        if re.match(candidate.regex, line)
    )
    for candidate in interested:
        candidate.update(line)
150
+
143
151
def start_reading (self , params ):
144
152
for line in tail ("-n" , 0 , "-f" , params ['file' ], _iter = True ):
145
153
if not self .running :
146
154
return
147
155
if self .log_level > 0 :
148
156
line = line .replace ('\n ' , '' )
157
+ # notify subscribers
158
+ self .notify_subscribers (line )
159
+ # add line to buffer
149
160
self .buffer .append (line )
150
161
if len (self .buffer ) >= self .buffer_size :
151
162
logger_func = None
@@ -205,6 +216,33 @@ def stop(self, quiet=True):
205
216
self .running = False
206
217
207
218
219
class KclLogListener(object):
    """Base class for objects that subscribe to lines of the KCL log.

    The ``regex`` attribute holds the pattern that selects the log lines
    this listener is interested in; ``update`` receives those lines.
    """

    def __init__(self, regex='.*'):
        # the default pattern accepts every line
        self.regex = regex

    def update(self, log_line):
        """Handle a log line; this base implementation prints it to stdout."""
        print(log_line)
225
+
226
+
227
class KclStartedLogListener(KclLogListener):
    """Log listener that signals when the KCL process reports startup events.

    Two kinds of log lines are recognised: the "Initialization complete"
    message and the response to the shard ``initialize`` call. For each
    matching line a token is put on the corresponding queue
    (``sync_init`` / ``sync_take_shard``), so that another thread can wait
    for the event with ``get(block=True, timeout=...)``.
    """

    def __init__(self):
        init_pattern = r'.*Initialization complete.*'
        take_shard_pattern = r'.*Received response .* for initialize.*'
        self.regex_init = init_pattern
        self.regex_take_shard = take_shard_pattern
        # subscribe to every line that matches either of the two patterns
        super(KclStartedLogListener, self).__init__(
            regex=r'(%s)|(%s)' % (init_pattern, take_shard_pattern))
        # Semaphore.acquire does not provide a timeout parameter, so
        # queues are used here since they offer the required functionality
        self.sync_init = Queue.Queue(0)
        self.sync_take_shard = Queue.Queue(0)

    def update(self, log_line):
        """Put a token on the queue of each event whose pattern matches."""
        events = (
            (self.regex_init, self.sync_init),
            (self.regex_take_shard, self.sync_take_shard),
        )
        for pattern, sync_queue in events:
            if re.match(pattern, log_line):
                sync_queue.put(1, block=False)
244
+
245
+
208
246
# construct a stream info hash
209
247
def get_stream_info (stream_name , log_file = None , shards = None , env = None , endpoint_url = None ,
210
248
ddb_lease_table_suffix = None , env_vars = {}):
@@ -242,7 +280,8 @@ def get_stream_info(stream_name, log_file=None, shards=None, env=None, endpoint_
242
280
243
281
244
282
def start_kcl_client_process (stream_name , listener_script , log_file = None , env = None , configs = {},
245
- endpoint_url = None , ddb_lease_table_suffix = None , env_vars = {}, kcl_log_level = DEFAULT_KCL_LOG_LEVEL ):
283
+ endpoint_url = None , ddb_lease_table_suffix = None , env_vars = {},
284
+ kcl_log_level = DEFAULT_KCL_LOG_LEVEL , log_subscribers = []):
246
285
env = aws_stack .get_environment (env )
247
286
# decide which credentials provider to use
248
287
credentialsProvider = None
@@ -266,7 +305,8 @@ def start_kcl_client_process(stream_name, listener_script, log_file=None, env=No
266
305
run ('touch %s' % log_file )
267
306
# start log output reader thread which will read the KCL log
268
307
# file and print each line to stdout of this process...
269
- reader_thread = OutputReaderThread ({'file' : log_file , 'level' : kcl_log_level , 'log_prefix' : 'KCL' })
308
+ reader_thread = OutputReaderThread ({'file' : log_file , 'level' : kcl_log_level ,
309
+ 'log_prefix' : 'KCL' , 'log_subscribers' : log_subscribers })
270
310
reader_thread .start ()
271
311
272
312
# construct stream info
@@ -352,7 +392,8 @@ def receive_msg(records, checkpointer, shard_id):
352
392
353
393
def listen_to_kinesis (stream_name , listener_func = None , processor_script = None ,
354
394
events_file = None , endpoint_url = None , log_file = None , configs = {}, env = None ,
355
- ddb_lease_table_suffix = None , env_vars = {}, kcl_log_level = DEFAULT_KCL_LOG_LEVEL ):
395
+ ddb_lease_table_suffix = None , env_vars = {}, kcl_log_level = DEFAULT_KCL_LOG_LEVEL ,
396
+ log_subscribers = [], wait_until_started = False ):
356
397
"""
357
398
High-level function that allows to subscribe to a Kinesis stream
358
399
and receive events in a listener function. A KCL client process is
@@ -375,6 +416,27 @@ def listen_to_kinesis(stream_name, listener_func=None, processor_script=None,
375
416
# start KCL client (background process)
376
417
if processor_script [- 4 :] == '.pyc' :
377
418
processor_script = processor_script [0 :- 1 ]
378
- return start_kcl_client_process (stream_name , processor_script ,
419
+ # add log listener that notifies when KCL is started
420
+ if wait_until_started :
421
+ listener = KclStartedLogListener ()
422
+ log_subscribers .append (listener )
423
+
424
+ process = start_kcl_client_process (stream_name , processor_script ,
379
425
endpoint_url = endpoint_url , log_file = log_file , configs = configs , env = env ,
380
- ddb_lease_table_suffix = ddb_lease_table_suffix , env_vars = env_vars , kcl_log_level = kcl_log_level )
426
+ ddb_lease_table_suffix = ddb_lease_table_suffix , env_vars = env_vars , kcl_log_level = kcl_log_level ,
427
+ log_subscribers = log_subscribers )
428
+
429
+ if wait_until_started :
430
+ # wait at most 30 seconds for initialization
431
+ try :
432
+ listener .sync_init .get (block = True , timeout = 30 )
433
+ except Exception , e :
434
+ raise Exception ('Timeout when waiting for KCL initialization.' )
435
+ # wait at most 30 seconds for shard lease notification
436
+ try :
437
+ listener .sync_take_shard .get (block = True , timeout = 30 )
438
+ except Exception , e :
439
+ # this merely means that there is no shard available to take. Do nothing.
440
+ pass
441
+
442
+ return process
0 commit comments