diff --git a/analytics/consumer.py b/analytics/consumer.py
index ae6e3c92..1381be7d 100644
--- a/analytics/consumer.py
+++ b/analytics/consumer.py
@@ -1,5 +1,6 @@
 import logging
 from threading import Thread
+from multiprocessing.pool import Pool
 import monotonic
 import backoff
 import json
@@ -19,6 +20,25 @@
 BATCH_SIZE_LIMIT = 475000
 
 
+def request(batch, write_key, host, gzip, timeout, retries):
+    """Attempt to upload the batch and retry before raising an error."""
+
+    def fatal_exception(exc):
+        if isinstance(exc, APIError):
+            # retry on server errors and on client errors with a 429 status code (rate limited);
+            # don't retry on other client errors
+            return (400 <= exc.status < 500) and exc.status != 429
+        else:
+            # retry on all other errors (e.g. network)
+            return False
+
+    @backoff.on_exception(backoff.expo, Exception, max_tries=retries + 1, giveup=fatal_exception)
+    def send_request():
+        post(write_key, host, gzip=gzip, timeout=timeout, batch=batch)
+
+    send_request()
+
+
 class Consumer(Thread):
     """Consumes the messages from the client's queue."""
     log = logging.getLogger('segment')
@@ -42,6 +62,7 @@ def __init__(self, queue, write_key, upload_size=100, host=None, on_error=None,
         self.running = True
         self.retries = retries
         self.timeout = timeout
+        self.pool = Pool(32)
 
     def run(self):
         """Runs the consumer."""
@@ -49,6 +70,8 @@
         while self.running:
             self.upload()
 
+        self.pool.close()
+        self.pool.join()
         self.log.debug('consumer exited.')
 
     def pause(self):
@@ -56,25 +79,31 @@
         """Pause the consumer."""
         self.running = False
 
     def upload(self):
-        """Upload the next batch of items, return whether successful."""
-        success = False
+        """Asynchronously upload the next batch of items."""
         batch = self.next()
-        if len(batch) == 0:
-            return False
+        length = len(batch)
+        if length == 0:
+            return
 
-        try:
-            self.request(batch)
-            success = True
-        except Exception as e:
+        def on_success(x):
+            done()
+
+        def on_error(e):
             self.log.error('error uploading: %s', e)
-            success = False
             if self.on_error:
                 self.on_error(e, batch)
-        finally:
-            # mark items as acknowledged from queue
-            for item in batch:
+            done()
+
+        def done():
+            for i in range(length):
                 self.queue.task_done()
-            return success
+
+        self.pool.apply_async(
+            request,
+            args=[batch, self.write_key, self.host, self.gzip, self.timeout, self.retries],
+            callback=on_success,
+            error_callback=on_error
+        )
 
     def next(self):
         """Return the next batch of items to upload."""
@@ -103,21 +132,3 @@
                 break
 
         return items
-
-    def request(self, batch):
-        """Attempt to upload the batch and retry before raising an error """
-
-        def fatal_exception(exc):
-            if isinstance(exc, APIError):
-                # retry on server errors and client errors with 429 status code (rate limited),
-                # don't retry on other client errors
-                return (400 <= exc.status < 500) and exc.status != 429
-            else:
-                # retry on all other errors (eg. network)
-                return False
-
-        @backoff.on_exception(backoff.expo, Exception, max_tries=self.retries + 1, giveup=fatal_exception)
-        def send_request():
-            post(self.write_key, self.host, gzip=self.gzip, timeout=self.timeout, batch=batch)
-
-        send_request()
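The retry logic that request() installs is easy to misread: backoff's giveup predicate returns True to stop retrying, not to retry. A minimal, self-contained sketch of that gating, assuming only the backoff package itself (FlakyError and the attempt counter are invented for illustration):

```python
import backoff


class FlakyError(Exception):
    """Hypothetical stand-in for a transient network failure."""


attempts = {'n': 0}


def give_up(exc):
    # Mirrors fatal_exception above: returning True stops retrying.
    # Transient errors return False, so backoff keeps retrying them.
    return not isinstance(exc, FlakyError)


@backoff.on_exception(backoff.expo, Exception, max_tries=4, giveup=give_up)
def flaky():
    attempts['n'] += 1
    if attempts['n'] < 3:
        raise FlakyError('simulated network blip')
    return 'ok after {} attempts'.format(attempts['n'])


print(flaky())  # fails twice, backs off, then prints "ok after 3 attempts"
```

Because fatal_exception returns False for anything that is not an APIError, plain network exceptions keep being retried, up to retries + 1 total attempts.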
diff --git a/test.py b/test.py
new file mode 100644
index 00000000..e29270ce
--- /dev/null
+++ b/test.py
@@ -0,0 +1,43 @@
+import math
+import sys
+import os
+import time
+import uuid
+from analytics import Client
+
+def on_error(error, items):
+    print("An error occurred:", error)
+
+writeKey = os.getenv('writeKey', None)
+events = int(os.getenv('events', 500000))
+
+assert writeKey is not None, "Please configure a write key using the writeKey environment variable"
+
+print(f'Sending {events} events to write key "{writeKey}"')
+
+analytics = Client(
+    writeKey,
+    debug=True,
+    on_error=on_error,
+    max_queue_size=math.inf,
+    upload_size=math.inf,
+    upload_interval=1
+)
+
+start = time.time()
+
+for it in range(events):
+    sys.stdout.write(f"\rProgress: {round(it / events * 100, 1)}%")
+    sys.stdout.flush()
+    analytics.track('test', f'Iterated-{it}', {
+        'plan': it
+    })
+
+print()
+print('Shutting down..')
+analytics.shutdown()
+
+elapsed = time.time() - start
+
+print(f'elapsed: {elapsed} seconds')
+print(f'{round(events / elapsed)} rps average')
\ No newline at end of file
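test.py reads its configuration from environment variables, so a typical run looks like writeKey=&lt;key&gt; events=10000 python test.py. For the consumer change itself, here is a rough sketch of the apply_async callback flow that the new upload() relies on; it assumes nothing beyond the standard library, and uses ThreadPool (which shares apply_async's signature with Pool) so the example runs without any pickling concerns:

```python
from multiprocessing.pool import ThreadPool  # same apply_async API as Pool


def work(n):
    # Stand-in for request(): raises to exercise error_callback.
    if n < 0:
        raise ValueError('negative input')
    return n * 2


def on_success(result):
    # Runs back in the submitting process once work() returns.
    print('done:', result)


def on_error(exc):
    # Runs if work() raises; receives the exception instance.
    print('failed:', exc)


pool = ThreadPool(4)
pool.apply_async(work, args=[21], callback=on_success, error_callback=on_error)
pool.apply_async(work, args=[-1], callback=on_success, error_callback=on_error)
pool.close()
pool.join()  # like Consumer.run(): drain outstanding tasks before exiting
```

As in Consumer.run(), calling close() and then join() lets already-submitted uploads drain before the process exits.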