From 751da6d535c5e25a318ae2ce81538d1291e84d65 Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Thu, 8 Jun 2023 10:54:52 -0400 Subject: [PATCH 1/6] fix: add a callback function on flush_rows --- google/cloud/bigtable/batcher.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/google/cloud/bigtable/batcher.py b/google/cloud/bigtable/batcher.py index 6b06ec060..512657707 100644 --- a/google/cloud/bigtable/batcher.py +++ b/google/cloud/bigtable/batcher.py @@ -192,6 +192,10 @@ class MutationsBatcher(object): :type flush_interval: float :param flush_interval: (Optional) The interval (in seconds) between asynchronous flush. Default is 1 second. + + :type flush_completed_callback: Callable = None + :param flush_completed_callback: (Optional) A callable funtion for handling responses + after the request is flushed. """ def __init__( @@ -200,6 +204,7 @@ def __init__( flush_count=FLUSH_COUNT, max_row_bytes=MAX_MUTATION_SIZE, flush_interval=1, + flush_completed_callback=None, ): self._rows = _MutationsBatchQueue( max_mutation_bytes=max_row_bytes, flush_count=flush_count @@ -215,6 +220,7 @@ def __init__( ) self.futures_mapping = {} self.exceptions = queue.Queue() + self.flush_completed_callback = flush_completed_callback @property def flush_count(self): @@ -357,6 +363,9 @@ def _flush_rows(self, rows_to_flush): if len(rows_to_flush) > 0: response = self.table.mutate_rows(rows_to_flush) + if self.flush_completed_callback: + self.flush(response) + for result in response: if result.code != 0: exc = from_grpc_status(result.code, result.message) From 5f5b4539072703b61d6cc1b5ece00d493731f5e3 Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Thu, 8 Jun 2023 12:46:33 -0400 Subject: [PATCH 2/6] reformat --- google/cloud/bigtable/batcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/batcher.py b/google/cloud/bigtable/batcher.py index 512657707..48a497294 100644 --- a/google/cloud/bigtable/batcher.py +++ b/google/cloud/bigtable/batcher.py 
@@ -194,7 +194,7 @@ class MutationsBatcher(object): Default is 1 second. :type flush_completed_callback: Callable = None - :param flush_completed_callback: (Optional) A callable funtion for handling responses + :param flush_completed_callback: (Optional) A callable funtion for handling responses after the request is flushed. """ From 1a57f01f301cdde6efda8cadf9bd4403755c671f Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Thu, 8 Jun 2023 12:52:12 -0400 Subject: [PATCH 3/6] address comments --- google/cloud/bigtable/batcher.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/batcher.py b/google/cloud/bigtable/batcher.py index 48a497294..770496f5a 100644 --- a/google/cloud/bigtable/batcher.py +++ b/google/cloud/bigtable/batcher.py @@ -193,8 +193,8 @@ class MutationsBatcher(object): :param flush_interval: (Optional) The interval (in seconds) between asynchronous flush. Default is 1 second. - :type flush_completed_callback: Callable = None - :param flush_completed_callback: (Optional) A callable funtion for handling responses + :type batch_completed_callback: Callable = None + :param batch_completed_callback: (Optional) A callable for handling responses after the request is flushed. 
""" @@ -204,7 +204,7 @@ def __init__( flush_count=FLUSH_COUNT, max_row_bytes=MAX_MUTATION_SIZE, flush_interval=1, - flush_completed_callback=None, + batch_completed_callback=None, ): self._rows = _MutationsBatchQueue( max_mutation_bytes=max_row_bytes, flush_count=flush_count @@ -220,7 +220,7 @@ def __init__( ) self.futures_mapping = {} self.exceptions = queue.Queue() - self.flush_completed_callback = flush_completed_callback + self._batch_completed_callback = batch_completed_callback @property def flush_count(self): @@ -333,7 +333,7 @@ def _flush_async(self): self.flow_control.control_flow(batch_info) future = self._executor.submit(self._flush_rows, rows_to_flush) self.futures_mapping[future] = batch_info - future.add_done_callback(self._batch_completed_callback) + future.add_done_callback(self._batch_completed_clean_up_callback) # reset and start a new batch rows_to_flush = [] @@ -342,8 +342,8 @@ def _flush_async(self): mutations_count = 0 batch_info = _BatchInfo() - def _batch_completed_callback(self, future): - """Callback for when the mutation has finished. + def _batch_completed_clean_up_callback(self, future): + """Callback for when the mutation has finished to clean up the current batch and release items from the flow controller. Raise exceptions if there's any. Release the resources locked by the flow control and allow enqueued tasks to be run. 
@@ -363,8 +363,8 @@ def _flush_rows(self, rows_to_flush): if len(rows_to_flush) > 0: response = self.table.mutate_rows(rows_to_flush) - if self.flush_completed_callback: - self.flush(response) + if self._batch_completed_callback: + self._batch_completed_callback(response) for result in response: if result.code != 0: From ba4dc67f3cbad8e404ff5f73f74a523b3c9be3a1 Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Thu, 8 Jun 2023 12:53:28 -0400 Subject: [PATCH 4/6] update doc --- google/cloud/bigtable/batcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/batcher.py b/google/cloud/bigtable/batcher.py index 770496f5a..b121e4e0f 100644 --- a/google/cloud/bigtable/batcher.py +++ b/google/cloud/bigtable/batcher.py @@ -195,7 +195,7 @@ class MutationsBatcher(object): :type batch_completed_callback: Callable = None :param batch_completed_callback: (Optional) A callable for handling responses - after the request is flushed. + after the current batch is sent. 
""" def __init__( From fe69c3455e45c2d549a7af521f78b1995229b758 Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Thu, 8 Jun 2023 13:02:06 -0400 Subject: [PATCH 5/6] update names --- google/cloud/bigtable/batcher.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/batcher.py b/google/cloud/bigtable/batcher.py index b121e4e0f..ac9450c8a 100644 --- a/google/cloud/bigtable/batcher.py +++ b/google/cloud/bigtable/batcher.py @@ -220,7 +220,7 @@ def __init__( ) self.futures_mapping = {} self.exceptions = queue.Queue() - self._batch_completed_callback = batch_completed_callback + self._user_batch_completed_callback = batch_completed_callback @property def flush_count(self): @@ -333,7 +333,7 @@ def _flush_async(self): self.flow_control.control_flow(batch_info) future = self._executor.submit(self._flush_rows, rows_to_flush) self.futures_mapping[future] = batch_info - future.add_done_callback(self._batch_completed_clean_up_callback) + future.add_done_callback(self._batch_completed_callback) # reset and start a new batch rows_to_flush = [] @@ -342,8 +342,9 @@ def _flush_async(self): mutations_count = 0 batch_info = _BatchInfo() - def _batch_completed_clean_up_callback(self, future): - """Callback for when the mutation has finished to clean up the current batch and release items from the flow controller. + def _batch_completed_callback(self, future): + """Callback for when the mutation has finished to clean up the current batch + and release items from the flow controller. Raise exceptions if there's any. Release the resources locked by the flow control and allow enqueued tasks to be run. 
@@ -363,8 +364,8 @@ def _flush_rows(self, rows_to_flush): if len(rows_to_flush) > 0: response = self.table.mutate_rows(rows_to_flush) - if self._batch_completed_callback: - self._batch_completed_callback(response) + if self._user_batch_completed_callback: + self._user_batch_completed_callback(response) for result in response: if result.code != 0: From 4ba5ecf1e14190d55912a6e27842aead8e7ea733 Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Thu, 8 Jun 2023 14:59:35 -0400 Subject: [PATCH 6/6] add a test --- google/cloud/bigtable/batcher.py | 5 +++-- tests/unit/test_batcher.py | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/batcher.py b/google/cloud/bigtable/batcher.py index ac9450c8a..a6eb806e9 100644 --- a/google/cloud/bigtable/batcher.py +++ b/google/cloud/bigtable/batcher.py @@ -193,9 +193,10 @@ class MutationsBatcher(object): :param flush_interval: (Optional) The interval (in seconds) between asynchronous flush. Default is 1 second. - :type batch_completed_callback: Callable = None + :type batch_completed_callback: Callable[list:[`~google.rpc.status_pb2.Status`]] = None :param batch_completed_callback: (Optional) A callable for handling responses - after the current batch is sent. + after the current batch is sent. The callable function expects a list of grpc + Status. 
""" def __init__( diff --git a/tests/unit/test_batcher.py b/tests/unit/test_batcher.py index a238b2852..998748141 100644 --- a/tests/unit/test_batcher.py +++ b/tests/unit/test_batcher.py @@ -35,6 +35,27 @@ def test_mutation_batcher_constructor(): assert table is mutation_batcher.table +def test_mutation_batcher_w_user_callback(): + table = _Table(TABLE_NAME) + + def callback_fn(response): + callback_fn.count = len(response) + + with MutationsBatcher( + table, flush_count=1, batch_completed_callback=callback_fn + ) as mutation_batcher: + rows = [ + DirectRow(row_key=b"row_key"), + DirectRow(row_key=b"row_key_2"), + DirectRow(row_key=b"row_key_3"), + DirectRow(row_key=b"row_key_4"), + ] + + mutation_batcher.mutate_rows(rows) + + assert callback_fn.count == 4 + + def test_mutation_batcher_mutate_row(): table = _Table(TABLE_NAME) with MutationsBatcher(table=table) as mutation_batcher: