From c528317bd4d378a0216ebc55025bb49554b62af8 Mon Sep 17 00:00:00 2001 From: MiaCY Date: Thu, 25 May 2023 10:03:15 -0700 Subject: [PATCH 01/10] Refactor client.download_blob_to_file --- google/cloud/storage/blob.py | 227 +++++++++++++++++++++++++++++++-- google/cloud/storage/client.py | 115 +++++++++++------ 2 files changed, 289 insertions(+), 53 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 8a3f61c72..de33744a5 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -1124,11 +1124,27 @@ def download_to_file( :raises: :class:`google.cloud.exceptions.NotFound` """ - client = self._require_client(client) - - client.download_blob_to_file( - self, + # client = self._require_client(client) + + # client.download_blob_to_file( + # self, + # file_obj=file_obj, + # start=start, + # end=end, + # raw_download=raw_download, + # if_etag_match=if_etag_match, + # if_etag_not_match=if_etag_not_match, + # if_generation_match=if_generation_match, + # if_generation_not_match=if_generation_not_match, + # if_metageneration_match=if_metageneration_match, + # if_metageneration_not_match=if_metageneration_not_match, + # timeout=timeout, + # checksum=checksum, + # retry=retry, + # ) + self._download_blob( file_obj=file_obj, + client=client, start=start, end=end, raw_download=raw_download, @@ -1250,12 +1266,28 @@ def download_to_filename( :raises: :class:`google.cloud.exceptions.NotFound` """ - client = self._require_client(client) + # client = self._require_client(client) try: with open(filename, "wb") as file_obj: - client.download_blob_to_file( - self, - file_obj, + # client.download_blob_to_file( + # self, + # file_obj, + # start=start, + # end=end, + # raw_download=raw_download, + # if_etag_match=if_etag_match, + # if_etag_not_match=if_etag_not_match, + # if_generation_match=if_generation_match, + # if_generation_not_match=if_generation_not_match, + # if_metageneration_match=if_metageneration_match, + # if_metageneration_not_match=if_metageneration_not_match, + # timeout=timeout, + # checksum=checksum, + # retry=retry, + # ) + self._download_blob( + file_obj=file_obj, + client=client, start=start, end=end, raw_download=raw_download, @@ -1382,11 +1414,27 @@ def download_as_bytes( :raises: :class:`google.cloud.exceptions.NotFound` """ - client = self._require_client(client) + # client = self._require_client(client) string_buffer = BytesIO() - client.download_blob_to_file( - self, + # client.download_blob_to_file( + # self, + # string_buffer, + # start=start, + # end=end, + # raw_download=raw_download, + # if_etag_match=if_etag_match, + # if_etag_not_match=if_etag_not_match, + # if_generation_match=if_generation_match, + # if_generation_not_match=if_generation_not_match, + # if_metageneration_match=if_metageneration_match, + # if_metageneration_not_match=if_metageneration_not_match, + # timeout=timeout, + # checksum=checksum, + # retry=retry, + # ) + self._download_blob( string_buffer, + client=client, start=start, end=end, raw_download=raw_download, @@ -3932,6 +3980,163 @@ def open( :rtype: str or ``NoneType`` """ + def _download_blob( + self, + file_obj, + client=None, + start=None, + end=None, + raw_download=False, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=_DEFAULT_TIMEOUT, + checksum="md5", + retry=DEFAULT_RETRY, + ): + + """Download the contents of a blob object or blob URI into a file-like object. 
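A minimal usage sketch of the public download methods that delegate to this
helper; the bucket, object, and precondition values below are hypothetical
and not taken from this change.

from google.cloud import storage

client = storage.Client()
blob = client.bucket("example-bucket").blob("example-object.bin")

# Whole-object download to a local file, verifying a CRC32C checksum and
# succeeding only if the live generation matches the expected value.
with open("/tmp/example-object.bin", "wb") as file_obj:
    blob.download_to_file(
        file_obj,
        checksum="crc32c",
        if_generation_match=123456789,
    )

# Ranged download of the first kilobyte into memory; ranged downloads have
# no usable checksum header, so checksum verification is disabled here.
first_kb = blob.download_as_bytes(start=0, end=1023, checksum=None)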
+ + See https://cloud.google.com/storage/docs/downloading-objects + + If :attr:`user_project` is set on the bucket, bills the API request + to that project. + + :type file_obj: file + :param file_obj: A file handle to which to write the blob's data. + + :type client: :class:`~google.cloud.storage.client.Client` + :param client: + (Optional) The client to use. If not passed, falls back to the + ``client`` stored on the blob's bucket. + + :type start: int + :param start: (Optional) The first byte in a range to be downloaded. + + :type end: int + :param end: (Optional) The last byte in a range to be downloaded. + + :type raw_download: bool + :param raw_download: + (Optional) If true, download the object without any expansion. + + :type if_etag_match: Union[str, Set[str]] + :param if_etag_match: + (Optional) See :ref:`using-if-etag-match` + + :type if_etag_not_match: Union[str, Set[str]] + :param if_etag_not_match: + (Optional) See :ref:`using-if-etag-not-match` + + :type if_generation_match: long + :param if_generation_match: + (Optional) See :ref:`using-if-generation-match` + + :type if_generation_not_match: long + :param if_generation_not_match: + (Optional) See :ref:`using-if-generation-not-match` + + :type if_metageneration_match: long + :param if_metageneration_match: + (Optional) See :ref:`using-if-metageneration-match` + + :type if_metageneration_not_match: long + :param if_metageneration_not_match: + (Optional) See :ref:`using-if-metageneration-not-match` + + :type timeout: float or tuple + :param timeout: + (Optional) The amount of time, in seconds, to wait + for the server response. See: :ref:`configuring_timeouts` + + :type checksum: str + :param checksum: + (Optional) The type of checksum to compute to verify the integrity + of the object. The response headers must contain a checksum of the + requested type. If the headers lack an appropriate checksum (for + instance in the case of transcoded or ranged downloads where the + remote service does not know the correct checksum, including + downloads where chunk_size is set) an INFO-level log will be + emitted. Supported values are "md5", "crc32c" and None. The default + is "md5". + + :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy + :param retry: (Optional) How to retry the RPC. A None value will disable + retries. A google.api_core.retry.Retry value will enable retries, + and the object will define retriable response codes and errors and + configure backoff and timeout options. + + A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a + Retry object and activates it only if certain conditions are met. + This class exists to provide safe defaults for RPC calls that are + not technically safe to retry normally (due to potential data + duplication or other side-effects) but become safe to retry if a + condition such as if_metageneration_match is set. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Media operations (downloads and uploads) do not support non-default + predicates in a Retry object. The default will always be used. Other + configuration changes for Retry objects such as delays and deadlines + are respected. + """ + + # Handle ConditionalRetryPolicy. + if isinstance(retry, ConditionalRetryPolicy): + # Conditional retries are designed for non-media calls, which change + # arguments into query_params dictionaries. 
Media operations work + # differently, so here we make a "fake" query_params to feed to the + # ConditionalRetryPolicy. + query_params = { + "ifGenerationMatch": if_generation_match, + "ifMetagenerationMatch": if_metageneration_match, + } + retry = retry.get_retry_policy_if_conditions_met(query_params=query_params) + + # if not isinstance(blob_or_uri, Blob): + # blob_or_uri = Blob.from_string(blob_or_uri) + # download_url = blob_or_uri._get_download_url( + client = self._require_client(client) + + download_url = self._get_download_url( + client, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + ) + headers = _get_encryption_headers(self._encryption_key) + headers["accept-encoding"] = "gzip" + _add_etag_match_headers( + headers, + if_etag_match=if_etag_match, + if_etag_not_match=if_etag_not_match, + ) + headers = {**_get_default_headers(client._connection.user_agent), **headers} + + transport = client._http + + try: + self._do_download( + transport, + file_obj, + download_url, + headers, + start, + end, + raw_download, + timeout=timeout, + checksum=checksum, + retry=retry, + ) + except resumable_media.InvalidResponse as exc: + _raise_from_invalid_response(exc) + @property def component_count(self): """Number of underlying components that make up this object. diff --git a/google/cloud/storage/client.py b/google/cloud/storage/client.py index bcb0b59ef..9a203cefb 100644 --- a/google/cloud/storage/client.py +++ b/google/cloud/storage/client.py @@ -31,12 +31,14 @@ from google.cloud._helpers import _LocalStack, _NOW from google.cloud.client import ClientWithProject from google.cloud.exceptions import NotFound -from google.cloud.storage._helpers import _get_default_headers + +# from google.cloud.storage._helpers import _get_default_headers from google.cloud.storage._helpers import _get_environ_project from google.cloud.storage._helpers import _get_storage_host from google.cloud.storage._helpers import _DEFAULT_STORAGE_HOST from google.cloud.storage._helpers import _bucket_bound_hostname_url -from google.cloud.storage._helpers import _add_etag_match_headers + +# from google.cloud.storage._helpers import _add_etag_match_headers from google.cloud.storage._http import Connection from google.cloud.storage._signing import ( get_expiration_seconds_v4, @@ -48,7 +50,7 @@ from google.cloud.storage.bucket import Bucket, _item_to_blob, _blobs_page_start from google.cloud.storage.blob import ( Blob, - _get_encryption_headers, + # _get_encryption_headers, _raise_from_invalid_response, ) from google.cloud.storage.hmac_key import HMACKeyMetadata @@ -56,7 +58,8 @@ from google.cloud.storage.acl import DefaultObjectACL from google.cloud.storage.constants import _DEFAULT_TIMEOUT from google.cloud.storage.retry import DEFAULT_RETRY -from google.cloud.storage.retry import ConditionalRetryPolicy + +# from google.cloud.storage.retry import ConditionalRetryPolicy _marker = object() @@ -1057,52 +1060,80 @@ def download_blob_to_file( are respected. """ - # Handle ConditionalRetryPolicy. - if isinstance(retry, ConditionalRetryPolicy): - # Conditional retries are designed for non-media calls, which change - # arguments into query_params dictionaries. Media operations work - # differently, so here we make a "fake" query_params to feed to the - # ConditionalRetryPolicy. 
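# For context, a minimal sketch of how the ConditionalRetryPolicy handling
# above collapses to a concrete policy via the "fake" query_params, assuming
# the public helpers in google.cloud.storage.retry:
from google.cloud.storage.retry import (
    DEFAULT_RETRY,
    DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
)

# A generation precondition makes the conditional policy resolve to the
# wrapped Retry object, so the download is retried on transient errors.
resolved = DEFAULT_RETRY_IF_GENERATION_SPECIFIED.get_retry_policy_if_conditions_met(
    query_params={"ifGenerationMatch": 1, "ifMetagenerationMatch": None}
)
assert resolved is DEFAULT_RETRY

# Without a precondition the policy resolves to None and retries are disabled.
resolved = DEFAULT_RETRY_IF_GENERATION_SPECIFIED.get_retry_policy_if_conditions_met(
    query_params={"ifGenerationMatch": None, "ifMetagenerationMatch": None}
)
assert resolved is None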
- query_params = { - "ifGenerationMatch": if_generation_match, - "ifMetagenerationMatch": if_metageneration_match, - } - retry = retry.get_retry_policy_if_conditions_met(query_params=query_params) + # # Handle ConditionalRetryPolicy. + # if isinstance(retry, ConditionalRetryPolicy): + # # Conditional retries are designed for non-media calls, which change + # # arguments into query_params dictionaries. Media operations work + # # differently, so here we make a "fake" query_params to feed to the + # # ConditionalRetryPolicy. + # query_params = { + # "ifGenerationMatch": if_generation_match, + # "ifMetagenerationMatch": if_metageneration_match, + # } + # retry = retry.get_retry_policy_if_conditions_met(query_params=query_params) if not isinstance(blob_or_uri, Blob): blob_or_uri = Blob.from_string(blob_or_uri) - download_url = blob_or_uri._get_download_url( - self, + # download_url = blob_or_uri._get_download_url( + # self, + # if_generation_match=if_generation_match, + # if_generation_not_match=if_generation_not_match, + # if_metageneration_match=if_metageneration_match, + # if_metageneration_not_match=if_metageneration_not_match, + # ) + # headers = _get_encryption_headers(blob_or_uri._encryption_key) + # headers["accept-encoding"] = "gzip" + # _add_etag_match_headers( + # headers, + # if_etag_match=if_etag_match, + # if_etag_not_match=if_etag_not_match, + # ) + # headers = {**_get_default_headers(self._connection.user_agent), **headers} + + # transport = self._http + + # transport, download_url, headers = _download_blob_args( + # blob_or_uri=blob_or_uri, + # client=self, + # if_etag_match=if_etag_match, + # if_etag_not_match=if_etag_not_match, + # if_generation_match=if_generation_match, + # if_generation_not_match=if_generation_not_match, + # if_metageneration_match=if_metageneration_match, + # if_metageneration_not_match=if_metageneration_not_match, + # retry=retry, + # ) + # try: + # blob_or_uri._do_download( + # transport, + # file_obj, + # download_url, + # headers, + # start, + # end, + # raw_download, + # timeout=timeout, + # checksum=checksum, + # retry=retry, + # ) + # except resumable_media.InvalidResponse as exc: + # _raise_from_invalid_response(exc) + blob_or_uri._download_blob( + file_obj, + client=self, + start=start, + end=end, + raw_download=raw_download, + if_etag_match=if_etag_match, + if_etag_not_match=if_etag_not_match, if_generation_match=if_generation_match, if_generation_not_match=if_generation_not_match, if_metageneration_match=if_metageneration_match, if_metageneration_not_match=if_metageneration_not_match, + timeout=timeout, + checksum=checksum, + retry=retry, ) - headers = _get_encryption_headers(blob_or_uri._encryption_key) - headers["accept-encoding"] = "gzip" - _add_etag_match_headers( - headers, - if_etag_match=if_etag_match, - if_etag_not_match=if_etag_not_match, - ) - headers = {**_get_default_headers(self._connection.user_agent), **headers} - - transport = self._http - try: - blob_or_uri._do_download( - transport, - file_obj, - download_url, - headers, - start, - end, - raw_download, - timeout=timeout, - checksum=checksum, - retry=retry, - ) - except resumable_media.InvalidResponse as exc: - _raise_from_invalid_response(exc) def list_blobs( self, From fdb60dde76f70fd435164340e58054364c51874b Mon Sep 17 00:00:00 2001 From: MiaCY Date: Thu, 25 May 2023 10:45:24 -0700 Subject: [PATCH 02/10] Chore: clean up code --- google/cloud/storage/blob.py | 60 +++------------------------------- google/cloud/storage/client.py | 57 +------------------------------- 2 files 
changed, 5 insertions(+), 112 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index de33744a5..920338326 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -1124,24 +1124,7 @@ def download_to_file( :raises: :class:`google.cloud.exceptions.NotFound` """ - # client = self._require_client(client) - - # client.download_blob_to_file( - # self, - # file_obj=file_obj, - # start=start, - # end=end, - # raw_download=raw_download, - # if_etag_match=if_etag_match, - # if_etag_not_match=if_etag_not_match, - # if_generation_match=if_generation_match, - # if_generation_not_match=if_generation_not_match, - # if_metageneration_match=if_metageneration_match, - # if_metageneration_not_match=if_metageneration_not_match, - # timeout=timeout, - # checksum=checksum, - # retry=retry, - # ) + self._download_blob( file_obj=file_obj, client=client, @@ -1266,25 +1249,9 @@ def download_to_filename( :raises: :class:`google.cloud.exceptions.NotFound` """ - # client = self._require_client(client) + try: with open(filename, "wb") as file_obj: - # client.download_blob_to_file( - # self, - # file_obj, - # start=start, - # end=end, - # raw_download=raw_download, - # if_etag_match=if_etag_match, - # if_etag_not_match=if_etag_not_match, - # if_generation_match=if_generation_match, - # if_generation_not_match=if_generation_not_match, - # if_metageneration_match=if_metageneration_match, - # if_metageneration_not_match=if_metageneration_not_match, - # timeout=timeout, - # checksum=checksum, - # retry=retry, - # ) self._download_blob( file_obj=file_obj, client=client, @@ -1414,24 +1381,9 @@ def download_as_bytes( :raises: :class:`google.cloud.exceptions.NotFound` """ - # client = self._require_client(client) + string_buffer = BytesIO() - # client.download_blob_to_file( - # self, - # string_buffer, - # start=start, - # end=end, - # raw_download=raw_download, - # if_etag_match=if_etag_match, - # if_etag_not_match=if_etag_not_match, - # if_generation_match=if_generation_match, - # if_generation_not_match=if_generation_not_match, - # if_metageneration_match=if_metageneration_match, - # if_metageneration_not_match=if_metageneration_not_match, - # timeout=timeout, - # checksum=checksum, - # retry=retry, - # ) + self._download_blob( string_buffer, client=client, @@ -4085,7 +4037,6 @@ def _download_blob( configuration changes for Retry objects such as delays and deadlines are respected. """ - # Handle ConditionalRetryPolicy. if isinstance(retry, ConditionalRetryPolicy): # Conditional retries are designed for non-media calls, which change @@ -4098,9 +4049,6 @@ def _download_blob( } retry = retry.get_retry_policy_if_conditions_met(query_params=query_params) - # if not isinstance(blob_or_uri, Blob): - # blob_or_uri = Blob.from_string(blob_or_uri) - # download_url = blob_or_uri._get_download_url( client = self._require_client(client) download_url = self._get_download_url( diff --git a/google/cloud/storage/client.py b/google/cloud/storage/client.py index 9a203cefb..cfdb07c94 100644 --- a/google/cloud/storage/client.py +++ b/google/cloud/storage/client.py @@ -1060,64 +1060,9 @@ def download_blob_to_file( are respected. """ - # # Handle ConditionalRetryPolicy. - # if isinstance(retry, ConditionalRetryPolicy): - # # Conditional retries are designed for non-media calls, which change - # # arguments into query_params dictionaries. Media operations work - # # differently, so here we make a "fake" query_params to feed to the - # # ConditionalRetryPolicy. 
- # query_params = { - # "ifGenerationMatch": if_generation_match, - # "ifMetagenerationMatch": if_metageneration_match, - # } - # retry = retry.get_retry_policy_if_conditions_met(query_params=query_params) - if not isinstance(blob_or_uri, Blob): blob_or_uri = Blob.from_string(blob_or_uri) - # download_url = blob_or_uri._get_download_url( - # self, - # if_generation_match=if_generation_match, - # if_generation_not_match=if_generation_not_match, - # if_metageneration_match=if_metageneration_match, - # if_metageneration_not_match=if_metageneration_not_match, - # ) - # headers = _get_encryption_headers(blob_or_uri._encryption_key) - # headers["accept-encoding"] = "gzip" - # _add_etag_match_headers( - # headers, - # if_etag_match=if_etag_match, - # if_etag_not_match=if_etag_not_match, - # ) - # headers = {**_get_default_headers(self._connection.user_agent), **headers} - - # transport = self._http - - # transport, download_url, headers = _download_blob_args( - # blob_or_uri=blob_or_uri, - # client=self, - # if_etag_match=if_etag_match, - # if_etag_not_match=if_etag_not_match, - # if_generation_match=if_generation_match, - # if_generation_not_match=if_generation_not_match, - # if_metageneration_match=if_metageneration_match, - # if_metageneration_not_match=if_metageneration_not_match, - # retry=retry, - # ) - # try: - # blob_or_uri._do_download( - # transport, - # file_obj, - # download_url, - # headers, - # start, - # end, - # raw_download, - # timeout=timeout, - # checksum=checksum, - # retry=retry, - # ) - # except resumable_media.InvalidResponse as exc: - # _raise_from_invalid_response(exc) + blob_or_uri._download_blob( file_obj, client=self, From 9c67a41dfafe99d140c497e1e5effb703d64deb5 Mon Sep 17 00:00:00 2001 From: MiaCY Date: Tue, 6 Jun 2023 10:46:54 -0700 Subject: [PATCH 03/10] refactor blob and client unit tests --- google/cloud/storage/blob.py | 8 +- google/cloud/storage/client.py | 10 +- tests/unit/test_blob.py | 517 +++++++++++++++++---------------- tests/unit/test_client.py | 21 +- 4 files changed, 283 insertions(+), 273 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 920338326..cd79ed86b 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -957,7 +957,7 @@ def _do_download( This private method does not accept ConditionalRetryPolicy values because the information necessary to evaluate the policy is instead - evaluated in client.download_blob_to_file(). + evaluated in blob._download_blob(). See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for information on retry types and how @@ -1126,7 +1126,7 @@ def download_to_file( """ self._download_blob( - file_obj=file_obj, + file_obj, client=client, start=start, end=end, @@ -1253,7 +1253,7 @@ def download_to_filename( try: with open(filename, "wb") as file_obj: self._download_blob( - file_obj=file_obj, + file_obj, client=client, start=start, end=end, @@ -3950,7 +3950,7 @@ def _download_blob( retry=DEFAULT_RETRY, ): - """Download the contents of a blob object or blob URI into a file-like object. + """Download the contents of a blob object into a file-like object. 
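The URI form stays on the client after this refactor; a rough sketch of the
split, with hypothetical names:

import io

from google.cloud import storage

client = storage.Client()
buffer = io.BytesIO()

# Client.download_blob_to_file still accepts either a Blob or a gs:// URI;
# a URI is converted with Blob.from_string before the call is delegated to
# the blob-level helper, which only handles Blob instances.
client.download_blob_to_file("gs://example-bucket/example-object.txt", buffer)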
See https://cloud.google.com/storage/docs/downloading-objects diff --git a/google/cloud/storage/client.py b/google/cloud/storage/client.py index cfdb07c94..1625799ac 100644 --- a/google/cloud/storage/client.py +++ b/google/cloud/storage/client.py @@ -32,13 +32,11 @@ from google.cloud.client import ClientWithProject from google.cloud.exceptions import NotFound -# from google.cloud.storage._helpers import _get_default_headers from google.cloud.storage._helpers import _get_environ_project from google.cloud.storage._helpers import _get_storage_host from google.cloud.storage._helpers import _DEFAULT_STORAGE_HOST from google.cloud.storage._helpers import _bucket_bound_hostname_url -# from google.cloud.storage._helpers import _add_etag_match_headers from google.cloud.storage._http import Connection from google.cloud.storage._signing import ( get_expiration_seconds_v4, @@ -48,19 +46,13 @@ ) from google.cloud.storage.batch import Batch from google.cloud.storage.bucket import Bucket, _item_to_blob, _blobs_page_start -from google.cloud.storage.blob import ( - Blob, - # _get_encryption_headers, - _raise_from_invalid_response, -) +from google.cloud.storage.blob import Blob from google.cloud.storage.hmac_key import HMACKeyMetadata from google.cloud.storage.acl import BucketACL from google.cloud.storage.acl import DefaultObjectACL from google.cloud.storage.constants import _DEFAULT_TIMEOUT from google.cloud.storage.retry import DEFAULT_RETRY -# from google.cloud.storage.retry import ConditionalRetryPolicy - _marker = object() diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index 638db9f4e..72e36e161 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -1411,33 +1411,35 @@ def test_download_to_file_with_failure(self): blob_name = "blob-name" client = self._make_client() - client.download_blob_to_file.side_effect = NotFound("testing") bucket = _Bucket(client) blob = self._make_one(blob_name, bucket=bucket) file_obj = io.BytesIO() - with self.assertRaises(NotFound): - blob.download_to_file(file_obj) + with mock.patch.object(blob, "_download_blob"): + blob._download_blob.side_effect = NotFound("testing") - self.assertEqual(file_obj.tell(), 0) + with self.assertRaises(NotFound): + blob.download_to_file(file_obj) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) + self.assertEqual(file_obj.tell(), 0) + + expected_timeout = self._get_default_timeout() + blob._download_blob.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) def test_download_to_file_wo_media_link(self): blob_name = "blob-name" @@ -1446,28 +1448,29 @@ def test_download_to_file_wo_media_link(self): blob = self._make_one(blob_name, bucket=bucket) file_obj = io.BytesIO() - blob.download_to_file(file_obj) + with mock.patch.object(blob, "_download_blob"): + blob.download_to_file(file_obj) - # Make sure the media link is still unknown. 
- self.assertIsNone(blob.media_link) + # Make sure the media link is still unknown. + self.assertIsNone(blob.media_link) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) + expected_timeout = self._get_default_timeout() + blob._download_blob.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) def test_download_to_file_w_etag_match(self): etag = "kittens" @@ -1475,25 +1478,26 @@ def test_download_to_file_w_etag_match(self): blob = self._make_one("blob-name", bucket=_Bucket(client)) file_obj = io.BytesIO() - blob.download_to_file(file_obj, if_etag_not_match=etag) + with mock.patch.object(blob, "_download_blob"): + blob.download_to_file(file_obj, if_etag_not_match=etag) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=etag, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) + expected_timeout = self._get_default_timeout() + blob._download_blob.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=etag, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) def test_download_to_file_w_generation_match(self): generation_number = 6 @@ -1501,25 +1505,26 @@ def test_download_to_file_w_generation_match(self): blob = self._make_one("blob-name", bucket=_Bucket(client)) file_obj = io.BytesIO() - blob.download_to_file(file_obj, if_generation_not_match=generation_number) + with mock.patch.object(blob, "_download_blob"): + blob.download_to_file(file_obj, if_generation_not_match=generation_number) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=generation_number, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) + expected_timeout = self._get_default_timeout() + blob._download_blob.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=generation_number, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) def _download_to_file_helper( self, use_chunks, raw_download, 
timeout=None, **extra_kwargs @@ -1544,28 +1549,30 @@ def _download_to_file_helper( extra_kwargs.update(timeout_kwarg) file_obj = io.BytesIO() - if raw_download: - blob.download_to_file(file_obj, raw_download=True, **extra_kwargs) - else: - blob.download_to_file(file_obj, **extra_kwargs) + + with mock.patch.object(blob, "_download_blob"): + if raw_download: + blob.download_to_file(file_obj, raw_download=True, **extra_kwargs) + else: + blob.download_to_file(file_obj, **extra_kwargs) - expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=raw_download, - timeout=expected_timeout, - checksum="md5", - retry=expected_retry, - ) + expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) + blob._download_blob.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=raw_download, + timeout=expected_timeout, + checksum="md5", + retry=expected_retry, + ) def test_download_to_file_wo_chunks_wo_raw(self): self._download_to_file_helper(use_chunks=False, raw_download=False) @@ -1602,48 +1609,49 @@ def _download_to_filename_helper( blob = self._make_one(blob_name, bucket=bucket, properties=properties) - with _NamedTemporaryFile() as temp: - if timeout is None: - blob.download_to_filename( - temp.name, raw_download=raw_download, **extra_kwargs - ) - else: - blob.download_to_filename( - temp.name, - raw_download=raw_download, - timeout=timeout, - **extra_kwargs, - ) - - if updated is None: - self.assertIsNone(blob.updated) - else: - mtime = os.path.getmtime(temp.name) - updated_time = blob.updated.timestamp() - self.assertEqual(mtime, updated_time) - - expected_timeout = self._get_default_timeout() if timeout is None else timeout - - expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) - - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=raw_download, - timeout=expected_timeout, - checksum="md5", - retry=expected_retry, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertEqual(stream.name, temp.name) + with mock.patch.object(blob, "_download_blob"): + with _NamedTemporaryFile() as temp: + if timeout is None: + blob.download_to_filename( + temp.name, raw_download=raw_download, **extra_kwargs + ) + else: + blob.download_to_filename( + temp.name, + raw_download=raw_download, + timeout=timeout, + **extra_kwargs, + ) + + if updated is None: + self.assertIsNone(blob.updated) + else: + mtime = os.path.getmtime(temp.name) + updated_time = blob.updated.timestamp() + self.assertEqual(mtime, updated_time) + + expected_timeout = self._get_default_timeout() if timeout is None else timeout + + expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) + + blob._download_blob.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + raw_download=raw_download, + if_etag_match=None, + if_etag_not_match=None, + 
if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=expected_timeout, + checksum="md5", + retry=expected_retry, + ) + stream = blob._download_blob.mock_calls[0].args[0] + self.assertEqual(stream.name, temp.name) def test_download_to_filename_w_updated_wo_raw(self): updated = "2014-12-06T13:13:50.690Z" @@ -1677,28 +1685,29 @@ def test_download_to_filename_w_etag_match(self): client = self._make_client() blob = self._make_one("blob-name", bucket=_Bucket(client)) - with _NamedTemporaryFile() as temp: - blob.download_to_filename(temp.name, if_etag_match=etag) + with mock.patch.object(blob, "_download_blob"): + with _NamedTemporaryFile() as temp: + blob.download_to_filename(temp.name, if_etag_match=etag) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=etag, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertEqual(stream.name, temp.name) + expected_timeout = self._get_default_timeout() + blob._download_blob.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + if_etag_match=etag, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) + stream = blob._download_blob.mock_calls[0].args[0] + self.assertEqual(stream.name, temp.name) def test_download_to_filename_w_generation_match(self): from google.cloud._testing import _NamedTemporaryFile @@ -1707,28 +1716,29 @@ def test_download_to_filename_w_generation_match(self): client = self._make_client() blob = self._make_one("blob-name", bucket=_Bucket(client)) - with _NamedTemporaryFile() as temp: - blob.download_to_filename(temp.name, if_generation_match=generation_number) + with mock.patch.object(blob, "_download_blob"): + with _NamedTemporaryFile() as temp: + blob.download_to_filename(temp.name, if_generation_match=generation_number) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=generation_number, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertEqual(stream.name, temp.name) + expected_timeout = self._get_default_timeout() + blob._download_blob.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=generation_number, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) + stream = blob._download_blob.mock_calls[0].args[0] + self.assertEqual(stream.name, temp.name) def test_download_to_filename_corrupted(self): from google.resumable_media import 
DataCorruption @@ -1737,40 +1747,42 @@ def test_download_to_filename_corrupted(self): client = self._make_client() bucket = _Bucket(client) blob = self._make_one(blob_name, bucket=bucket) - client.download_blob_to_file.side_effect = DataCorruption("testing") - # Try to download into a temporary file (don't use - # `_NamedTemporaryFile` it will try to remove after the file is - # already removed) - filehandle, filename = tempfile.mkstemp() - os.close(filehandle) - self.assertTrue(os.path.exists(filename)) + with mock.patch.object(blob, "_download_blob"): + blob._download_blob.side_effect = DataCorruption("testing") - with self.assertRaises(DataCorruption): - blob.download_to_filename(filename) + # Try to download into a temporary file (don't use + # `_NamedTemporaryFile` it will try to remove after the file is + # already removed) + filehandle, filename = tempfile.mkstemp() + os.close(filehandle) + self.assertTrue(os.path.exists(filename)) - # Make sure the file was cleaned up. - self.assertFalse(os.path.exists(filename)) + with self.assertRaises(DataCorruption): + blob.download_to_filename(filename) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertEqual(stream.name, filename) + # Make sure the file was cleaned up. + self.assertFalse(os.path.exists(filename)) + + expected_timeout = self._get_default_timeout() + blob._download_blob.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) + stream = blob._download_blob.mock_calls[0].args[0] + self.assertEqual(stream.name, filename) def _download_as_bytes_helper(self, raw_download, timeout=None, **extra_kwargs): blob_name = "blob-name" @@ -1778,36 +1790,37 @@ def _download_as_bytes_helper(self, raw_download, timeout=None, **extra_kwargs): bucket = _Bucket(client) blob = self._make_one(blob_name, bucket=bucket) - if timeout is None: - expected_timeout = self._get_default_timeout() - fetched = blob.download_as_bytes(raw_download=raw_download, **extra_kwargs) - else: - expected_timeout = timeout - fetched = blob.download_as_bytes( - raw_download=raw_download, timeout=timeout, **extra_kwargs - ) - self.assertEqual(fetched, b"") + with mock.patch.object(blob, "_download_blob"): + if timeout is None: + expected_timeout = self._get_default_timeout() + fetched = blob.download_as_bytes(raw_download=raw_download, **extra_kwargs) + else: + expected_timeout = timeout + fetched = blob.download_as_bytes( + raw_download=raw_download, timeout=timeout, **extra_kwargs + ) + self.assertEqual(fetched, b"") - expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) + expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - 
if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=raw_download, - timeout=expected_timeout, - checksum="md5", - retry=expected_retry, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertIsInstance(stream, io.BytesIO) + blob._download_blob.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + raw_download=raw_download, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=expected_timeout, + checksum="md5", + retry=expected_retry, + ) + stream = blob._download_blob.mock_calls[0].args[0] + self.assertIsInstance(stream, io.BytesIO) def test_download_as_bytes_w_custom_timeout(self): self._download_as_bytes_helper(raw_download=False, timeout=9.58) @@ -1820,14 +1833,14 @@ def test_download_as_bytes_w_etag_match(self): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - client.download_blob_to_file = mock.Mock() + blob._download_blob = mock.Mock() fetched = blob.download_as_bytes(if_etag_match=ETAG) self.assertEqual(fetched, b"") - client.download_blob_to_file.assert_called_once_with( - blob, + blob._download_blob.assert_called_once_with( mock.ANY, + client=None, start=None, end=None, raw_download=False, @@ -1850,14 +1863,14 @@ def test_download_as_bytes_w_generation_match(self): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - client.download_blob_to_file = mock.Mock() + blob._download_blob = mock.Mock() fetched = blob.download_as_bytes(if_generation_match=GENERATION_NUMBER) self.assertEqual(fetched, b"") - client.download_blob_to_file.assert_called_once_with( - blob, + blob._download_blob.assert_called_once_with( mock.ANY, + client=None, start=None, end=None, raw_download=False, @@ -2087,14 +2100,14 @@ def test_download_as_string(self, mock_warn): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - client.download_blob_to_file = mock.Mock() + blob._download_blob = mock.Mock() fetched = blob.download_as_string() self.assertEqual(fetched, b"") - client.download_blob_to_file.assert_called_once_with( - blob, + blob._download_blob.assert_called_once_with( mock.ANY, + client=None, start=None, end=None, raw_download=False, @@ -2125,14 +2138,14 @@ def test_download_as_string_no_retry(self, mock_warn): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - client.download_blob_to_file = mock.Mock() + blob._download_blob = mock.Mock() fetched = blob.download_as_string(retry=None) self.assertEqual(fetched, b"") - client.download_blob_to_file.assert_called_once_with( - blob, + blob._download_blob.assert_called_once_with( mock.ANY, + client=None, start=None, end=None, raw_download=False, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 0b5af95d6..2f7738399 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1638,10 +1638,17 @@ def test_create_bucket_w_name_only(self): retry=DEFAULT_RETRY, _target_object=bucket, ) + + @staticmethod + def _make_blob(*args, **kw): + from google.cloud.storage.blob import Blob + blob = Blob(*args, **kw) + + return blob def test_download_blob_to_file_with_failure(self): from google.resumable_media import InvalidResponse - from google.cloud.storage.blob import Blob + # from google.cloud.storage.blob import Blob from 
google.cloud.storage.constants import _DEFAULT_TIMEOUT project = "PROJECT" @@ -1652,7 +1659,7 @@ def test_download_blob_to_file_with_failure(self): grmp_response = InvalidResponse(raw_response) credentials = _make_credentials(project=project) client = self._make_one(credentials=credentials) - blob = mock.create_autospec(Blob) + blob = self._make_blob(name="blob_name", bucket="source_name") blob._encryption_key = None blob._get_download_url = mock.Mock() blob._do_download = mock.Mock() @@ -1689,7 +1696,7 @@ def test_download_blob_to_file_with_uri(self): project = "PROJECT" credentials = _make_credentials(project=project) client = self._make_one(project=project, credentials=credentials) - blob = mock.Mock() + blob = self._make_blob(name="blob_name", bucket="source_name") file_obj = io.BytesIO() blob._encryption_key = None blob._get_download_url = mock.Mock() @@ -1787,13 +1794,12 @@ def test_download_blob_to_file_w_conditional_retry_fail(self): def _download_blob_to_file_helper( self, use_chunks, raw_download, expect_condition_fail=False, **extra_kwargs ): - from google.cloud.storage.blob import Blob from google.cloud.storage.constants import _DEFAULT_TIMEOUT project = "PROJECT" credentials = _make_credentials(project=project) client = self._make_one(credentials=credentials) - blob = mock.create_autospec(Blob) + blob = self._make_blob(name="blob_name", bucket="source_name") blob._encryption_key = None blob._get_download_url = mock.Mock() if use_chunks: @@ -1863,14 +1869,13 @@ def test_download_blob_to_file_w_chunks_w_raw(self): self._download_blob_to_file_helper(use_chunks=True, raw_download=True) def test_download_blob_have_different_uuid(self): - from google.cloud.storage.blob import Blob - project = "PROJECT" credentials = _make_credentials(project=project) client = self._make_one(credentials=credentials) - blob = mock.create_autospec(Blob) + blob = self._make_blob(name="blob_name", bucket="source_name") blob._encryption_key = None blob._do_download = mock.Mock() + blob._get_download_url = mock.Mock() file_obj = io.BytesIO() client.download_blob_to_file(blob, file_obj) client.download_blob_to_file(blob, file_obj) From 0d2d825e3e5bede60de2b0fa1468cd3abffd8eb1 Mon Sep 17 00:00:00 2001 From: MiaCY Date: Wed, 7 Jun 2023 13:22:11 -0700 Subject: [PATCH 04/10] lint reformat --- google/cloud/storage/blob.py | 2 +- google/cloud/storage/client.py | 2 -- tests/unit/test_blob.py | 14 ++++++++++---- tests/unit/test_client.py | 4 +++- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index cd79ed86b..857f4e6b6 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -3949,7 +3949,7 @@ def _download_blob( checksum="md5", retry=DEFAULT_RETRY, ): - + """Download the contents of a blob object into a file-like object. 
See https://cloud.google.com/storage/docs/downloading-objects diff --git a/google/cloud/storage/client.py b/google/cloud/storage/client.py index e5d0ad17c..10df1b0ba 100644 --- a/google/cloud/storage/client.py +++ b/google/cloud/storage/client.py @@ -25,8 +25,6 @@ from google.auth.credentials import AnonymousCredentials -from google import resumable_media - from google.api_core import page_iterator from google.cloud._helpers import _LocalStack, _NOW from google.cloud.client import ClientWithProject diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index 72e36e161..ac71e6c6f 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -1549,7 +1549,7 @@ def _download_to_file_helper( extra_kwargs.update(timeout_kwarg) file_obj = io.BytesIO() - + with mock.patch.object(blob, "_download_blob"): if raw_download: blob.download_to_file(file_obj, raw_download=True, **extra_kwargs) @@ -1630,7 +1630,9 @@ def _download_to_filename_helper( updated_time = blob.updated.timestamp() self.assertEqual(mtime, updated_time) - expected_timeout = self._get_default_timeout() if timeout is None else timeout + expected_timeout = ( + self._get_default_timeout() if timeout is None else timeout + ) expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) @@ -1718,7 +1720,9 @@ def test_download_to_filename_w_generation_match(self): with mock.patch.object(blob, "_download_blob"): with _NamedTemporaryFile() as temp: - blob.download_to_filename(temp.name, if_generation_match=generation_number) + blob.download_to_filename( + temp.name, if_generation_match=generation_number + ) expected_timeout = self._get_default_timeout() blob._download_blob.assert_called_once_with( @@ -1793,7 +1797,9 @@ def _download_as_bytes_helper(self, raw_download, timeout=None, **extra_kwargs): with mock.patch.object(blob, "_download_blob"): if timeout is None: expected_timeout = self._get_default_timeout() - fetched = blob.download_as_bytes(raw_download=raw_download, **extra_kwargs) + fetched = blob.download_as_bytes( + raw_download=raw_download, **extra_kwargs + ) else: expected_timeout = timeout fetched = blob.download_as_bytes( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2f7738399..d7c8a53ea 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1638,16 +1638,18 @@ def test_create_bucket_w_name_only(self): retry=DEFAULT_RETRY, _target_object=bucket, ) - + @staticmethod def _make_blob(*args, **kw): from google.cloud.storage.blob import Blob + blob = Blob(*args, **kw) return blob def test_download_blob_to_file_with_failure(self): from google.resumable_media import InvalidResponse + # from google.cloud.storage.blob import Blob from google.cloud.storage.constants import _DEFAULT_TIMEOUT From 2d0ba3b8b9109ef274b2334aed8ea0ed05cab9b0 Mon Sep 17 00:00:00 2001 From: MiaCY Date: Mon, 12 Jun 2023 13:28:04 -0700 Subject: [PATCH 05/10] Rename _prep_and_do_download --- google/cloud/storage/blob.py | 12 +++--- google/cloud/storage/client.py | 2 +- tests/unit/test_blob.py | 70 +++++++++++++++++----------------- tests/unit/test_client.py | 10 ++--- 4 files changed, 46 insertions(+), 48 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 857f4e6b6..af68411f7 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -904,7 +904,7 @@ def _do_download( ): """Perform a download without any error handling. 
- This is intended to be called by :meth:`download_to_file` so it can + This is intended to be called by :meth:`_prep_and_do_download` so it can be wrapped with error handling / remapping. :type transport: @@ -957,7 +957,7 @@ def _do_download( This private method does not accept ConditionalRetryPolicy values because the information necessary to evaluate the policy is instead - evaluated in blob._download_blob(). + evaluated in blob._prep_and_do_download(). See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for information on retry types and how @@ -1125,7 +1125,7 @@ def download_to_file( :raises: :class:`google.cloud.exceptions.NotFound` """ - self._download_blob( + self._prep_and_do_download( file_obj, client=client, start=start, @@ -1252,7 +1252,7 @@ def download_to_filename( try: with open(filename, "wb") as file_obj: - self._download_blob( + self._prep_and_do_download( file_obj, client=client, start=start, @@ -1384,7 +1384,7 @@ def download_as_bytes( string_buffer = BytesIO() - self._download_blob( + self._prep_and_do_download( string_buffer, client=client, start=start, @@ -3932,7 +3932,7 @@ def open( :rtype: str or ``NoneType`` """ - def _download_blob( + def _prep_and_do_download( self, file_obj, client=None, diff --git a/google/cloud/storage/client.py b/google/cloud/storage/client.py index 10df1b0ba..e291ba85f 100644 --- a/google/cloud/storage/client.py +++ b/google/cloud/storage/client.py @@ -1060,7 +1060,7 @@ def download_blob_to_file( if not isinstance(blob_or_uri, Blob): blob_or_uri = Blob.from_string(blob_or_uri) - blob_or_uri._download_blob( + blob_or_uri._prep_and_do_download( file_obj, client=self, start=start, diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index ac71e6c6f..658dba81e 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -1415,8 +1415,8 @@ def test_download_to_file_with_failure(self): blob = self._make_one(blob_name, bucket=bucket) file_obj = io.BytesIO() - with mock.patch.object(blob, "_download_blob"): - blob._download_blob.side_effect = NotFound("testing") + with mock.patch.object(blob, "_prep_and_do_download"): + blob._prep_and_do_download.side_effect = NotFound("testing") with self.assertRaises(NotFound): blob.download_to_file(file_obj) @@ -1424,7 +1424,7 @@ def test_download_to_file_with_failure(self): self.assertEqual(file_obj.tell(), 0) expected_timeout = self._get_default_timeout() - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( file_obj, client=None, start=None, @@ -1448,14 +1448,14 @@ def test_download_to_file_wo_media_link(self): blob = self._make_one(blob_name, bucket=bucket) file_obj = io.BytesIO() - with mock.patch.object(blob, "_download_blob"): + with mock.patch.object(blob, "_prep_and_do_download"): blob.download_to_file(file_obj) # Make sure the media link is still unknown. 
self.assertIsNone(blob.media_link) expected_timeout = self._get_default_timeout() - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( file_obj, client=None, start=None, @@ -1478,11 +1478,11 @@ def test_download_to_file_w_etag_match(self): blob = self._make_one("blob-name", bucket=_Bucket(client)) file_obj = io.BytesIO() - with mock.patch.object(blob, "_download_blob"): + with mock.patch.object(blob, "_prep_and_do_download"): blob.download_to_file(file_obj, if_etag_not_match=etag) expected_timeout = self._get_default_timeout() - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( file_obj, client=None, start=None, @@ -1505,11 +1505,11 @@ def test_download_to_file_w_generation_match(self): blob = self._make_one("blob-name", bucket=_Bucket(client)) file_obj = io.BytesIO() - with mock.patch.object(blob, "_download_blob"): + with mock.patch.object(blob, "_prep_and_do_download"): blob.download_to_file(file_obj, if_generation_not_match=generation_number) expected_timeout = self._get_default_timeout() - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( file_obj, client=None, start=None, @@ -1550,14 +1550,14 @@ def _download_to_file_helper( file_obj = io.BytesIO() - with mock.patch.object(blob, "_download_blob"): + with mock.patch.object(blob, "_prep_and_do_download"): if raw_download: blob.download_to_file(file_obj, raw_download=True, **extra_kwargs) else: blob.download_to_file(file_obj, **extra_kwargs) expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( file_obj, client=None, start=None, @@ -1609,7 +1609,7 @@ def _download_to_filename_helper( blob = self._make_one(blob_name, bucket=bucket, properties=properties) - with mock.patch.object(blob, "_download_blob"): + with mock.patch.object(blob, "_prep_and_do_download"): with _NamedTemporaryFile() as temp: if timeout is None: blob.download_to_filename( @@ -1636,7 +1636,7 @@ def _download_to_filename_helper( expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( mock.ANY, client=None, start=None, @@ -1652,7 +1652,7 @@ def _download_to_filename_helper( checksum="md5", retry=expected_retry, ) - stream = blob._download_blob.mock_calls[0].args[0] + stream = blob._prep_and_do_download.mock_calls[0].args[0] self.assertEqual(stream.name, temp.name) def test_download_to_filename_w_updated_wo_raw(self): @@ -1687,12 +1687,12 @@ def test_download_to_filename_w_etag_match(self): client = self._make_client() blob = self._make_one("blob-name", bucket=_Bucket(client)) - with mock.patch.object(blob, "_download_blob"): + with mock.patch.object(blob, "_prep_and_do_download"): with _NamedTemporaryFile() as temp: blob.download_to_filename(temp.name, if_etag_match=etag) expected_timeout = self._get_default_timeout() - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( mock.ANY, client=None, start=None, @@ -1708,7 +1708,7 @@ def test_download_to_filename_w_etag_match(self): checksum="md5", retry=DEFAULT_RETRY, ) - stream = blob._download_blob.mock_calls[0].args[0] + stream = blob._prep_and_do_download.mock_calls[0].args[0] self.assertEqual(stream.name, temp.name) def test_download_to_filename_w_generation_match(self): @@ -1718,14 +1718,14 @@ def 
test_download_to_filename_w_generation_match(self): client = self._make_client() blob = self._make_one("blob-name", bucket=_Bucket(client)) - with mock.patch.object(blob, "_download_blob"): + with mock.patch.object(blob, "_prep_and_do_download"): with _NamedTemporaryFile() as temp: blob.download_to_filename( temp.name, if_generation_match=generation_number ) expected_timeout = self._get_default_timeout() - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( mock.ANY, client=None, start=None, @@ -1741,7 +1741,7 @@ def test_download_to_filename_w_generation_match(self): checksum="md5", retry=DEFAULT_RETRY, ) - stream = blob._download_blob.mock_calls[0].args[0] + stream = blob._prep_and_do_download.mock_calls[0].args[0] self.assertEqual(stream.name, temp.name) def test_download_to_filename_corrupted(self): @@ -1752,8 +1752,8 @@ def test_download_to_filename_corrupted(self): bucket = _Bucket(client) blob = self._make_one(blob_name, bucket=bucket) - with mock.patch.object(blob, "_download_blob"): - blob._download_blob.side_effect = DataCorruption("testing") + with mock.patch.object(blob, "_prep_and_do_download"): + blob._prep_and_do_download.side_effect = DataCorruption("testing") # Try to download into a temporary file (don't use # `_NamedTemporaryFile` it will try to remove after the file is @@ -1769,7 +1769,7 @@ def test_download_to_filename_corrupted(self): self.assertFalse(os.path.exists(filename)) expected_timeout = self._get_default_timeout() - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( mock.ANY, client=None, start=None, @@ -1785,7 +1785,7 @@ def test_download_to_filename_corrupted(self): checksum="md5", retry=DEFAULT_RETRY, ) - stream = blob._download_blob.mock_calls[0].args[0] + stream = blob._prep_and_do_download.mock_calls[0].args[0] self.assertEqual(stream.name, filename) def _download_as_bytes_helper(self, raw_download, timeout=None, **extra_kwargs): @@ -1794,7 +1794,7 @@ def _download_as_bytes_helper(self, raw_download, timeout=None, **extra_kwargs): bucket = _Bucket(client) blob = self._make_one(blob_name, bucket=bucket) - with mock.patch.object(blob, "_download_blob"): + with mock.patch.object(blob, "_prep_and_do_download"): if timeout is None: expected_timeout = self._get_default_timeout() fetched = blob.download_as_bytes( @@ -1809,7 +1809,7 @@ def _download_as_bytes_helper(self, raw_download, timeout=None, **extra_kwargs): expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( mock.ANY, client=None, start=None, @@ -1825,7 +1825,7 @@ def _download_as_bytes_helper(self, raw_download, timeout=None, **extra_kwargs): checksum="md5", retry=expected_retry, ) - stream = blob._download_blob.mock_calls[0].args[0] + stream = blob._prep_and_do_download.mock_calls[0].args[0] self.assertIsInstance(stream, io.BytesIO) def test_download_as_bytes_w_custom_timeout(self): @@ -1839,12 +1839,12 @@ def test_download_as_bytes_w_etag_match(self): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - blob._download_blob = mock.Mock() + blob._prep_and_do_download = mock.Mock() fetched = blob.download_as_bytes(if_etag_match=ETAG) self.assertEqual(fetched, b"") - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( mock.ANY, client=None, start=None, @@ -1869,12 +1869,12 @@ def 
test_download_as_bytes_w_generation_match(self): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - blob._download_blob = mock.Mock() + blob._prep_and_do_download = mock.Mock() fetched = blob.download_as_bytes(if_generation_match=GENERATION_NUMBER) self.assertEqual(fetched, b"") - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( mock.ANY, client=None, start=None, @@ -2106,12 +2106,12 @@ def test_download_as_string(self, mock_warn): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - blob._download_blob = mock.Mock() + blob._prep_and_do_download = mock.Mock() fetched = blob.download_as_string() self.assertEqual(fetched, b"") - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( mock.ANY, client=None, start=None, @@ -2144,12 +2144,12 @@ def test_download_as_string_no_retry(self, mock_warn): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - blob._download_blob = mock.Mock() + blob._prep_and_do_download = mock.Mock() fetched = blob.download_as_string(retry=None) self.assertEqual(fetched, b"") - blob._download_blob.assert_called_once_with( + blob._prep_and_do_download.assert_called_once_with( mock.ANY, client=None, start=None, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index d7c8a53ea..720c435a9 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1649,8 +1649,6 @@ def _make_blob(*args, **kw): def test_download_blob_to_file_with_failure(self): from google.resumable_media import InvalidResponse - - # from google.cloud.storage.blob import Blob from google.cloud.storage.constants import _DEFAULT_TIMEOUT project = "PROJECT" @@ -1661,7 +1659,7 @@ def test_download_blob_to_file_with_failure(self): grmp_response = InvalidResponse(raw_response) credentials = _make_credentials(project=project) client = self._make_one(credentials=credentials) - blob = self._make_blob(name="blob_name", bucket="source_name") + blob = self._make_blob(name="blob_name", bucket=None) blob._encryption_key = None blob._get_download_url = mock.Mock() blob._do_download = mock.Mock() @@ -1698,7 +1696,7 @@ def test_download_blob_to_file_with_uri(self): project = "PROJECT" credentials = _make_credentials(project=project) client = self._make_one(project=project, credentials=credentials) - blob = self._make_blob(name="blob_name", bucket="source_name") + blob = self._make_blob(name="blob_name", bucket=None) file_obj = io.BytesIO() blob._encryption_key = None blob._get_download_url = mock.Mock() @@ -1801,7 +1799,7 @@ def _download_blob_to_file_helper( project = "PROJECT" credentials = _make_credentials(project=project) client = self._make_one(credentials=credentials) - blob = self._make_blob(name="blob_name", bucket="source_name") + blob = self._make_blob(name="blob_name", bucket=None) blob._encryption_key = None blob._get_download_url = mock.Mock() if use_chunks: @@ -1874,7 +1872,7 @@ def test_download_blob_have_different_uuid(self): project = "PROJECT" credentials = _make_credentials(project=project) client = self._make_one(credentials=credentials) - blob = self._make_blob(name="blob_name", bucket="source_name") + blob = self._make_blob(name="blob_name", bucket=None) blob._encryption_key = None blob._do_download = mock.Mock() blob._get_download_url = mock.Mock() From b41c38d9da9c4fcba129925a2b6bef960c616e59 Mon Sep 17 00:00:00 2001 From: MiaCY 
Date: Wed, 21 Jun 2023 10:18:05 -0700 Subject: [PATCH 06/10] Refactor blob.upload_from_file --- google/cloud/storage/blob.py | 163 ++++++++++++++++++++++++++++++++++- 1 file changed, 160 insertions(+), 3 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index af68411f7..15e95aaa3 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -2385,8 +2385,8 @@ def _do_upload( ) return response.json() - - def upload_from_file( + + def _prep_and_do_upload( self, file_obj, rewind=False, @@ -2556,6 +2556,163 @@ def upload_from_file( except resumable_media.InvalidResponse as exc: _raise_from_invalid_response(exc) + def upload_from_file( + self, + file_obj, + rewind=False, + size=None, + content_type=None, + num_retries=None, + client=None, + predefined_acl=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=_DEFAULT_TIMEOUT, + checksum=None, + retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + ): + """Upload the contents of this blob from a file-like object. + + The content type of the upload will be determined in order + of precedence: + + - The value passed in to this method (if not :data:`None`) + - The value stored on the current blob + - The default value ('application/octet-stream') + + .. note:: + The effect of uploading to an existing blob depends on the + "versioning" and "lifecycle" policies defined on the blob's + bucket. In the absence of those policies, upload will + overwrite any existing contents. + + See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning) + and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle) + API documents for details. + + If the size of the data to be uploaded exceeds 8 MB a resumable media + request will be used, otherwise the content and the metadata will be + uploaded in a single multipart upload request. + + For more fine-grained over the upload process, check out + [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html). + + If :attr:`user_project` is set on the bucket, bills the API request + to that project. + + :type file_obj: file + :param file_obj: A file handle opened in binary mode for reading. + + :type rewind: bool + :param rewind: + If True, seek to the beginning of the file handle before writing + the file to Cloud Storage. + + :type size: int + :param size: + The number of bytes to be uploaded (which will be read from + ``file_obj``). If not provided, the upload will be concluded once + ``file_obj`` is exhausted. + + :type content_type: str + :param content_type: (Optional) Type of content being uploaded. + + :type num_retries: int + :param num_retries: + Number of upload retries. By default, only uploads with + if_generation_match set will be retried, as uploads without the + argument are not guaranteed to be idempotent. Setting num_retries + will override this default behavior and guarantee retries even when + if_generation_match is not set. (Deprecated: This argument + will be removed in a future release.) + + :type client: :class:`~google.cloud.storage.client.Client` + :param client: + (Optional) The client to use. If not passed, falls back to the + ``client`` stored on the blob's bucket. 
+ + :type predefined_acl: str + :param predefined_acl: (Optional) Predefined access control list + + :type if_generation_match: long + :param if_generation_match: + (Optional) See :ref:`using-if-generation-match` + + :type if_generation_not_match: long + :param if_generation_not_match: + (Optional) See :ref:`using-if-generation-not-match` + + :type if_metageneration_match: long + :param if_metageneration_match: + (Optional) See :ref:`using-if-metageneration-match` + + :type if_metageneration_not_match: long + :param if_metageneration_not_match: + (Optional) See :ref:`using-if-metageneration-not-match` + + :type timeout: float or tuple + :param timeout: + (Optional) The amount of time, in seconds, to wait + for the server response. See: :ref:`configuring_timeouts` + + :type checksum: str + :param checksum: + (Optional) The type of checksum to compute to verify + the integrity of the object. If the upload is completed in a single + request, the checksum will be entirely precomputed and the remote + server will handle verification and error handling. If the upload + is too large and must be transmitted in multiple requests, the + checksum will be incrementally computed and the client will handle + verification and error handling, raising + google.resumable_media.common.DataCorruption on a mismatch and + attempting to delete the corrupted file. Supported values are + "md5", "crc32c" and None. The default is None. + + :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy + :param retry: (Optional) How to retry the RPC. A None value will disable + retries. A google.api_core.retry.Retry value will enable retries, + and the object will define retriable response codes and errors and + configure backoff and timeout options. + + A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a + Retry object and activates it only if certain conditions are met. + This class exists to provide safe defaults for RPC calls that are + not technically safe to retry normally (due to potential data + duplication or other side-effects) but become safe to retry if a + condition such as if_generation_match is set. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Media operations (downloads and uploads) do not support non-default + predicates in a Retry object. The default will always be used. Other + configuration changes for Retry objects such as delays and deadlines + are respected. + + :raises: :class:`~google.cloud.exceptions.GoogleCloudError` + if the upload response returns an error status. 
+ """ + + self._prep_and_do_upload( + file_obj, + rewind=rewind, + size=size, + content_type=content_type, + num_retries=num_retries, + client=client, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=timeout, + checksum=checksum, + retry=retry, + ) + def upload_from_filename( self, filename, @@ -2681,7 +2838,7 @@ def upload_from_filename( with open(filename, "rb") as file_obj: total_bytes = os.fstat(file_obj.fileno()).st_size - self.upload_from_file( + self._prep_and_do_upload( file_obj, content_type=content_type, num_retries=num_retries, From 8b2897ef7c74f9399067a667e313694b85fba759 Mon Sep 17 00:00:00 2001 From: MiaCY Date: Wed, 21 Jun 2023 11:04:19 -0700 Subject: [PATCH 07/10] Lint reformat --- google/cloud/storage/blob.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 6d4f4aa37..6d6bd7986 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -2385,7 +2385,7 @@ def _do_upload( ) return response.json() - + def _prep_and_do_upload( self, file_obj, @@ -2695,7 +2695,6 @@ def upload_from_file( :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the upload response returns an error status. """ - self._prep_and_do_upload( file_obj, rewind=rewind, @@ -4110,7 +4109,6 @@ def _prep_and_do_download( checksum="md5", retry=DEFAULT_RETRY, ): - """Download the contents of a blob object into a file-like object. See https://cloud.google.com/storage/docs/downloading-objects From 6cfc5baeed06e1af33074f2ae7a7138e34604794 Mon Sep 17 00:00:00 2001 From: MiaCY Date: Tue, 18 Jul 2023 10:05:49 -0700 Subject: [PATCH 08/10] feature: add 'command' argument to private upload/download interface --- google/cloud/storage/_helpers.py | 21 +++++++++---- google/cloud/storage/blob.py | 51 ++++++++++++++++++++++++++++---- tests/unit/test_blob.py | 14 +++++++-- 3 files changed, 73 insertions(+), 13 deletions(-) diff --git a/google/cloud/storage/_helpers.py b/google/cloud/storage/_helpers.py index 29968a9aa..eab96f50f 100644 --- a/google/cloud/storage/_helpers.py +++ b/google/cloud/storage/_helpers.py @@ -599,19 +599,30 @@ def _get_default_headers( user_agent, content_type="application/json; charset=UTF-8", x_upload_content_type=None, + command=None, ): """Get the headers for a request. - Args: - user_agent (str): The user-agent for requests. - Returns: - Dict: The headers to be used for the request. + :type user_agent: str + :param user_agent: The user-agent for requests. + + :type command: str + :param user_agent: (Optional) Information that can be passed into default + header "X-Goog-API-Client". The default is None. + + :rtype: dict + :returns: The headers to be used for the request. 
""" + if not command: + x_goog_api_client = f"{user_agent} {_get_invocation_id()}" + else: + x_goog_api_client = f"{user_agent} {_get_invocation_id()} gccl-gcs-cmd/{command}" + return { "Accept": "application/json", "Accept-Encoding": "gzip, deflate", "User-Agent": user_agent, - "X-Goog-API-Client": f"{user_agent} {_get_invocation_id()}", + "X-Goog-API-Client": x_goog_api_client, "content-type": content_type, "x-upload-content-type": x_upload_content_type or content_type, } diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 6d6bd7986..fe9db8479 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -1697,7 +1697,7 @@ def _get_writable_metadata(self): return object_metadata - def _get_upload_arguments(self, client, content_type): + def _get_upload_arguments(self, client, content_type, command=None): """Get required arguments for performing an upload. The content type returned will be determined in order of precedence: @@ -1709,6 +1709,10 @@ def _get_upload_arguments(self, client, content_type): :type content_type: str :param content_type: Type of content being uploaded (or :data:`None`). + :type command: str + :param command: (Optional) Information that can be passed into default + header "X-Goog-API-Client". The default is None. + :rtype: tuple :returns: A triple of @@ -1718,7 +1722,7 @@ def _get_upload_arguments(self, client, content_type): """ content_type = self._get_content_type(content_type) headers = { - **_get_default_headers(client._connection.user_agent, content_type), + **_get_default_headers(client._connection.user_agent, content_type, command=command), **_get_encryption_headers(self._encryption_key), } object_metadata = self._get_writable_metadata() @@ -1739,6 +1743,7 @@ def _do_multipart_upload( timeout=_DEFAULT_TIMEOUT, checksum=None, retry=None, + command=None, ): """Perform a multipart upload. @@ -1822,6 +1827,10 @@ def _do_multipart_upload( (google.cloud.storage.retry) for information on retry types and how to configure them. + :type command: str + :param command: (Optional) Information that can be passed into default + header "X-Goog-API-Client". The default is None. + :rtype: :class:`~requests.Response` :returns: The "200 OK" response object returned after the multipart upload request. @@ -1840,7 +1849,7 @@ def _do_multipart_upload( transport = self._get_transport(client) if "metadata" in self._properties and "metadata" not in self._changes: self._changes.add("metadata") - info = self._get_upload_arguments(client, content_type) + info = self._get_upload_arguments(client, content_type, command=command) headers, object_metadata, content_type = info hostname = _get_host_name(client._connection) @@ -1910,6 +1919,7 @@ def _initiate_resumable_upload( timeout=_DEFAULT_TIMEOUT, checksum=None, retry=None, + command=None, ): """Initiate a resumable upload. @@ -2008,6 +2018,10 @@ def _initiate_resumable_upload( (google.cloud.storage.retry) for information on retry types and how to configure them. + :type command: str + :param command: (Optional) Information that can be passed into default + header "X-Goog-API-Client". The default is None. 
+ :rtype: tuple :returns: Pair of @@ -2025,7 +2039,7 @@ def _initiate_resumable_upload( transport = self._get_transport(client) if "metadata" in self._properties and "metadata" not in self._changes: self._changes.add("metadata") - info = self._get_upload_arguments(client, content_type) + info = self._get_upload_arguments(client, content_type, command=command) headers, object_metadata, content_type = info if extra_headers is not None: headers.update(extra_headers) @@ -2103,6 +2117,7 @@ def _do_resumable_upload( timeout=_DEFAULT_TIMEOUT, checksum=None, retry=None, + command=None, ): """Perform a resumable upload. @@ -2191,6 +2206,10 @@ def _do_resumable_upload( (google.cloud.storage.retry) for information on retry types and how to configure them. + :type command: str + :param command: (Optional) Information that can be passed into default + header "X-Goog-API-Client". The default is None. + :rtype: :class:`~requests.Response` :returns: The "200 OK" response object returned after the final chunk is uploaded. @@ -2209,6 +2228,7 @@ def _do_resumable_upload( timeout=timeout, checksum=checksum, retry=retry, + command=command, ) while not upload.finished: try: @@ -2234,6 +2254,7 @@ def _do_upload( timeout=_DEFAULT_TIMEOUT, checksum=None, retry=None, + command=None, ): """Determine an upload strategy and then perform the upload. @@ -2333,6 +2354,10 @@ def _do_upload( configuration changes for Retry objects such as delays and deadlines are respected. + :type command: str + :param command: (Optional) Information that can be passed into default + header "X-Goog-API-Client". The default is None. + :rtype: dict :returns: The parsed JSON from the "200 OK" response. This will be the **only** response in the multipart case and it will be the @@ -2366,6 +2391,7 @@ def _do_upload( timeout=timeout, checksum=checksum, retry=retry, + command=command, ) else: response = self._do_resumable_upload( @@ -2382,6 +2408,7 @@ def _do_upload( timeout=timeout, checksum=checksum, retry=retry, + command=command, ) return response.json() @@ -2402,6 +2429,7 @@ def _prep_and_do_upload( timeout=_DEFAULT_TIMEOUT, checksum=None, retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + command=None, ): """Upload the contents of this blob from a file-like object. @@ -2522,6 +2550,10 @@ def _prep_and_do_upload( configuration changes for Retry objects such as delays and deadlines are respected. + :type command: str + :param command: (Optional) Information that can be passed into default + header "X-Goog-API-Client". The default is None. + :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the upload response returns an error status. """ @@ -2551,6 +2583,7 @@ def _prep_and_do_upload( timeout=timeout, checksum=checksum, retry=retry, + command=command, ) self._set_properties(created_json) except resumable_media.InvalidResponse as exc: @@ -4108,6 +4141,7 @@ def _prep_and_do_download( timeout=_DEFAULT_TIMEOUT, checksum="md5", retry=DEFAULT_RETRY, + command=None, ): """Download the contents of a blob object into a file-like object. @@ -4195,6 +4229,10 @@ def _prep_and_do_download( predicates in a Retry object. The default will always be used. Other configuration changes for Retry objects such as delays and deadlines are respected. + + :type command: str + :param command: (Optional) Information that can be passed into default + header "X-Goog-API-Client". The default is None. """ # Handle ConditionalRetryPolicy. 
if isinstance(retry, ConditionalRetryPolicy): @@ -4224,7 +4262,10 @@ def _prep_and_do_download( if_etag_match=if_etag_match, if_etag_not_match=if_etag_not_match, ) - headers = {**_get_default_headers(client._connection.user_agent), **headers} + headers = { + **_get_default_headers(client._connection.user_agent, command=command), + **headers, + } transport = client._http diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index 658dba81e..5b72764dc 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -2251,11 +2251,12 @@ def test__get_upload_arguments(self): blob = self._make_one(name, bucket=None, encryption_key=key) blob.content_disposition = "inline" + COMMAND = "tm.upload_many" content_type = "image/jpeg" with patch.object( _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST ): - info = blob._get_upload_arguments(client, content_type) + info = blob._get_upload_arguments(client, content_type, command=COMMAND) headers, object_metadata, new_content_type = info header_key_value = "W3BYd0AscEBAQWZCZnJSM3gtMmIyU0NIUiwuP1l3Uk8=" @@ -2264,11 +2265,15 @@ def test__get_upload_arguments(self): _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST ): expected_headers = { - **_get_default_headers(client._connection.user_agent, content_type), + **_get_default_headers(client._connection.user_agent, content_type, command=COMMAND), "X-Goog-Encryption-Algorithm": "AES256", "X-Goog-Encryption-Key": header_key_value, "X-Goog-Encryption-Key-Sha256": header_key_hash_value, } + self.assertEqual( + headers["X-Goog-API-Client"], + f"{client._connection.user_agent} {GCCL_INVOCATION_TEST_CONST} gccl-gcs-cmd/{COMMAND}" + ) self.assertEqual(headers, expected_headers) expected_metadata = { "contentDisposition": blob.content_disposition, @@ -3184,6 +3189,7 @@ def _do_upload_helper( timeout=expected_timeout, checksum=None, retry=retry, + command=None, ) blob._do_resumable_upload.assert_not_called() else: @@ -3202,6 +3208,7 @@ def _do_upload_helper( timeout=expected_timeout, checksum=None, retry=retry, + command=None, ) def test__do_upload_uses_multipart(self): @@ -3294,6 +3301,7 @@ def _upload_from_file_helper(self, side_effect=None, **kwargs): timeout=expected_timeout, checksum=None, retry=retry, + command=None, ) return stream @@ -3385,7 +3393,7 @@ def _do_upload_mock_call_helper( if not retry: retry = DEFAULT_RETRY_IF_GENERATION_SPECIFIED if not num_retries else None self.assertEqual( - kwargs, {"timeout": expected_timeout, "checksum": None, "retry": retry} + kwargs, {"timeout": expected_timeout, "checksum": None, "retry": retry, "command": None,} ) return pos_args[1] From a7927c5c66dd49aa7dcd8c425d82975a338f79f6 Mon Sep 17 00:00:00 2001 From: MiaCY Date: Wed, 19 Jul 2023 12:09:24 -0700 Subject: [PATCH 09/10] lint reformat --- google/cloud/storage/_helpers.py | 6 ++++-- google/cloud/storage/blob.py | 18 ++++++++++-------- tests/unit/test_blob.py | 14 +++++++++++--- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/google/cloud/storage/_helpers.py b/google/cloud/storage/_helpers.py index eab96f50f..97c527002 100644 --- a/google/cloud/storage/_helpers.py +++ b/google/cloud/storage/_helpers.py @@ -607,7 +607,7 @@ def _get_default_headers( :param user_agent: The user-agent for requests. :type command: str - :param user_agent: (Optional) Information that can be passed into default + :param user_agent: (Optional) Information that can be passed into default header "X-Goog-API-Client". The default is None. 
:rtype: dict @@ -616,7 +616,9 @@ def _get_default_headers( if not command: x_goog_api_client = f"{user_agent} {_get_invocation_id()}" else: - x_goog_api_client = f"{user_agent} {_get_invocation_id()} gccl-gcs-cmd/{command}" + x_goog_api_client = ( + f"{user_agent} {_get_invocation_id()} gccl-gcs-cmd/{command}" + ) return { "Accept": "application/json", diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index fe9db8479..22738d7e8 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -1710,7 +1710,7 @@ def _get_upload_arguments(self, client, content_type, command=None): :param content_type: Type of content being uploaded (or :data:`None`). :type command: str - :param command: (Optional) Information that can be passed into default + :param command: (Optional) Information that can be passed into default header "X-Goog-API-Client". The default is None. :rtype: tuple @@ -1722,7 +1722,9 @@ def _get_upload_arguments(self, client, content_type, command=None): """ content_type = self._get_content_type(content_type) headers = { - **_get_default_headers(client._connection.user_agent, content_type, command=command), + **_get_default_headers( + client._connection.user_agent, content_type, command=command + ), **_get_encryption_headers(self._encryption_key), } object_metadata = self._get_writable_metadata() @@ -1828,7 +1830,7 @@ def _do_multipart_upload( to configure them. :type command: str - :param command: (Optional) Information that can be passed into default + :param command: (Optional) Information that can be passed into default header "X-Goog-API-Client". The default is None. :rtype: :class:`~requests.Response` @@ -2019,7 +2021,7 @@ def _initiate_resumable_upload( to configure them. :type command: str - :param command: (Optional) Information that can be passed into default + :param command: (Optional) Information that can be passed into default header "X-Goog-API-Client". The default is None. :rtype: tuple @@ -2207,7 +2209,7 @@ def _do_resumable_upload( to configure them. :type command: str - :param command: (Optional) Information that can be passed into default + :param command: (Optional) Information that can be passed into default header "X-Goog-API-Client". The default is None. :rtype: :class:`~requests.Response` @@ -2355,7 +2357,7 @@ def _do_upload( are respected. :type command: str - :param command: (Optional) Information that can be passed into default + :param command: (Optional) Information that can be passed into default header "X-Goog-API-Client". The default is None. :rtype: dict @@ -2551,7 +2553,7 @@ def _prep_and_do_upload( are respected. :type command: str - :param command: (Optional) Information that can be passed into default + :param command: (Optional) Information that can be passed into default header "X-Goog-API-Client". The default is None. :raises: :class:`~google.cloud.exceptions.GoogleCloudError` @@ -4231,7 +4233,7 @@ def _prep_and_do_download( are respected. :type command: str - :param command: (Optional) Information that can be passed into default + :param command: (Optional) Information that can be passed into default header "X-Goog-API-Client". The default is None. """ # Handle ConditionalRetryPolicy. 
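For readers tracking the header change, the short sketch below condenses what the ``_get_default_headers`` logic above amounts to once ``command`` is threaded through: the invocation id is always appended to the user agent, and a ``gccl-gcs-cmd/<command>`` token is appended only when a command string is supplied. This is an illustrative stand-alone sketch, not code from the patch; the helper name, user agent, and invocation id shown are made up.

# Minimal sketch of the X-Goog-API-Client composition introduced above.
# All concrete values below are placeholders for illustration only.
def build_x_goog_api_client(user_agent, invocation_id, command=None):
    value = f"{user_agent} {invocation_id}"
    if command:
        value += f" gccl-gcs-cmd/{command}"
    return value

# Default (public methods pass command=None):
#   "gcloud-python/2.10.0 gccl-invocation-id/1234"
# With command="tm.upload_many" (the value exercised in the unit test below):
#   "gcloud-python/2.10.0 gccl-invocation-id/1234 gccl-gcs-cmd/tm.upload_many"
print(build_x_goog_api_client("gcloud-python/2.10.0", "gccl-invocation-id/1234"))
print(build_x_goog_api_client("gcloud-python/2.10.0", "gccl-invocation-id/1234", command="tm.upload_many"))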
diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index 5b72764dc..a8d024176 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -2265,14 +2265,16 @@ def test__get_upload_arguments(self): _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST ): expected_headers = { - **_get_default_headers(client._connection.user_agent, content_type, command=COMMAND), + **_get_default_headers( + client._connection.user_agent, content_type, command=COMMAND + ), "X-Goog-Encryption-Algorithm": "AES256", "X-Goog-Encryption-Key": header_key_value, "X-Goog-Encryption-Key-Sha256": header_key_hash_value, } self.assertEqual( headers["X-Goog-API-Client"], - f"{client._connection.user_agent} {GCCL_INVOCATION_TEST_CONST} gccl-gcs-cmd/{COMMAND}" + f"{client._connection.user_agent} {GCCL_INVOCATION_TEST_CONST} gccl-gcs-cmd/{COMMAND}", ) self.assertEqual(headers, expected_headers) expected_metadata = { @@ -3393,7 +3395,13 @@ def _do_upload_mock_call_helper( if not retry: retry = DEFAULT_RETRY_IF_GENERATION_SPECIFIED if not num_retries else None self.assertEqual( - kwargs, {"timeout": expected_timeout, "checksum": None, "retry": retry, "command": None,} + kwargs, + { + "timeout": expected_timeout, + "checksum": None, + "retry": retry, + "command": None, + }, ) return pos_args[1] From 31561b412156eb12fba18f0d8e269381fba0fcd3 Mon Sep 17 00:00:00 2001 From: MiaCY Date: Tue, 25 Jul 2023 10:36:38 -0700 Subject: [PATCH 10/10] reduce duplication and edit docstring --- google/cloud/storage/_helpers.py | 14 ++++++-------- google/cloud/storage/blob.py | 28 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/google/cloud/storage/_helpers.py b/google/cloud/storage/_helpers.py index 97c527002..3ff52dd75 100644 --- a/google/cloud/storage/_helpers.py +++ b/google/cloud/storage/_helpers.py @@ -607,18 +607,16 @@ def _get_default_headers( :param user_agent: The user-agent for requests. :type command: str - :param user_agent: (Optional) Information that can be passed into default - header "X-Goog-API-Client". The default is None. + :param user_agent: + (Optional) Information about which interface for upload/download was used, to be included in the X-Goog-API-Client header for traffic analysis purposes. Please leave as None unless otherwise directed. :rtype: dict :returns: The headers to be used for the request. """ - if not command: - x_goog_api_client = f"{user_agent} {_get_invocation_id()}" - else: - x_goog_api_client = ( - f"{user_agent} {_get_invocation_id()} gccl-gcs-cmd/{command}" - ) + x_goog_api_client = f"{user_agent} {_get_invocation_id()}" + + if command: + x_goog_api_client += f" gccl-gcs-cmd/{command}" return { "Accept": "application/json", diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 22738d7e8..32b8a62db 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -1710,8 +1710,8 @@ def _get_upload_arguments(self, client, content_type, command=None): :param content_type: Type of content being uploaded (or :data:`None`). :type command: str - :param command: (Optional) Information that can be passed into default - header "X-Goog-API-Client". The default is None. + :param command: + (Optional) Information about which interface for upload was used, to be included in the X-Goog-API-Client header for traffic analysis purposes. Please leave as None unless otherwise directed. :rtype: tuple :returns: A triple of @@ -1830,8 +1830,8 @@ def _do_multipart_upload( to configure them. 
:type command: str - :param command: (Optional) Information that can be passed into default - header "X-Goog-API-Client". The default is None. + :param command: + (Optional) Information about which interface for upload was used, to be included in the X-Goog-API-Client header for traffic analysis purposes. Please leave as None unless otherwise directed. :rtype: :class:`~requests.Response` :returns: The "200 OK" response object returned after the multipart @@ -2021,8 +2021,8 @@ def _initiate_resumable_upload( to configure them. :type command: str - :param command: (Optional) Information that can be passed into default - header "X-Goog-API-Client". The default is None. + :param command: + (Optional) Information about which interface for upload was used, to be included in the X-Goog-API-Client header for traffic analysis purposes. Please leave as None unless otherwise directed. :rtype: tuple :returns: @@ -2209,8 +2209,8 @@ def _do_resumable_upload( to configure them. :type command: str - :param command: (Optional) Information that can be passed into default - header "X-Goog-API-Client". The default is None. + :param command: + (Optional) Information about which interface for upload was used, to be included in the X-Goog-API-Client header for traffic analysis purposes. Please leave as None unless otherwise directed. :rtype: :class:`~requests.Response` :returns: The "200 OK" response object returned after the final chunk @@ -2357,8 +2357,8 @@ def _do_upload( are respected. :type command: str - :param command: (Optional) Information that can be passed into default - header "X-Goog-API-Client". The default is None. + :param command: + (Optional) Information about which interface for upload was used, to be included in the X-Goog-API-Client header for traffic analysis purposes. Please leave as None unless otherwise directed. :rtype: dict :returns: The parsed JSON from the "200 OK" response. This will be the @@ -2553,8 +2553,8 @@ def _prep_and_do_upload( are respected. :type command: str - :param command: (Optional) Information that can be passed into default - header "X-Goog-API-Client". The default is None. + :param command: + (Optional) Information about which interface for upload was used, to be included in the X-Goog-API-Client header for traffic analysis purposes. Please leave as None unless otherwise directed. :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the upload response returns an error status. @@ -4233,8 +4233,8 @@ def _prep_and_do_download( are respected. :type command: str - :param command: (Optional) Information that can be passed into default - header "X-Goog-API-Client". The default is None. + :param command: + (Optional) Information about which interface for download was used, to be included in the X-Goog-API-Client header for traffic analysis purposes. Please leave as None unless otherwise directed. """ # Handle ConditionalRetryPolicy. if isinstance(retry, ConditionalRetryPolicy):
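Viewed end to end, these patches keep the public upload and download methods unchanged while routing them through the new private ``_prep_and_do_upload`` and ``_prep_and_do_download`` entry points, which accept the extra ``command`` argument. The sketch below illustrates the intended calling pattern under stated assumptions: the bucket and object names are placeholders, ambient credentials are assumed, and the private calls are shown only to clarify how an internal component could tag its traffic, not as recommended application code.

# Hypothetical usage sketch; bucket/object names and payload are placeholders.
import io
from google.cloud import storage

client = storage.Client()
blob = client.bucket("example-bucket").blob("example-object")

# Public surface is unchanged; command stays None, so X-Goog-API-Client
# carries only the user agent and the gccl invocation id.
blob.upload_from_file(io.BytesIO(b"hello world"), size=11)

# Internal callers (for example transfer-manager style helpers) can route
# through the private entry point and pass a command token, which is sent
# as "gccl-gcs-cmd/tm.upload_many" in the X-Goog-API-Client header.
blob._prep_and_do_upload(io.BytesIO(b"hello world"), size=11, command="tm.upload_many")

# Downloads follow the same shape: download_to_file()/download_to_filename()
# delegate to _prep_and_do_download(), which accepts the same command keyword.
buffer = io.BytesIO()
blob._prep_and_do_download(buffer)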