From 3b7155dfa8c766f310facc77278936eb7babdabd Mon Sep 17 00:00:00 2001 From: Cathy Ouyang Date: Fri, 20 Oct 2023 16:54:30 -0700 Subject: [PATCH] fix: Blob.from_string parse storage uri with regex --- google/cloud/storage/blob.py | 13 +++++++------ tests/unit/test_blob.py | 14 +++++++++++--- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index aebf24c26..33998f81a 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -134,7 +134,9 @@ "Blob.download_as_string() is deprecated and will be removed in future. " "Use Blob.download_as_bytes() instead." ) - +_GS_URL_REGEX_PATTERN = re.compile( + r"(?P<scheme>gs)://(?P<bucket_name>[a-z0-9_.-]+)/(?P<object_name>.+)" +) _DEFAULT_CHUNKSIZE = 104857600 # 1024 * 1024 B * 100 = 100 MB _MAX_MULTIPART_SIZE = 8388608 # 8 MB @@ -403,12 +405,11 @@ def from_string(cls, uri, client=None): """ from google.cloud.storage.bucket import Bucket - scheme, netloc, path, query, frag = urlsplit(uri) - if scheme != "gs": + match = _GS_URL_REGEX_PATTERN.match(uri) + if not match: raise ValueError("URI scheme must be gs") - - bucket = Bucket(client, name=netloc) - return cls(path[1:], bucket) + bucket = Bucket(client, name=match.group("bucket_name")) + return cls(match.group("object_name"), bucket) def generate_signed_url( self, diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index cb164f6e2..d5058e23c 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -5819,13 +5819,21 @@ def test_from_string_w_valid_uri(self): from google.cloud.storage.blob import Blob client = self._make_client() - uri = "gs://BUCKET_NAME/b" - blob = Blob.from_string(uri, client) + basic_uri = "gs://bucket_name/b" + blob = Blob.from_string(basic_uri, client) self.assertIsInstance(blob, Blob) self.assertIs(blob.client, client) self.assertEqual(blob.name, "b") - self.assertEqual(blob.bucket.name, "BUCKET_NAME") + self.assertEqual(blob.bucket.name, "bucket_name") + + nested_uri = 
"gs://bucket_name/path1/path2/b#name" + blob = Blob.from_string(nested_uri, client) + + self.assertIsInstance(blob, Blob) + self.assertIs(blob.client, client) + self.assertEqual(blob.name, "path1/path2/b#name") + self.assertEqual(blob.bucket.name, "bucket_name") def test_from_string_w_invalid_uri(self): from google.cloud.storage.blob import Blob