From 8287af1299169546f847126f03ae04e48890139e Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 29 Aug 2022 11:59:29 -0700 Subject: [PATCH 001/536] fix: validate opentelemetry span job attributes have values (#1327) fix: validate opentelemetry span job attributes have values There are several job properties that are optional. Existing opentelemetry instrumentation disallows span attribute keys without appropriate values, so this change validates field presence before propagating. --- .../cloud/bigquery/opentelemetry_tracing.py | 12 ++++++--- tests/unit/test_opentelemetry_tracing.py | 26 +++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index adecea121..2345fd1bb 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -107,10 +107,7 @@ def _set_client_attributes(client): def _set_job_attributes(job_ref): job_attributes = { "db.name": job_ref.project, - "location": job_ref.location, - "num_child_jobs": job_ref.num_child_jobs, "job_id": job_ref.job_id, - "parent_job_id": job_ref.parent_job_id, "state": job_ref.state, } @@ -125,4 +122,13 @@ def _set_job_attributes(job_ref): if job_ref.ended is not None: job_attributes["timeEnded"] = job_ref.ended.isoformat() + if job_ref.location is not None: + job_attributes["location"] = job_ref.location + + if job_ref.parent_job_id is not None: + job_attributes["parent_job_id"] = job_ref.parent_job_id + + if job_ref.num_child_jobs is not None: + job_attributes["num_child_jobs"] = job_ref.num_child_jobs + return job_attributes diff --git a/tests/unit/test_opentelemetry_tracing.py b/tests/unit/test_opentelemetry_tracing.py index 3021a3dbf..4cc58713c 100644 --- a/tests/unit/test_opentelemetry_tracing.py +++ b/tests/unit/test_opentelemetry_tracing.py @@ -164,6 +164,32 @@ def test_default_job_attributes(setup): assert span.attributes == expected_attributes +@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") +def test_optional_job_attributes(setup): + # This test ensures we don't propagate unset values into span attributes + import google.cloud._helpers + + time_created = datetime.datetime( + 2010, 5, 19, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + + with mock.patch("google.cloud.bigquery.job._AsyncJob") as test_job_ref: + test_job_ref.job_id = "test_job_id" + test_job_ref.location = None + test_job_ref.project = "test_project_id" + test_job_ref.created = time_created + test_job_ref.state = "some_job_state" + test_job_ref.num_child_jobs = None + test_job_ref.parent_job_id = None + + with opentelemetry_tracing.create_span( + TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, job_ref=test_job_ref + ) as span: + assert span is not None + for val in span.attributes.values(): + assert val is not None + + @pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") def test_default_no_data_leakage(setup): import google.auth.credentials From 1caf94f9476be058290b5304e54426a1e714e791 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 30 Aug 2022 09:32:14 -0400 Subject: [PATCH 002/536] chore(python): exclude grpcio==1.49.0rc1 in tests [autoapprove] (#1328) * chore(python): exclude `grpcio==1.49.0rc1` in tests Source-Link: https://github.com/googleapis/synthtool/commit/c4dd5953003d13b239f872d329c3146586bb417e Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:ce3c1686bc81145c81dd269bd12c4025c6b275b22d14641358827334fddb1d72 * chore(python): exclude grpcio==1.49.0rc1 in tests Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/requirements.txt | 6 +++--- noxfile.py | 7 +++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index c6acdf3f9..23e106b65 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:562802bfac02e012a6ac34eda282f81d06e77326b82a32d7bbb1369ff552b387 -# created: 2022-08-24T17:07:22.006876712Z + digest: sha256:ce3c1686bc81145c81dd269bd12c4025c6b275b22d14641358827334fddb1d72 +# created: 2022-08-29T17:28:30.441852797Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index c4b824f24..4b29ef247 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -136,9 +136,9 @@ cryptography==37.0.4 \ # via # gcp-releasetool # secretstorage -distlib==0.3.5 \ - --hash=sha256:a7f75737c70be3b25e2bee06288cec4e4c221de18455b2dd037fe2a795cab2fe \ - --hash=sha256:b710088c59f06338ca514800ad795a132da19fda270e3ce4affc74abf955a26c +distlib==0.3.6 \ + --hash=sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46 \ + --hash=sha256:f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e # via virtualenv docutils==0.19 \ --hash=sha256:33995a6753c30b7f577febfc2c50411fec6aac7f7ffeb7c4cfe5991072dcf9e6 \ diff --git a/noxfile.py b/noxfile.py index c6f7c76b1..d9883d069 100644 --- a/noxfile.py +++ b/noxfile.py @@ -160,7 +160,9 @@ def system(session): session.skip("Credentials must be set via environment variable.") # Use pre-release gRPC for system tests. - session.install("--pre", "grpcio", "-c", constraints_path) + # Exclude version 1.49.0rc1 which has a known issue. + # See https://github.com/grpc/grpc/pull/30642 + session.install("--pre", "grpcio!=1.49.0rc1", "-c", constraints_path) # Install all test dependencies, then install local packages in place. session.install( @@ -288,7 +290,8 @@ def prerelease_deps(session): "google-cloud-bigquery-storage", "google-cloud-core", "google-resumable-media", - "grpcio", + # Exclude version 1.49.0rc1 which has a known issue. See https://github.com/grpc/grpc/pull/30642 + "grpcio!=1.49.0rc1", ) session.install( "freezegun", From 1de161b4961b3ac7e28a4bc89667c09c46681228 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 30 Aug 2022 18:26:38 +0200 Subject: [PATCH 003/536] chore(deps): update dependency proto-plus to v1.22.1 (#1330) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index feca08cca..99fd900e1 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -27,7 +27,7 @@ mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.3; python_version >= '3.8' -proto-plus==1.22.0 +proto-plus==1.22.1 pyarrow==9.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 From ce80d6ec91c7c8499b7a7e48bcaa4878cb6dae82 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 31 Aug 2022 17:46:08 +0200 Subject: [PATCH 004/536] chore(deps): update dependency pandas to v1.4.4 (#1332) --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 99fd900e1..9a5f30244 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -26,7 +26,7 @@ munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' -pandas==1.4.3; python_version >= '3.8' +pandas==1.4.4; python_version >= '3.8' proto-plus==1.22.1 pyarrow==9.0.0 pyasn1==0.4.8 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 4c0a67a18..c85c78961 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -7,7 +7,7 @@ ipython===8.0.1; python_version == '3.8' ipython==8.4.0; python_version >= '3.9' matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' -pandas==1.4.3; python_version >= '3.8' +pandas==1.4.4; python_version >= '3.8' pyarrow==9.0.0 pytz==2022.2.1 typing-extensions==4.3.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index bbef52e66..dbfb880fd 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -8,7 +8,7 @@ ipython===8.0.1; python_version == '3.8' ipython==8.4.0; python_version >= '3.9' matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' -pandas==1.4.3; python_version >= '3.8' +pandas==1.4.4; python_version >= '3.8' pyarrow==9.0.0 pytz==2022.2.1 typing-extensions==4.3.0 From 7532f58f5124ce7f10f045642d26524eb0e3081d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 1 Sep 2022 20:33:20 +0200 Subject: [PATCH 005/536] chore(deps): update dependency google-crc32c to v1.5.0 (#1334) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 9a5f30244..1b479edf1 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -16,7 +16,7 @@ google-auth==2.11.0 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.14.2 google-cloud-core==2.3.2 -google-crc32c==1.3.0 +google-crc32c==1.5.0 google-resumable-media==2.3.3 googleapis-common-protos==1.56.4 grpcio==1.47.0 From b9e882fcde4288b934b494538be5343a5e18b112 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 1 Sep 2022 20:30:17 +0000 Subject: [PATCH 006/536] ci(python): fix path to requirements.txt in release script (#1335) Source-Link: https://github.com/googleapis/synthtool/commit/fdba3ed145bdb2f4f3eff434d4284b1d03b80d34 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:1f0dbd02745fb7cf255563dab5968345989308544e52b7f460deadd5e78e63b0 --- .github/.OwlBot.lock.yaml | 3 +-- .kokoro/release.sh | 2 +- .kokoro/requirements.txt | 24 ++++++++++++------------ 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 23e106b65..0d9eb2af9 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:ce3c1686bc81145c81dd269bd12c4025c6b275b22d14641358827334fddb1d72 -# created: 2022-08-29T17:28:30.441852797Z + digest: sha256:1f0dbd02745fb7cf255563dab5968345989308544e52b7f460deadd5e78e63b0 diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 879f9ef84..c6a7c9460 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -16,7 +16,7 @@ set -eo pipefail # Start the releasetool reporter -python3 -m pip install --require-hashes -r .kokoro/requirements.txt +python3 -m pip install --require-hashes -r github/python-bigquery/.kokoro/requirements.txt python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source /tmp/publisher-script # Disable buffering, so that the logs stream through. diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 4b29ef247..92b2f727e 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -100,9 +100,9 @@ click==8.0.4 \ # via # gcp-docuploader # gcp-releasetool -colorlog==6.6.0 \ - --hash=sha256:344f73204009e4c83c5b6beb00b3c45dc70fcdae3c80db919e0a4171d006fde8 \ - --hash=sha256:351c51e866c86c3217f08e4b067a7974a678be78f07f85fc2d55b8babde6d94e +colorlog==6.7.0 \ + --hash=sha256:0d33ca236784a1ba3ff9c532d4964126d8a2c44f1f0cb1d2b0728196f512f662 \ + --hash=sha256:bd94bd21c1e13fac7bd3153f4bc3a7dc0eb0974b8bc2fdf1a989e474f6e582e5 # via # gcp-docuploader # nox @@ -152,9 +152,9 @@ gcp-docuploader==0.6.3 \ --hash=sha256:ba8c9d76b3bbac54b0311c503a373b00edc2dc02d6d54ea9507045adb8e870f7 \ --hash=sha256:c0f5aaa82ce1854a386197e4e359b120ad6d4e57ae2c812fce42219a3288026b # via -r requirements.in -gcp-releasetool==1.8.6 \ - --hash=sha256:42e51ab8e2e789bc8e22a03c09352962cd3452951c801a2230d564816630304a \ - --hash=sha256:a3518b79d1b243c494eac392a01c7fd65187fd6d52602dcab9b529bc934d4da1 +gcp-releasetool==1.8.7 \ + --hash=sha256:3d2a67c9db39322194afb3b427e9cb0476ce8f2a04033695f0aeb63979fc2b37 \ + --hash=sha256:5e4d28f66e90780d77f3ecf1e9155852b0c3b13cbccb08ab07e66b2357c8da8d # via -r requirements.in google-api-core==2.8.2 \ --hash=sha256:06f7244c640322b508b125903bb5701bebabce8832f85aba9335ec00b3d02edc \ @@ -251,9 +251,9 @@ jinja2==3.1.2 \ --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \ --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 # via gcp-releasetool -keyring==23.8.2 \ - --hash=sha256:0d9973f8891850f1ade5f26aafd06bb16865fbbae3fc56b0defb6a14a2624003 \ - --hash=sha256:10d2a8639663fe2090705a00b8c47c687cacdf97598ea9c11456679fa974473a +keyring==23.9.0 \ + --hash=sha256:4c32a31174faaee48f43a7e2c7e9c3216ec5e95acf22a2bebfb4a1d05056ee44 \ + --hash=sha256:98f060ec95ada2ab910c195a2d4317be6ef87936a766b239c46aa3c7aac4f0db # via # gcp-releasetool # twine @@ -440,9 +440,9 @@ urllib3==1.26.12 \ # via # requests # twine -virtualenv==20.16.3 \ - --hash=sha256:4193b7bc8a6cd23e4eb251ac64f29b4398ab2c233531e66e40b19a6b7b0d30c1 \ - --hash=sha256:d86ea0bb50e06252d79e6c241507cb904fcd66090c3271381372d6221a3970f9 +virtualenv==20.16.4 \ + --hash=sha256:014f766e4134d0008dcaa1f95bafa0fb0f575795d07cae50b1bee514185d6782 \ + --hash=sha256:035ed57acce4ac35c82c9d8802202b0e71adac011a511ff650cbcf9635006a22 # via nox webencodings==0.5.1 \ --hash=sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 \ From 5aeedaa2f4e6a0200d50521dfd90f39f9a24d0cc Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 1 Sep 2022 20:35:40 -0400 Subject: [PATCH 007/536] fix: uses function (create_job) more appropriate to the described sample intent (#1309) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: uses function more appropriate to the described title * adds additional explanation for the end users * adds REST API URL for reference * corrects flake 8 linter errors * blackens file * adds type hints * avoids unreliable version of grpcio * updates imports to fix linting error * better method to avoid grpcio 1.49.0rc1 * Update samples/create_job.py Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> * adds further explanation on when/why to use create_jobs * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates references Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> Co-authored-by: Owl Bot --- samples/create_job.py | 52 ++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/samples/create_job.py b/samples/create_job.py index 39922f7ae..129a08a1b 100644 --- a/samples/create_job.py +++ b/samples/create_job.py @@ -13,12 +13,13 @@ # limitations under the License. import typing +from typing import Union if typing.TYPE_CHECKING: - from google.cloud import bigquery + from google.cloud.bigquery import LoadJob, CopyJob, ExtractJob, QueryJob -def create_job() -> "bigquery.QueryJob": +def create_job() -> "Union[LoadJob, CopyJob, ExtractJob, QueryJob]": # [START bigquery_create_job] from google.cloud import bigquery @@ -26,20 +27,41 @@ def create_job() -> "bigquery.QueryJob": # Construct a BigQuery client object. client = bigquery.Client() - query_job = client.query( - "SELECT country_name from `bigquery-public-data.utility_us.country_code_iso`", - # Explicitly force job execution to be routed to a specific processing - # location. - location="US", - # Specify a job configuration to set optional job resource properties. - job_config=bigquery.QueryJobConfig( - labels={"example-label": "example-value"}, maximum_bytes_billed=1000000 - ), - # The client libraries automatically generate a job ID. Override the - # generated ID with either the job_id_prefix or job_id parameters. - job_id_prefix="code_sample_", + query_job = client.create_job( + # Specify a job configuration, providing a query + # and/or optional job resource properties, as needed. + # The job instance can be a LoadJob, CopyJob, ExtractJob, QueryJob + # Here, we demonstrate a "query" job. + # References: + # https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html#google.cloud.bigquery.client.Client.create_job + # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job + # + # Example use cases for .create_job() include: + # * to retry failed jobs + # * to generate jobs with an experimental API property that hasn't + # been added to one of the manually written job configuration + # classes yet + # + # NOTE: unless it is necessary to create a job in this way, the + # preferred approach is to use one of the dedicated API calls: + # client.query() + # client.extract_table() + # client.copy_table() + # client.load_table_file(), client.load_table_from_dataframe(), etc + job_config={ + "query": { + "query": """ + SELECT country_name + FROM `bigquery-public-data.utility_us.country_code_iso` + LIMIT 5 + """, + }, + "labels": {"example-label": "example-value"}, + "maximum_bytes_billed": 10000000, + } ) # Make an API request. - print("Started job: {}".format(query_job.job_id)) + print(f"Started job: {query_job.job_id}") # [END bigquery_create_job] + return query_job From 075df0e8c8d3ff90b452a4718780a99359607e6a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 1 Sep 2022 18:27:08 -0700 Subject: [PATCH 008/536] chore(python): update .kokoro/requirements.txt (#1336) Source-Link: https://github.com/googleapis/synthtool/commit/703554a14c7479542335b62fa69279f93a9e38ec Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:94961fdc5c9ca6d13530a6a414a49d2f607203168215d074cdb0a1df9ec31c0b Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/requirements.txt | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 0d9eb2af9..2fa0f7c4f 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:1f0dbd02745fb7cf255563dab5968345989308544e52b7f460deadd5e78e63b0 + digest: sha256:94961fdc5c9ca6d13530a6a414a49d2f607203168215d074cdb0a1df9ec31c0b diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 92b2f727e..385f2d4d6 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -241,6 +241,10 @@ importlib-metadata==4.12.0 \ # via # -r requirements.in # twine +jaraco-classes==3.2.2 \ + --hash=sha256:6745f113b0b588239ceb49532aa09c3ebb947433ce311ef2f8e3ad64ebb74594 \ + --hash=sha256:e6ef6fd3fcf4579a7a019d87d1e56a883f4e4c35cfe925f86731abc58804e647 + # via keyring jeepney==0.8.0 \ --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755 @@ -299,6 +303,10 @@ markupsafe==2.1.1 \ --hash=sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a \ --hash=sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7 # via jinja2 +more-itertools==8.14.0 \ + --hash=sha256:1bc4f91ee5b1b31ac7ceacc17c09befe6a40a503907baf9c839c229b5095cfd2 \ + --hash=sha256:c09443cd3d5438b8dafccd867a6bc1cb0894389e90cb53d227456b0b0bccb750 + # via jaraco-classes nox==2022.8.7 \ --hash=sha256:1b894940551dc5c389f9271d197ca5d655d40bdc6ccf93ed6880e4042760a34b \ --hash=sha256:96cca88779e08282a699d672258ec01eb7c792d35bbbf538c723172bce23212c From bfc92ffdf781644e5b16db3a68e5e2337efeb6df Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 2 Sep 2022 18:40:23 +0000 Subject: [PATCH 009/536] chore(python): exclude setup.py in renovate config (#1338) Source-Link: https://github.com/googleapis/synthtool/commit/56da63e80c384a871356d1ea6640802017f213b4 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:993a058718e84a82fda04c3177e58f0a43281a996c7c395e0a56ccc4d6d210d7 --- .github/.OwlBot.lock.yaml | 2 +- renovate.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 2fa0f7c4f..b8dcb4a4a 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:94961fdc5c9ca6d13530a6a414a49d2f607203168215d074cdb0a1df9ec31c0b + digest: sha256:993a058718e84a82fda04c3177e58f0a43281a996c7c395e0a56ccc4d6d210d7 diff --git a/renovate.json b/renovate.json index 566a70f3c..39b2a0ec9 100644 --- a/renovate.json +++ b/renovate.json @@ -5,7 +5,7 @@ ":preserveSemverRanges", ":disableDependencyDashboard" ], - "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt"], + "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py"], "pip_requirements": { "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] } From 119dd1e350a824955d36c6f0a49157335ffb86a8 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 6 Sep 2022 16:15:19 +0200 Subject: [PATCH 010/536] chore(deps): update all dependencies (#1339) --- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 4 ++-- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index fb466e509..6f722c66e 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.1.2 +pytest==7.1.3 mock==4.0.3 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 1b479edf1..ff6754a35 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.8.21 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' -google-api-core==2.8.2 +google-api-core==2.10.0 google-auth==2.11.0 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.14.2 @@ -19,7 +19,7 @@ google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.3.3 googleapis-common-protos==1.56.4 -grpcio==1.47.0 +grpcio==1.48.1 idna==3.3 libcst==0.4.7 munch==2.5.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 856751fc1..7902c72ef 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.1.2 +pytest==7.1.3 mock==4.0.3 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index c85c78961..a0807f8a9 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,10 +1,10 @@ db-dtypes==1.0.3 google-cloud-bigquery-storage==2.14.2 google-auth-oauthlib==0.5.2 -grpcio==1.47.0 +grpcio==1.48.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.4.0; python_version >= '3.9' +ipython==8.5.0; python_version >= '3.9' matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.4; python_version >= '3.8' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 856751fc1..7902c72ef 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.1.2 +pytest==7.1.3 mock==4.0.3 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index dbfb880fd..e39d074ee 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -2,10 +2,10 @@ db-dtypes==1.0.3 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.14.2 google-auth-oauthlib==0.5.2 -grpcio==1.47.0 +grpcio==1.48.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.4.0; python_version >= '3.9' +ipython==8.5.0; python_version >= '3.9' matplotlib==3.5.3 pandas===1.3.5; python_version == '3.7' pandas==1.4.4; python_version >= '3.8' From 543a8e4026d8aa5b6de377cc765e8aaa5279a58c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 6 Sep 2022 21:07:19 +0200 Subject: [PATCH 011/536] chore(deps): update dependency google-cloud-bigquery-storage to v2.15.0 (#1342) --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ff6754a35..a73ea6b03 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -14,7 +14,7 @@ geopandas==0.11.1; python_version >= '3.8' google-api-core==2.10.0 google-auth==2.11.0 google-cloud-bigquery==3.3.2 -google-cloud-bigquery-storage==2.14.2 +google-cloud-bigquery-storage==2.15.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.3.3 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index a0807f8a9..64e5e36e3 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.3 -google-cloud-bigquery-storage==2.14.2 +google-cloud-bigquery-storage==2.15.0 google-auth-oauthlib==0.5.2 grpcio==1.48.1 ipython===7.31.1; python_version == '3.7' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index e39d074ee..a12770999 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.0.3 google-cloud-bigquery==3.3.2 -google-cloud-bigquery-storage==2.14.2 +google-cloud-bigquery-storage==2.15.0 google-auth-oauthlib==0.5.2 grpcio==1.48.1 ipython===7.31.1; python_version == '3.7' From 6239fa52b22380439b3fddd7c2e88de97a5cb733 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 12 Sep 2022 16:45:17 +0200 Subject: [PATCH 012/536] chore(deps): update dependency certifi to v2022.6.15.1 (#1346) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index a73ea6b03..5697f264c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==22.1.0 -certifi==2022.6.15 +certifi==2022.6.15.1 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 From 469f7733bbb76a91a108a42c2d2a4c94c48012fc Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 13 Sep 2022 16:24:18 +0000 Subject: [PATCH 013/536] chore: detect samples tests in nested directories (#1349) Source-Link: https://github.com/googleapis/synthtool/commit/50db768f450a50d7c1fd62513c113c9bb96fd434 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:e09366bdf0fd9c8976592988390b24d53583dd9f002d476934da43725adbb978 --- .github/.OwlBot.lock.yaml | 2 +- samples/geography/noxfile.py | 6 ++++-- samples/magics/noxfile.py | 6 ++++-- samples/snippets/noxfile.py | 6 ++++-- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index b8dcb4a4a..aa547962e 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:993a058718e84a82fda04c3177e58f0a43281a996c7c395e0a56ccc4d6d210d7 + digest: sha256:e09366bdf0fd9c8976592988390b24d53583dd9f002d476934da43725adbb978 diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 29b5bc852..b053ca568 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -208,8 +208,10 @@ def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: # check for presence of tests - test_list = glob.glob("*_test.py") + glob.glob("test_*.py") - test_list.extend(glob.glob("tests")) + test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob( + "**/test_*.py", recursive=True + ) + test_list.extend(glob.glob("**/tests", recursive=True)) if len(test_list) == 0: print("No tests found, skipping directory.") diff --git a/samples/magics/noxfile.py b/samples/magics/noxfile.py index 29b5bc852..b053ca568 100644 --- a/samples/magics/noxfile.py +++ b/samples/magics/noxfile.py @@ -208,8 +208,10 @@ def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: # check for presence of tests - test_list = glob.glob("*_test.py") + glob.glob("test_*.py") - test_list.extend(glob.glob("tests")) + test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob( + "**/test_*.py", recursive=True + ) + test_list.extend(glob.glob("**/tests", recursive=True)) if len(test_list) == 0: print("No tests found, skipping directory.") diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 29b5bc852..b053ca568 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -208,8 +208,10 @@ def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: # check for presence of tests - test_list = glob.glob("*_test.py") + glob.glob("test_*.py") - test_list.extend(glob.glob("tests")) + test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob( + "**/test_*.py", recursive=True + ) + test_list.extend(glob.glob("**/tests", recursive=True)) if len(test_list) == 0: print("No tests found, skipping directory.") From 34a3f5cf34d4a08889fe1407f4ad6ce3c9d93838 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 14 Sep 2022 15:50:05 +0200 Subject: [PATCH 014/536] chore(deps): update all dependencies (#1351) --- samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5697f264c..d210da445 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==22.1.0 -certifi==2022.6.15.1 +certifi==2022.6.15.2 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 @@ -20,7 +20,7 @@ google-crc32c==1.5.0 google-resumable-media==2.3.3 googleapis-common-protos==1.56.4 grpcio==1.48.1 -idna==3.3 +idna==3.4 libcst==0.4.7 munch==2.5.0 mypy-extensions==0.4.3 From 7499655e47517cdc7de8e6e06ad86ed2774ae366 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 14 Sep 2022 19:47:46 +0200 Subject: [PATCH 015/536] chore(deps): update dependency google-api-core to v2.10.1 (#1353) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d210da445..e17b74bc6 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.8.21 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' -google-api-core==2.10.0 +google-api-core==2.10.1 google-auth==2.11.0 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.15.0 From 86aa3939344e76e9582a2e45c87a7203755a5ca9 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 15 Sep 2022 15:25:11 +0200 Subject: [PATCH 016/536] chore(deps): update all dependencies (#1354) --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e17b74bc6..ca8f79c3c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==22.1.0 -certifi==2022.6.15.2 +certifi==2022.9.14 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 64e5e36e3..8a86e1495 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.0.3 google-cloud-bigquery-storage==2.15.0 -google-auth-oauthlib==0.5.2 +google-auth-oauthlib==0.5.3 grpcio==1.48.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index a12770999..3009f1899 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.0.3 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.15.0 -google-auth-oauthlib==0.5.2 +google-auth-oauthlib==0.5.3 grpcio==1.48.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' From 1369a9d937b85d6a2a6bf9a672c71620648b1e3e Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 19 Sep 2022 17:18:35 -0400 Subject: [PATCH 017/536] Fix: Refactors code to account for a tdqm code deprecation (#1357) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * tests some options * refactors to use tqdm.* notation * refactors tqdm function calls to account for deprecation warning * refactors _tqdm_helpers to account for tqdm deprecation warnings * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * removes an extraneous reference to ipywidgets * removes unneeded import * removes import and fixes linting error Co-authored-by: Owl Bot --- google/cloud/bigquery/_tqdm_helpers.py | 3 ++- tests/unit/test_table.py | 15 ++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigquery/_tqdm_helpers.py b/google/cloud/bigquery/_tqdm_helpers.py index f2355ab3b..ae289d8a6 100644 --- a/google/cloud/bigquery/_tqdm_helpers.py +++ b/google/cloud/bigquery/_tqdm_helpers.py @@ -22,6 +22,7 @@ try: import tqdm # type: ignore + except ImportError: # pragma: NO COVER tqdm = None @@ -48,7 +49,7 @@ def get_progress_bar(progress_bar_type, description, total, unit): if progress_bar_type == "tqdm": return tqdm.tqdm(desc=description, total=total, unit=unit) elif progress_bar_type == "tqdm_notebook": - return tqdm.tqdm_notebook(desc=description, total=total, unit=unit) + return tqdm.notebook.tqdm(desc=description, total=total, unit=unit) elif progress_bar_type == "tqdm_gui": return tqdm.tqdm_gui(desc=description, total=total, unit=unit) except (KeyError, TypeError): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index b5f2e58c6..fca43f1ee 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -45,7 +45,9 @@ geopandas = None try: - from tqdm import tqdm + import tqdm + from tqdm.std import TqdmDeprecationWarning + except (ImportError, AttributeError): # pragma: NO COVER tqdm = None @@ -2798,7 +2800,7 @@ def test_to_arrow_w_bqstorage_no_streams(self): @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") - @mock.patch("tqdm.tqdm_notebook") + @mock.patch("tqdm.notebook.tqdm") @mock.patch("tqdm.tqdm") def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock): from google.cloud.bigquery.schema import SchemaField @@ -3146,7 +3148,7 @@ def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") - @mock.patch("tqdm.tqdm_notebook") + @mock.patch("tqdm.notebook.tqdm") @mock.patch("tqdm.tqdm") def test_to_dataframe_progress_bar( self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock @@ -3249,7 +3251,7 @@ def test_to_dataframe_no_tqdm(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui", new=None) # will raise TypeError on call - @mock.patch("tqdm.tqdm_notebook", new=None) # will raise TypeError on call + @mock.patch("tqdm.notebook.tqdm", new=None) # will raise TypeError on call @mock.patch("tqdm.tqdm", new=None) # will raise TypeError on call def test_to_dataframe_tqdm_error(self): from google.cloud.bigquery.schema import SchemaField @@ -3281,7 +3283,10 @@ def test_to_dataframe_tqdm_error(self): # Warn that a progress bar was requested, but creating the tqdm # progress bar failed. for warning in warned: - self.assertIs(warning.category, UserWarning) + self.assertIn( + warning.category, + [UserWarning, DeprecationWarning, TqdmDeprecationWarning], + ) @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_empty_results(self): From 373442b05eca56eb61dce5e84daa29fe491129c7 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 20 Sep 2022 13:22:01 +0200 Subject: [PATCH 018/536] chore(deps): update all dependencies (#1358) * chore(deps): update all dependencies * pin matplotlib version for python 3.7 * pin matplotlib version for python 3.7 Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 5 +++-- samples/snippets/requirements.txt | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ca8f79c3c..5f0ee10ee 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -19,7 +19,7 @@ google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.3.3 googleapis-common-protos==1.56.4 -grpcio==1.48.1 +grpcio==1.49.0 idna==3.4 libcst==0.4.7 munch==2.5.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 8a86e1495..5ce01be2d 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,11 +1,12 @@ db-dtypes==1.0.3 google-cloud-bigquery-storage==2.15.0 google-auth-oauthlib==0.5.3 -grpcio==1.48.1 +grpcio==1.49.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' -matplotlib==3.5.3 +matplotlib==3.5.3; python_version == '3.7' +matplotlib==3.6.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.4.4; python_version >= '3.8' pyarrow==9.0.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 3009f1899..01bb94348 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -2,11 +2,12 @@ db-dtypes==1.0.3 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.15.0 google-auth-oauthlib==0.5.3 -grpcio==1.48.1 +grpcio==1.49.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' -matplotlib==3.5.3 +matplotlib==3.5.3; python_version == '3.7' +matplotlib==3.6.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.4.4; python_version >= '3.8' pyarrow==9.0.0 From 02e35709073c8f066bc3c6c2b3ee1bf294111881 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 20 Sep 2022 17:25:43 +0200 Subject: [PATCH 019/536] chore(deps): update all dependencies (#1360) * chore(deps): update all dependencies * revert * revert * pin matplotlib for py37 Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 6 +++--- samples/snippets/requirements.txt | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5f0ee10ee..fafa43e9a 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,9 +12,9 @@ geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' google-api-core==2.10.1 -google-auth==2.11.0 +google-auth==2.11.1 google-cloud-bigquery==3.3.2 -google-cloud-bigquery-storage==2.15.0 +google-cloud-bigquery-storage==2.16.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.3.3 @@ -26,7 +26,7 @@ munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' -pandas==1.4.4; python_version >= '3.8' +pandas==1.5.0; python_version >= '3.8' proto-plus==1.22.1 pyarrow==9.0.0 pyasn1==0.4.8 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 5ce01be2d..05bd5ef89 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,14 +1,14 @@ db-dtypes==1.0.3 -google-cloud-bigquery-storage==2.15.0 +google-cloud-bigquery-storage==2.16.0 google-auth-oauthlib==0.5.3 grpcio==1.49.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' -matplotlib==3.5.3; python_version == '3.7' +matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.4.4; python_version >= '3.8' +pandas==1.5.0; python_version >= '3.8' pyarrow==9.0.0 pytz==2022.2.1 typing-extensions==4.3.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 01bb94348..241fba27a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,15 +1,15 @@ db-dtypes==1.0.3 google-cloud-bigquery==3.3.2 -google-cloud-bigquery-storage==2.15.0 +google-cloud-bigquery-storage==2.16.0 google-auth-oauthlib==0.5.3 grpcio==1.49.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' -matplotlib==3.5.3; python_version == '3.7' +matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.4.4; python_version >= '3.8' +pandas==1.5.0; python_version >= '3.8' pyarrow==9.0.0 pytz==2022.2.1 typing-extensions==4.3.0 From 2c57533a1e50a64512c9211ccc94289d783ccef6 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 21 Sep 2022 16:50:49 +0200 Subject: [PATCH 020/536] chore(deps): update dependency db-dtypes to v1.0.4 (#1361) --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index fafa43e9a..6b14f90ab 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -6,7 +6,7 @@ click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.0.3 +db-dtypes==1.0.4 Fiona==1.8.21 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 05bd5ef89..dd61784f1 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -db-dtypes==1.0.3 +db-dtypes==1.0.4 google-cloud-bigquery-storage==2.16.0 google-auth-oauthlib==0.5.3 grpcio==1.49.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 241fba27a..1e91a5ec7 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -db-dtypes==1.0.3 +db-dtypes==1.0.4 google-cloud-bigquery==3.3.2 google-cloud-bigquery-storage==2.16.0 google-auth-oauthlib==0.5.3 From e11a75abbe224590af68a4fbc68d61763130202a Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 28 Sep 2022 09:34:08 -0400 Subject: [PATCH 021/536] chore: updates blacken process to ensure all samples are processed (#1367) --- noxfile.py | 10 +++++++++- owlbot.py | 7 ++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index d9883d069..0b0800d35 100644 --- a/noxfile.py +++ b/noxfile.py @@ -25,7 +25,15 @@ MYPY_VERSION = "mypy==0.910" PYTYPE_VERSION = "pytype==2021.4.9" BLACK_VERSION = "black==22.3.0" -BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") +BLACK_PATHS = ( + "docs", + "google", + "samples", + "samples/tests", + "tests", + "noxfile.py", + "setup.py", +) DEFAULT_PYTHON_VERSION = "3.8" SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.10"] diff --git a/owlbot.py b/owlbot.py index 4d287ac46..b887449ff 100644 --- a/owlbot.py +++ b/owlbot.py @@ -13,12 +13,15 @@ # limitations under the License. """This script is used to synthesize generated parts of this library.""" +from pathlib import Path import textwrap import synthtool as s from synthtool import gcp from synthtool.languages import python +REPO_ROOT = Path(__file__).parent.absolute() + default_version = "v2" for library in s.get_staging_dirs(default_version): @@ -120,4 +123,6 @@ ), ) -s.shell.run(["nox", "-s", "blacken"], hide_output=False) +# s.shell.run(["nox", "-s", "blacken"], hide_output=False) +for noxfile in REPO_ROOT.glob("samples/**/noxfile.py"): + s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) \ No newline at end of file From 2b6fecbdbf936fba080f18723f90c34a45f9ab2c Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 29 Sep 2022 13:47:52 -0400 Subject: [PATCH 022/536] chore(main): release 3.3.3 (#1329) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(main): release 3.3.3 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update docs/conf.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * chore: also run blacken session in the root of the repo * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe Co-authored-by: Anthonios Partheniou --- CHANGELOG.md | 13 +++++++++++++ google/cloud/bigquery/version.py | 2 +- owlbot.py | 4 ++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ba373179..1340fd396 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.3.3](https://github.com/googleapis/python-bigquery/compare/v3.3.2...v3.3.3) (2022-09-28) + + +### Bug Fixes + +* Refactors code to account for a tdqm code deprecation ([#1357](https://github.com/googleapis/python-bigquery/issues/1357)) ([1369a9d](https://github.com/googleapis/python-bigquery/commit/1369a9d937b85d6a2a6bf9a672c71620648b1e3e)) +* Validate opentelemetry span job attributes have values ([#1327](https://github.com/googleapis/python-bigquery/issues/1327)) ([8287af1](https://github.com/googleapis/python-bigquery/commit/8287af1299169546f847126f03ae04e48890139e)) + + +### Documentation + +* **samples:** uses function (create_job) more appropriate to the described sample intent ([5aeedaa](https://github.com/googleapis/python-bigquery/commit/5aeedaa2f4e6a0200d50521dfd90f39f9a24d0cc)) + ## [3.3.2](https://github.com/googleapis/python-bigquery/compare/v3.3.1...v3.3.2) (2022-08-16) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index eb307e8d6..c99682bc8 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.3.2" +__version__ = "3.3.3" diff --git a/owlbot.py b/owlbot.py index b887449ff..c2de31042 100644 --- a/owlbot.py +++ b/owlbot.py @@ -123,6 +123,6 @@ ), ) -# s.shell.run(["nox", "-s", "blacken"], hide_output=False) +s.shell.run(["nox", "-s", "blacken"], hide_output=False) for noxfile in REPO_ROOT.glob("samples/**/noxfile.py"): - s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) \ No newline at end of file + s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) From f13383a22d7b1a0a714dc1b1210ad970146bd094 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 29 Sep 2022 16:32:38 -0400 Subject: [PATCH 023/536] fix(deps): require protobuf >= 3.20.2 (#1369) * chore: exclude requirements.txt file from renovate-bot Source-Link: https://github.com/googleapis/synthtool/commit/f58d3135a2fab20e225d98741dbc06d57459b816 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:7a40313731a7cb1454eef6b33d3446ebb121836738dc3ab3d2d3ded5268c35b6 * update constraints files * fix(deps): require protobuf 3.20.2 Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou Co-authored-by: Chalmer Lowe --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/requirements.txt | 49 ++++++++++++++++++------------------- setup.py | 2 +- testing/constraints-3.7.txt | 2 +- 4 files changed, 27 insertions(+), 28 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index aa547962e..3815c983c 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:e09366bdf0fd9c8976592988390b24d53583dd9f002d476934da43725adbb978 + digest: sha256:7a40313731a7cb1454eef6b33d3446ebb121836738dc3ab3d2d3ded5268c35b6 diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 385f2d4d6..d15994bac 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -325,31 +325,30 @@ platformdirs==2.5.2 \ --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ --hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 # via virtualenv -protobuf==3.20.1 \ - --hash=sha256:06059eb6953ff01e56a25cd02cca1a9649a75a7e65397b5b9b4e929ed71d10cf \ - --hash=sha256:097c5d8a9808302fb0da7e20edf0b8d4703274d140fd25c5edabddcde43e081f \ - --hash=sha256:284f86a6207c897542d7e956eb243a36bb8f9564c1742b253462386e96c6b78f \ - --hash=sha256:32ca378605b41fd180dfe4e14d3226386d8d1b002ab31c969c366549e66a2bb7 \ - --hash=sha256:3cc797c9d15d7689ed507b165cd05913acb992d78b379f6014e013f9ecb20996 \ - --hash=sha256:62f1b5c4cd6c5402b4e2d63804ba49a327e0c386c99b1675c8a0fefda23b2067 \ - --hash=sha256:69ccfdf3657ba59569c64295b7d51325f91af586f8d5793b734260dfe2e94e2c \ - --hash=sha256:6f50601512a3d23625d8a85b1638d914a0970f17920ff39cec63aaef80a93fb7 \ - --hash=sha256:7403941f6d0992d40161aa8bb23e12575637008a5a02283a930addc0508982f9 \ - --hash=sha256:755f3aee41354ae395e104d62119cb223339a8f3276a0cd009ffabfcdd46bb0c \ - --hash=sha256:77053d28427a29987ca9caf7b72ccafee011257561259faba8dd308fda9a8739 \ - --hash=sha256:7e371f10abe57cee5021797126c93479f59fccc9693dafd6bd5633ab67808a91 \ - --hash=sha256:9016d01c91e8e625141d24ec1b20fed584703e527d28512aa8c8707f105a683c \ - --hash=sha256:9be73ad47579abc26c12024239d3540e6b765182a91dbc88e23658ab71767153 \ - --hash=sha256:adc31566d027f45efe3f44eeb5b1f329da43891634d61c75a5944e9be6dd42c9 \ - --hash=sha256:adfc6cf69c7f8c50fd24c793964eef18f0ac321315439d94945820612849c388 \ - --hash=sha256:af0ebadc74e281a517141daad9d0f2c5d93ab78e9d455113719a45a49da9db4e \ - --hash=sha256:cb29edb9eab15742d791e1025dd7b6a8f6fcb53802ad2f6e3adcb102051063ab \ - --hash=sha256:cd68be2559e2a3b84f517fb029ee611546f7812b1fdd0aa2ecc9bc6ec0e4fdde \ - --hash=sha256:cdee09140e1cd184ba9324ec1df410e7147242b94b5f8b0c64fc89e38a8ba531 \ - --hash=sha256:db977c4ca738dd9ce508557d4fce0f5aebd105e158c725beec86feb1f6bc20d8 \ - --hash=sha256:dd5789b2948ca702c17027c84c2accb552fc30f4622a98ab5c51fcfe8c50d3e7 \ - --hash=sha256:e250a42f15bf9d5b09fe1b293bdba2801cd520a9f5ea2d7fb7536d4441811d20 \ - --hash=sha256:ff8d8fa42675249bb456f5db06c00de6c2f4c27a065955917b28c4f15978b9c3 +protobuf==3.20.2 \ + --hash=sha256:03d76b7bd42ac4a6e109742a4edf81ffe26ffd87c5993126d894fe48a120396a \ + --hash=sha256:09e25909c4297d71d97612f04f41cea8fa8510096864f2835ad2f3b3df5a5559 \ + --hash=sha256:18e34a10ae10d458b027d7638a599c964b030c1739ebd035a1dfc0e22baa3bfe \ + --hash=sha256:291fb4307094bf5ccc29f424b42268640e00d5240bf0d9b86bf3079f7576474d \ + --hash=sha256:2c0b040d0b5d5d207936ca2d02f00f765906622c07d3fa19c23a16a8ca71873f \ + --hash=sha256:384164994727f274cc34b8abd41a9e7e0562801361ee77437099ff6dfedd024b \ + --hash=sha256:3cb608e5a0eb61b8e00fe641d9f0282cd0eedb603be372f91f163cbfbca0ded0 \ + --hash=sha256:5d9402bf27d11e37801d1743eada54372f986a372ec9679673bfcc5c60441151 \ + --hash=sha256:712dca319eee507a1e7df3591e639a2b112a2f4a62d40fe7832a16fd19151750 \ + --hash=sha256:7a5037af4e76c975b88c3becdf53922b5ffa3f2cddf657574a4920a3b33b80f3 \ + --hash=sha256:8228e56a865c27163d5d1d1771d94b98194aa6917bcfb6ce139cbfa8e3c27334 \ + --hash=sha256:84a1544252a933ef07bb0b5ef13afe7c36232a774affa673fc3636f7cee1db6c \ + --hash=sha256:84fe5953b18a383fd4495d375fe16e1e55e0a3afe7b4f7b4d01a3a0649fcda9d \ + --hash=sha256:9c673c8bfdf52f903081816b9e0e612186684f4eb4c17eeb729133022d6032e3 \ + --hash=sha256:9f876a69ca55aed879b43c295a328970306e8e80a263ec91cf6e9189243c613b \ + --hash=sha256:a9e5ae5a8e8985c67e8944c23035a0dff2c26b0f5070b2f55b217a1c33bbe8b1 \ + --hash=sha256:b4fdb29c5a7406e3f7ef176b2a7079baa68b5b854f364c21abe327bbeec01cdb \ + --hash=sha256:c184485e0dfba4dfd451c3bd348c2e685d6523543a0f91b9fd4ae90eb09e8422 \ + --hash=sha256:c9cdf251c582c16fd6a9f5e95836c90828d51b0069ad22f463761d27c6c19019 \ + --hash=sha256:e39cf61bb8582bda88cdfebc0db163b774e7e03364bbf9ce1ead13863e81e359 \ + --hash=sha256:e8fbc522303e09036c752a0afcc5c0603e917222d8bedc02813fd73b4b4ed804 \ + --hash=sha256:f34464ab1207114e73bba0794d1257c150a2b89b7a9faf504e00af7c9fd58978 \ + --hash=sha256:f52dabc96ca99ebd2169dadbe018824ebda08a795c7684a0b7d203a290f3adb0 # via # gcp-docuploader # gcp-releasetool diff --git a/setup.py b/setup.py index d8f2bb226..be02dc409 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3, <22.0.0dev", - "protobuf >= 3.19.0, <5.0.0dev", # For the legacy proto-based types. + "protobuf >= 3.20.2, <5.0.0dev", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", "pyarrow >= 3.0.0, < 10.0dev", "requests >= 2.18.0, < 3.0.0dev", diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 67313f6b8..3b07dc9fa 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -18,7 +18,7 @@ opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 pandas==1.1.0 proto-plus==1.22.0 -protobuf==3.19.0 +protobuf==3.20.2 pyarrow==3.0.0 python-dateutil==2.7.3 requests==2.18.0 From 28deba62a63f7d120665a1962f26210d4be0aff4 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 3 Oct 2022 12:20:55 -0400 Subject: [PATCH 024/536] chore(main): release 3.3.4 (#1371) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1340fd396..d5efd7dd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.3.4](https://github.com/googleapis/python-bigquery/compare/v3.3.3...v3.3.4) (2022-09-29) + + +### Bug Fixes + +* **deps:** Require protobuf >= 3.20.2 ([#1369](https://github.com/googleapis/python-bigquery/issues/1369)) ([f13383a](https://github.com/googleapis/python-bigquery/commit/f13383a22d7b1a0a714dc1b1210ad970146bd094)) + ## [3.3.3](https://github.com/googleapis/python-bigquery/compare/v3.3.2...v3.3.3) (2022-09-28) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index c99682bc8..3e1a9869c 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.3.3" +__version__ = "3.3.4" From 21cc525a86a06acfe73e5c5a74ec5f0b61e410f2 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Mon, 3 Oct 2022 20:56:22 -0400 Subject: [PATCH 025/536] docs: fix typos (#1372) --- google/cloud/bigquery/query.py | 2 +- google/cloud/bigquery/schema.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 0469cb271..944ad884e 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -795,7 +795,7 @@ def _key(self): Used to compute this instance's hashcode and evaluate equality. Returns: - Tuple: The contents of this :class:`~google.cloud.biquery.ArrayQueryParameter`. + Tuple: The contents of this :class:`~google.cloud.bigquery.ArrayQueryParameter`. """ return (self.name, self.struct_types, self.struct_values) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 5580a2ae9..1df78424d 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -144,7 +144,7 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": :meth:`to_api_repr`. Returns: - google.cloud.biquery.schema.SchemaField: The ``SchemaField`` object. + google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object. """ field_type = api_repr["type"].upper() From 78db9ea966b5e10a8afe9e5d9fdbce3f21438bce Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 4 Oct 2022 16:05:14 +0200 Subject: [PATCH 026/536] chore(deps): update all dependencies (#1363) Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 14 +++++++------- samples/magics/requirements.txt | 6 +++--- samples/snippets/requirements.txt | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 6b14f90ab..72dd950ea 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==22.1.0 -certifi==2022.9.14 +certifi==2022.9.24 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 @@ -12,14 +12,14 @@ geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' google-api-core==2.10.1 -google-auth==2.11.1 -google-cloud-bigquery==3.3.2 -google-cloud-bigquery-storage==2.16.0 +google-auth==2.12.0 +google-cloud-bigquery==3.3.3 +google-cloud-bigquery-storage==2.16.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 -google-resumable-media==2.3.3 +google-resumable-media==2.4.0 googleapis-common-protos==1.56.4 -grpcio==1.49.0 +grpcio==1.49.1 idna==3.4 libcst==0.4.7 munch==2.5.0 @@ -34,7 +34,7 @@ pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.2.1 +pytz==2022.4 PyYAML==6.0 requests==2.28.1 rsa==4.9 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index dd61784f1..212de247c 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.0.4 -google-cloud-bigquery-storage==2.16.0 +google-cloud-bigquery-storage==2.16.1 google-auth-oauthlib==0.5.3 -grpcio==1.49.0 +grpcio==1.49.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' @@ -10,5 +10,5 @@ matplotlib==3.6.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.0; python_version >= '3.8' pyarrow==9.0.0 -pytz==2022.2.1 +pytz==2022.4 typing-extensions==4.3.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 1e91a5ec7..d5a469644 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,8 +1,8 @@ db-dtypes==1.0.4 -google-cloud-bigquery==3.3.2 -google-cloud-bigquery-storage==2.16.0 +google-cloud-bigquery==3.3.3 +google-cloud-bigquery-storage==2.16.1 google-auth-oauthlib==0.5.3 -grpcio==1.49.0 +grpcio==1.49.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' @@ -11,5 +11,5 @@ matplotlib==3.6.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.0; python_version >= '3.8' pyarrow==9.0.0 -pytz==2022.2.1 +pytz==2022.4 typing-extensions==4.3.0 From 2afd278febe1eb247adc6278ab59903962a5bb6c Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 5 Oct 2022 14:17:58 -0400 Subject: [PATCH 027/536] Fix: refactor to adapt to changes to shapely dependency (#1376) * fix: refactored to account for changes in dependency * Removes comment and ensures linting success * refactor to use loads() function * fix: refactors to account for changes to shapely dependency * fix: refactors to account for changes to shapely dependency * blacken the code * add mypy ignore flag for shapely import --- google/cloud/bigquery/_pandas_helpers.py | 12 +++--------- google/cloud/bigquery/table.py | 4 ++-- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index cc0ee75ff..0d05f53a3 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -57,15 +57,9 @@ if pandas is not None: # pragma: NO COVER def _to_wkb(): - # Create a closure that: - # - Adds a not-null check. This allows the returned function to - # be used directly with apply, unlike `shapely.wkb.dumps`. - # - Avoid extra work done by `shapely.wkb.dumps` that we don't need. - # - Caches the WKBWriter (and write method lookup :) ) - # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace. - from shapely.geos import WKBWriter, lgeos # type: ignore - - write = WKBWriter(lgeos).write + from shapely import wkb # type: ignore + + write = wkb.dumps notnull = pandas.notnull def _to_wkb(v): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 72eb1baf6..8e9e248c4 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -39,11 +39,11 @@ _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" try: - import shapely.geos # type: ignore + import shapely # type: ignore except ImportError: shapely = None else: - _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read + _read_wkt = shapely.wkt.loads import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator From 3e4a074a981eb2920c5f9a711c253565d4844858 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 7 Oct 2022 16:37:03 -0400 Subject: [PATCH 028/536] fix(deps): allow protobuf 3.19.5 (#1379) * fix(deps): allow protobuf 3.19.5 * explicitly exclude protobuf 4.21.0 --- setup.py | 2 +- testing/constraints-3.7.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index be02dc409..695ffd7d3 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3, <22.0.0dev", - "protobuf >= 3.20.2, <5.0.0dev", # For the legacy proto-based types. + "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", "pyarrow >= 3.0.0, < 10.0dev", "requests >= 2.18.0, < 3.0.0dev", diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 3b07dc9fa..c9e40d823 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -18,7 +18,7 @@ opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 pandas==1.1.0 proto-plus==1.22.0 -protobuf==3.20.2 +protobuf==3.19.5 pyarrow==3.0.0 python-dateutil==2.7.3 requests==2.18.0 From 1c0642c55e21f04b15a56cec87fe4eb65bbfbabe Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 10 Oct 2022 18:22:07 +0200 Subject: [PATCH 029/536] chore(deps): update all dependencies (#1380) --- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 72dd950ea..d6b2c3ed9 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.8.21 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' -google-api-core==2.10.1 +google-api-core==2.10.2 google-auth==2.12.0 google-cloud-bigquery==3.3.3 google-cloud-bigquery-storage==2.16.1 @@ -40,6 +40,6 @@ requests==2.28.1 rsa==4.9 Shapely==1.8.4 six==1.16.0 -typing-extensions==4.3.0 +typing-extensions==4.4.0 typing-inspect==0.8.0 urllib3==1.26.12 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 212de247c..f141b5420 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -6,9 +6,9 @@ ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.0; python_version >= '3.8' +matplotlib==3.6.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.0; python_version >= '3.8' pyarrow==9.0.0 pytz==2022.4 -typing-extensions==4.3.0 +typing-extensions==4.4.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d5a469644..0affa1c19 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -7,9 +7,9 @@ ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.0; python_version >= '3.8' +matplotlib==3.6.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.0; python_version >= '3.8' pyarrow==9.0.0 pytz==2022.4 -typing-extensions==4.3.0 +typing-extensions==4.4.0 From 4fce1d93b1763703b115a0480a2b97021786aff7 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 10 Oct 2022 15:32:17 -0400 Subject: [PATCH 030/536] chore: release 3.3.6 (#1381) Release-As: 3.3.6 We're going to use 3.3.5 for a break-fix release. From 506f781c2dd775193336ab9432f32148250ed81d Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Tue, 11 Oct 2022 13:24:40 -0500 Subject: [PATCH 031/536] feat: reconfigure tqdm progress bar in %%bigquery magic (#1355) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add bigquery job id to tqdm progress bar description Change-Id: I2add62e3cdd5f25f88ace2d08f212796918158b6 * write to sys.stdout instead of sys.stderr Change-Id: I6c4001608af1bd8c305c53c6089d64f99605bd8c * configure progress bar Change-Id: I5788448d580b53898e75fba68ff5d5a9d12e33d6 * tqdm.notebook Change-Id: I87e45085b7535083327a5fe2e51dba4b6411db00 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * reinclude ipywidgets Change-Id: Ibe0fc01db05fcfaacdbe0c074b841ead3a39afc9 * reinclude ipywidgets Change-Id: I56f8f98853e83ead0e0ca743c03407a521370233 * change test assertions to tqdm_notebook Change-Id: I2d55e529142ad0024ef4a98c2f15d10a73535380 * change test assertions in test_magics Change-Id: I7961ff1c5e9c54930d077e67ef9e01d79e351c5f * remove ipywidgets Change-Id: I183e277fc7be8797c85d6802f4f8c3947871d4cc * update assertions in test Change-Id: I3b4a1b9460227ca49bf344362efbcc2c895d804d * update method args in query.py and table.py Change-Id: I9a2bf2b54579668ff36ed992e599f4c7fabe918c * string formatting * fix typo * fix incorrect import structure for tqdm notebook * change default decorator back to tqdm * modify system test * add ipywidgets package for tqdm.notebook feature, set tqdm.notebook as default decorator for bq magic * change test assertion in test_query_pandas * revert test changes * reformat import statement * reformat import statement * remove timeouterror side effect * add tqdm mock patch * Revert "reformat import statement" This reverts commit 4114221527507fb270ef0533aa5b8f985f3b7779. * Revert "add tqdm mock patch" This reverts commit ef809a082ae3e4684298764096ac634b0c0281bc. * add timeout side effect * fix assertion * fix import * change mock patch to tqdm * move assertion * move assertions * add timeout side effect * adjust import statement, mock.patch tqdm * create fixture * revert import change * add import from helper * fix linting * remove unused imort * set ipywidgets version to 7.7.1 * set ipywidgets version to 7.7.1 * set ipywidgets version to 7.7.1 * bump sphinx version * bump sphinx version Co-authored-by: Owl Bot --- google/cloud/bigquery/_tqdm_helpers.py | 29 +++++++---- google/cloud/bigquery/job/query.py | 4 +- google/cloud/bigquery/magics/magics.py | 21 ++++---- google/cloud/bigquery/table.py | 12 ++--- noxfile.py | 10 ++-- samples/magics/requirements.txt | 1 + samples/snippets/requirements.txt | 1 + setup.py | 1 + testing/constraints-3.7.txt | 1 + tests/system/test_magics.py | 3 +- tests/unit/job/test_query_pandas.py | 71 +++++++++++++------------- tests/unit/test_magics.py | 5 +- 12 files changed, 87 insertions(+), 72 deletions(-) diff --git a/google/cloud/bigquery/_tqdm_helpers.py b/google/cloud/bigquery/_tqdm_helpers.py index ae289d8a6..456ca2530 100644 --- a/google/cloud/bigquery/_tqdm_helpers.py +++ b/google/cloud/bigquery/_tqdm_helpers.py @@ -15,6 +15,7 @@ """Shared helper functions for tqdm progress bar.""" import concurrent.futures +import sys import time import typing from typing import Optional @@ -22,6 +23,7 @@ try: import tqdm # type: ignore + import tqdm.notebook as notebook # type: ignore except ImportError: # pragma: NO COVER tqdm = None @@ -47,9 +49,22 @@ def get_progress_bar(progress_bar_type, description, total, unit): try: if progress_bar_type == "tqdm": - return tqdm.tqdm(desc=description, total=total, unit=unit) + return tqdm.tqdm( + bar_format="{l_bar}{bar}|", + colour="green", + desc=description, + file=sys.stdout, + total=total, + unit=unit, + ) elif progress_bar_type == "tqdm_notebook": - return tqdm.notebook.tqdm(desc=description, total=total, unit=unit) + return notebook.tqdm( + bar_format="{l_bar}{bar}|", + desc=description, + file=sys.stdout, + total=total, + unit=unit, + ) elif progress_bar_type == "tqdm_gui": return tqdm.tqdm_gui(desc=description, total=total, unit=unit) except (KeyError, TypeError): @@ -80,7 +95,7 @@ def wait_for_query( """ default_total = 1 current_stage = None - start_time = time.time() + start_time = time.perf_counter() progress_bar = get_progress_bar( progress_bar_type, "Query is running", default_total, "query" @@ -95,11 +110,7 @@ def wait_for_query( current_stage = query_job.query_plan[i] progress_bar.total = len(query_job.query_plan) progress_bar.set_description( - "Query executing stage {} and status {} : {:0.2f}s".format( - current_stage.name, - current_stage.status, - time.time() - start_time, - ), + f"Query executing stage {current_stage.name} and status {current_stage.status} : {time.perf_counter() - start_time:.2f}s" ) try: query_result = query_job.result( @@ -107,7 +118,7 @@ def wait_for_query( ) progress_bar.update(default_total) progress_bar.set_description( - "Query complete after {:0.2f}s".format(time.time() - start_time), + f"Job ID {query_job.job_id} successfully executed", ) break except concurrent.futures.TimeoutError: diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index c2d304e30..b0286deae 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1556,9 +1556,9 @@ def to_arrow( No progress bar. ``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. + to :data:`sys.stdout`. ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a + Use the :func:`tqdm.notebook.tqdm` function to display a progress bar as a Jupyter notebook widget. ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 14819aa59..613cc1b58 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -125,7 +125,7 @@ def __init__(self): self._default_query_job_config = bigquery.QueryJobConfig() self._bigquery_client_options = client_options.ClientOptions() self._bqstorage_client_options = client_options.ClientOptions() - self._progress_bar_type = "tqdm" + self._progress_bar_type = "tqdm_notebook" @property def credentials(self): @@ -269,7 +269,7 @@ def progress_bar_type(self): Manually setting the progress_bar_type: >>> from google.cloud.bigquery import magics - >>> magics.context.progress_bar_type = "tqdm" + >>> magics.context.progress_bar_type = "tqdm_notebook" """ return self._progress_bar_type @@ -286,7 +286,7 @@ def _handle_error(error, destination_var=None): Args: error (Exception): - An exception that ocurred during the query exectution. + An exception that ocurred during the query execution. destination_var (Optional[str]): The name of the IPython session variable to store the query job. """ @@ -329,22 +329,25 @@ def _run_query(client, query, job_config=None): Query complete after 2.07s 'bf633912-af2c-4780-b568-5d868058632b' """ - start_time = time.time() + start_time = time.perf_counter() query_job = client.query(query, job_config=job_config) if job_config and job_config.dry_run: return query_job - print("Executing query with job ID: {}".format(query_job.job_id)) + print(f"Executing query with job ID: {query_job.job_id}") while True: - print("\rQuery executing: {:0.2f}s".format(time.time() - start_time), end="") + print( + f"\rQuery executing: {time.perf_counter() - start_time:.2f}s".format(), + end="", + ) try: query_job.result(timeout=0.5) break except futures.TimeoutError: continue - print("\nQuery complete after {:0.2f}s".format(time.time() - start_time)) + print(f"\nJob ID {query_job.job_id} successfully executed") return query_job @@ -365,7 +368,7 @@ def _create_dataset_if_necessary(client, dataset_id): pass dataset = bigquery.Dataset(dataset_reference) dataset.location = client.location - print("Creating dataset: {}".format(dataset_id)) + print(f"Creating dataset: {dataset_id}") dataset = client.create_dataset(dataset) @@ -500,7 +503,7 @@ def _create_dataset_if_necessary(client, dataset_id): default=None, help=( "Sets progress bar type to display a progress bar while executing the query." - "Defaults to use tqdm. Install the ``tqdm`` package to use this feature." + "Defaults to use tqdm_notebook. Install the ``tqdm`` package to use this feature." ), ) def _cell_magic(line, query): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 8e9e248c4..2065c5fd2 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1728,9 +1728,9 @@ def to_arrow( No progress bar. ``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. + to :data:`sys.stdout`. ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a + Use the :func:`tqdm.notebook.tqdm` function to display a progress bar as a Jupyter notebook widget. ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a @@ -1921,9 +1921,9 @@ def to_dataframe( No progress bar. ``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. + to :data:`sys.stdout`. ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a + Use the :func:`tqdm.notebook.tqdm` function to display a progress bar as a Jupyter notebook widget. ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a @@ -2075,9 +2075,9 @@ def to_geodataframe( No progress bar. ``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. + to :data:`sys.stdout`. ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a + Use the :func:`tqdm.notebook.tqdm` function to display a progress bar as a Jupyter notebook widget. ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a diff --git a/noxfile.py b/noxfile.py index 0b0800d35..a91e60a5f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -81,7 +81,7 @@ def default(session, install_extras=True): ) if install_extras and session.python == "3.10": - install_target = ".[bqstorage,pandas,tqdm,opentelemetry]" + install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" elif install_extras: install_target = ".[all]" else: @@ -186,7 +186,7 @@ def system(session): session.install("google-cloud-datacatalog", "-c", constraints_path) if session.python == "3.10": - extras = "[bqstorage,pandas,tqdm,opentelemetry]" + extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) @@ -235,7 +235,7 @@ def snippets(session): session.install("grpcio", "-c", constraints_path) if session.python == "3.10": - extras = "[bqstorage,pandas,tqdm,opentelemetry]" + extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) @@ -387,7 +387,7 @@ def blacken(session): def docs(session): """Build the docs.""" - session.install("recommonmark", "sphinx==4.0.1", "sphinx_rtd_theme") + session.install("recommonmark", "sphinx==4.0.2", "sphinx_rtd_theme") session.install("google-cloud-storage") session.install("-e", ".[all]") @@ -412,7 +412,7 @@ def docfx(session): session.install("-e", ".") session.install( - "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + "sphinx==4.0.2", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index f141b5420..da7131711 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -2,6 +2,7 @@ db-dtypes==1.0.4 google-cloud-bigquery-storage==2.16.1 google-auth-oauthlib==0.5.3 grpcio==1.49.1 +ipywidgets==7.7.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 0affa1c19..4640dc42f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -3,6 +3,7 @@ google-cloud-bigquery==3.3.3 google-cloud-bigquery-storage==2.16.1 google-auth-oauthlib==0.5.3 grpcio==1.49.1 +ipywidgets==7.7.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' diff --git a/setup.py b/setup.py index 695ffd7d3..119ccb0af 100644 --- a/setup.py +++ b/setup.py @@ -52,6 +52,7 @@ # See: https://github.com/googleapis/python-bigquery/issues/757 "bqstorage": [], "pandas": ["pandas>=1.0.0", "db-dtypes>=0.3.0,<2.0.0dev"], + "ipywidgets": ["ipywidgets==7.7.1"], "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], "ipython": ["ipython>=7.0.1,!=8.1.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index c9e40d823..ecce2c7cd 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -12,6 +12,7 @@ google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 grpcio==1.47.0 +ipywidgets==7.7.1 ipython==7.0.1 opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 diff --git a/tests/system/test_magics.py b/tests/system/test_magics.py index 78c15cb50..3d761cd35 100644 --- a/tests/system/test_magics.py +++ b/tests/system/test_magics.py @@ -71,8 +71,7 @@ def test_bigquery_magic(ipython_interactive): # Removes blanks & terminal code (result of display clearing) updates = list(filter(lambda x: bool(x) and x != "\x1b[2K", lines)) assert re.match("Executing query with job ID: .*", updates[0]) - assert all(re.match("Query executing: .*s", line) for line in updates[1:-1]) - assert re.match("Query complete after .*s", updates[-1]) + assert (re.match("Query executing: .*s", line) for line in updates[1:-1]) assert isinstance(result, pandas.DataFrame) assert len(result) == 10 # verify row count assert list(result) == ["url", "view_count"] # verify column names diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 84aab3aca..a45401664 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -37,7 +37,7 @@ except (ImportError, AttributeError): # pragma: NO COVER geopandas = None try: - from tqdm import tqdm + import tqdm except (ImportError, AttributeError): # pragma: NO COVER tqdm = None @@ -301,7 +301,8 @@ def test_to_arrow_max_results_no_progress_bar(): @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_arrow_w_tqdm_w_query_plan(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_arrow_w_tqdm_w_query_plan(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -344,20 +345,20 @@ def test_to_arrow_w_tqdm_w_query_plan(): row_iterator, ], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 3 + assert tqdm_mock.call_count == 3 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - result_patch_tqdm.assert_called_with( + tqdm_mock.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None ) @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_arrow_w_tqdm_w_pending_status(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_arrow_w_tqdm_w_pending_status(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -396,20 +397,20 @@ def test_to_arrow_w_tqdm_w_pending_status(): "google.cloud.bigquery.job.QueryJob.result", side_effect=[concurrent.futures.TimeoutError, row_iterator], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 2 + assert tqdm_mock.call_count == 2 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - result_patch_tqdm.assert_called_with( + tqdm_mock.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None ) @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_arrow_w_tqdm_wo_query_plan(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_arrow_w_tqdm_wo_query_plan(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -439,14 +440,13 @@ def test_to_arrow_w_tqdm_wo_query_plan(): "google.cloud.bigquery.job.QueryJob.result", side_effect=[concurrent.futures.TimeoutError, row_iterator], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 2 + assert tqdm_mock.call_count == 2 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - result_patch_tqdm.assert_called() + tqdm_mock.assert_called() def _make_job(schema=(), rows=()): @@ -720,7 +720,7 @@ def test_to_dataframe_column_date_dtypes(): @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -@mock.patch("tqdm.tqdm") +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") def test_to_dataframe_with_progress_bar(tqdm_mock): from google.cloud.bigquery.job import QueryJob as target_class @@ -744,14 +744,15 @@ def test_to_dataframe_with_progress_bar(tqdm_mock): job = target_class.from_api_repr(begun_resource, client) job.to_dataframe(progress_bar_type=None, create_bqstorage_client=False) - tqdm_mock.assert_not_called() + tqdm_mock.tqdm.assert_not_called() job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) - tqdm_mock.assert_called() + tqdm_mock.tqdm.assert_called() @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_dataframe_w_tqdm_pending(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_dataframe_w_tqdm_pending(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -780,7 +781,7 @@ def test_to_dataframe_w_tqdm_pending(): job._properties["statistics"] = { "query": { "queryPlan": [ - {"name": "S00: Input", "id": "0", "status": "PRNDING"}, + {"name": "S00: Input", "id": "0", "status": "PENDING"}, {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, ] }, @@ -792,21 +793,21 @@ def test_to_dataframe_w_tqdm_pending(): "google.cloud.bigquery.job.QueryJob.result", side_effect=[concurrent.futures.TimeoutError, row_iterator], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 2 + assert tqdm_mock.call_count == 2 assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows assert list(df) == ["name", "age"] # verify the column names - result_patch_tqdm.assert_called_with( + tqdm_mock.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None ) @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_dataframe_w_tqdm(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_dataframe_w_tqdm(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -852,20 +853,21 @@ def test_to_dataframe_w_tqdm(): ], ) - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) - assert result_patch_tqdm.call_count == 3 + assert tqdm_mock.call_count == 3 assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows assert list(df), ["name", "age"] # verify the column names - result_patch_tqdm.assert_called_with( + tqdm_mock.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None ) @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") -def test_to_dataframe_w_tqdm_max_results(): +@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") +def test_to_dataframe_w_tqdm_max_results(tqdm_mock): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField @@ -901,16 +903,13 @@ def test_to_dataframe_w_tqdm_max_results(): "google.cloud.bigquery.job.QueryJob.result", side_effect=[concurrent.futures.TimeoutError, row_iterator], ) - - with result_patch as result_patch_tqdm, reload_patch: + with result_patch as tqdm_mock, reload_patch: job.to_dataframe( progress_bar_type="tqdm", create_bqstorage_client=False, max_results=3 ) - assert result_patch_tqdm.call_count == 2 - result_patch_tqdm.assert_called_with( - timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3 - ) + assert tqdm_mock.call_count == 2 + tqdm_mock.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index ea8fe568f..fdfb16d16 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -278,7 +278,6 @@ def test__run_query(): assert len(execution_updates) == 3 # one update per API response for line in execution_updates: assert re.match("Query executing: .*s", line) - assert re.match("Query complete after .*s", updates[-1]) def test__run_query_dry_run_without_errors_is_silent(): @@ -597,7 +596,7 @@ def warning_match(warning): query_job_mock.to_dataframe.assert_called_once_with( bqstorage_client=bqstorage_instance_mock, create_bqstorage_client=mock.ANY, - progress_bar_type="tqdm", + progress_bar_type="tqdm_notebook", ) assert isinstance(return_value, pandas.DataFrame) @@ -641,7 +640,7 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): query_job_mock.to_dataframe.assert_called_once_with( bqstorage_client=None, create_bqstorage_client=False, - progress_bar_type="tqdm", + progress_bar_type="tqdm_notebook", ) assert isinstance(return_value, pandas.DataFrame) From e39833673582e4a7a34103cfc45603932c9c33b3 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 19 Oct 2022 13:42:02 -0400 Subject: [PATCH 032/536] fix(deps): require requests>=2.21.0 (#1388) --- setup.py | 2 +- testing/constraints-3.7.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 119ccb0af..abed852a8 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", "pyarrow >= 3.0.0, < 10.0dev", - "requests >= 2.18.0, < 3.0.0dev", + "requests >= 2.21.0, < 3.0.0dev", ] extras = { # Keep the no-op bqstorage extra for backward compatibility. diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index ecce2c7cd..57928714f 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -22,7 +22,7 @@ proto-plus==1.22.0 protobuf==3.19.5 pyarrow==3.0.0 python-dateutil==2.7.3 -requests==2.18.0 +requests==2.21.0 Shapely==1.6.4.post2 six==1.13.0 tqdm==4.7.4 From 36c4a63505cb2edcfa5c6dd0307265271c33bfc8 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 19 Oct 2022 20:23:06 +0200 Subject: [PATCH 033/536] chore(deps): update all dependencies (#1382) Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 16 ++++++++-------- samples/magics/requirements.txt | 10 +++++----- samples/snippets/requirements.txt | 12 ++++++------ 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d6b2c3ed9..ef50fafb6 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -7,26 +7,26 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.0.4 -Fiona==1.8.21 +Fiona==1.8.22 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.11.1; python_version >= '3.8' google-api-core==2.10.2 -google-auth==2.12.0 -google-cloud-bigquery==3.3.3 -google-cloud-bigquery-storage==2.16.1 +google-auth==2.13.0 +google-cloud-bigquery==3.3.5 +google-cloud-bigquery-storage==2.16.2 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.0 googleapis-common-protos==1.56.4 -grpcio==1.49.1 +grpcio==1.50.0 idna==3.4 libcst==0.4.7 munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' -pandas==1.5.0; python_version >= '3.8' +pandas==1.5.1; python_version >= '3.8' proto-plus==1.22.1 pyarrow==9.0.0 pyasn1==0.4.8 @@ -34,11 +34,11 @@ pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.4 +pytz==2022.5 PyYAML==6.0 requests==2.28.1 rsa==4.9 -Shapely==1.8.4 +Shapely==1.8.5.post1 six==1.16.0 typing-extensions==4.4.0 typing-inspect==0.8.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index da7131711..bdd026ce5 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,15 +1,15 @@ db-dtypes==1.0.4 -google-cloud-bigquery-storage==2.16.1 +google-cloud-bigquery-storage==2.16.2 google-auth-oauthlib==0.5.3 -grpcio==1.49.1 -ipywidgets==7.7.1 +grpcio==1.50.0 +ipywidgets==8.0.2 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.0; python_version >= '3.8' +pandas==1.5.1; python_version >= '3.8' pyarrow==9.0.0 -pytz==2022.4 +pytz==2022.5 typing-extensions==4.4.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 4640dc42f..ebf892279 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ db-dtypes==1.0.4 -google-cloud-bigquery==3.3.3 -google-cloud-bigquery-storage==2.16.1 +google-cloud-bigquery==3.3.5 +google-cloud-bigquery-storage==2.16.2 google-auth-oauthlib==0.5.3 -grpcio==1.49.1 -ipywidgets==7.7.1 +grpcio==1.50.0 +ipywidgets==8.0.2 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.5.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.0; python_version >= '3.8' +pandas==1.5.1; python_version >= '3.8' pyarrow==9.0.0 -pytz==2022.4 +pytz==2022.5 typing-extensions==4.4.0 From a80f436f2e75a8fb680316f17a22eecb31a7101d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 2 Nov 2022 10:36:09 -0400 Subject: [PATCH 034/536] fix: corrects test for non-existent attribute (#1395) * fix: corrects test for non-existent attribute * updates import statement to fix linting issue * updates a test to check for Python version * updates comments --- google/cloud/bigquery/table.py | 3 ++- samples/geography/requirements.txt | 2 +- tests/unit/test_table.py | 6 +++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 2065c5fd2..4fd77dd21 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -40,10 +40,11 @@ try: import shapely # type: ignore + from shapely import wkt # type: ignore except ImportError: shapely = None else: - _read_wkt = shapely.wkt.loads + _read_wkt = wkt.loads import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ef50fafb6..798de6bb5 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -10,7 +10,7 @@ db-dtypes==1.0.4 Fiona==1.8.22 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' -geopandas==0.11.1; python_version >= '3.8' +geopandas==0.12.1; python_version >= '3.8' google-api-core==2.10.2 google-auth==2.13.0 google-cloud-bigquery==3.3.5 diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index fca43f1ee..f542c7523 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -15,6 +15,7 @@ import datetime import logging import re +from sys import version_info import time import types import unittest @@ -1969,7 +1970,10 @@ def test_to_geodataframe(self): df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 0) # verify the number of rows - self.assertIsNone(df.crs) + if version_info.major == 3 and version_info.minor > 7: + assert not hasattr(df, "crs") # used with Python > 3.7 + else: + self.assertIsNone(df.crs) # used with Python == 3.7 class TestRowIterator(unittest.TestCase): From c898546d3292f9ec1ba6120cd3f9e2805aa087bb Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 2 Nov 2022 14:47:58 -0400 Subject: [PATCH 035/536] fix(deps): allow pyarrow < 11 (#1393) Co-authored-by: Chalmer Lowe --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index abed852a8..c8bf640c2 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ "packaging >= 14.3, <22.0.0dev", "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", - "pyarrow >= 3.0.0, < 10.0dev", + "pyarrow >= 3.0.0, < 11.0dev", "requests >= 2.21.0, < 3.0.0dev", ] extras = { From 5d3e5d36d6ff492ba6b76018a4d832e67a2c46a6 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Fri, 4 Nov 2022 11:39:06 -0400 Subject: [PATCH 036/536] chore(main): release 3.3.6 (#1375) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 26 ++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d5efd7dd6..869d063e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,32 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.3.6](https://github.com/googleapis/python-bigquery/compare/v3.3.4...v3.3.6) (2022-11-02) + + +### Features + +* Reconfigure tqdm progress bar in %%bigquery magic ([#1355](https://github.com/googleapis/python-bigquery/issues/1355)) ([506f781](https://github.com/googleapis/python-bigquery/commit/506f781c2dd775193336ab9432f32148250ed81d)) + + +### Bug Fixes + +* Corrects test for non-existent attribute ([#1395](https://github.com/googleapis/python-bigquery/issues/1395)) ([a80f436](https://github.com/googleapis/python-bigquery/commit/a80f436f2e75a8fb680316f17a22eecb31a7101d)) +* **deps:** Allow protobuf 3.19.5 ([#1379](https://github.com/googleapis/python-bigquery/issues/1379)) ([3e4a074](https://github.com/googleapis/python-bigquery/commit/3e4a074a981eb2920c5f9a711c253565d4844858)) +* **deps:** Allow pyarrow < 11 ([#1393](https://github.com/googleapis/python-bigquery/issues/1393)) ([c898546](https://github.com/googleapis/python-bigquery/commit/c898546d3292f9ec1ba6120cd3f9e2805aa087bb)) +* **deps:** Require requests>=2.21.0 ([#1388](https://github.com/googleapis/python-bigquery/issues/1388)) ([e398336](https://github.com/googleapis/python-bigquery/commit/e39833673582e4a7a34103cfc45603932c9c33b3)) +* Refactor to adapt to changes to shapely dependency ([#1376](https://github.com/googleapis/python-bigquery/issues/1376)) ([2afd278](https://github.com/googleapis/python-bigquery/commit/2afd278febe1eb247adc6278ab59903962a5bb6c)) + + +### Documentation + +* Fix typos ([#1372](https://github.com/googleapis/python-bigquery/issues/1372)) ([21cc525](https://github.com/googleapis/python-bigquery/commit/21cc525a86a06acfe73e5c5a74ec5f0b61e410f2)) + + +### Miscellaneous Chores + +* release 3.3.6 ([4fce1d9](https://github.com/googleapis/python-bigquery/commit/4fce1d93b1763703b115a0480a2b97021786aff7)) + ## [3.3.4](https://github.com/googleapis/python-bigquery/compare/v3.3.3...v3.3.4) (2022-09-29) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 3e1a9869c..43360a201 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.3.4" +__version__ = "3.3.6" From 931285ff85842ab07a0ef2ff9db808181ea3c5e4 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Mon, 14 Nov 2022 16:26:37 -0600 Subject: [PATCH 037/536] feat: add `reference_file_schema_uri` to LoadJobConfig, ExternalConfig (#1399) * feat: add 'reference_file_schema_uri' to LoadJobConfig and ExternalConfig --- google/cloud/bigquery/external_config.py | 14 ++ google/cloud/bigquery/job/load.py | 21 +++ testing/constraints-3.7.txt | 2 +- tests/system/test_client.py | 203 +++++++++++++++++++++++ tests/unit/job/test_base.py | 5 +- tests/unit/job/test_load.py | 12 ++ tests/unit/test_external_config.py | 6 + 7 files changed, 258 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 640b2d16b..bd60e4ef1 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -756,6 +756,20 @@ def hive_partitioning(self, value): prop = value.to_api_repr() if value is not None else None self._properties["hivePartitioningOptions"] = prop + @property + def reference_file_schema_uri(self): + """Optional[str]: + When creating an external table, the user can provide a reference file with the + table schema. This is enabled for the following formats: + + AVRO, PARQUET, ORC + """ + return self._properties.get("referenceFileSchemaUri") + + @reference_file_schema_uri.setter + def reference_file_schema_uri(self, value): + self._properties["referenceFileSchemaUri"] = value + @property def ignore_unknown_values(self): """bool: If :data:`True`, extra values that are not represented in the diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index e4b44395e..5c7f26841 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -379,6 +379,20 @@ def range_partitioning(self, value): ) self._set_sub_prop("rangePartitioning", resource) + @property + def reference_file_schema_uri(self): + """Optional[str]: + When creating an external table, the user can provide a reference file with the + table schema. This is enabled for the following formats: + + AVRO, PARQUET, ORC + """ + return self._get_sub_prop("referenceFileSchemaUri") + + @reference_file_schema_uri.setter + def reference_file_schema_uri(self, value): + return self._set_sub_prop("referenceFileSchemaUri", value) + @property def schema(self): """Optional[Sequence[Union[ \ @@ -651,6 +665,13 @@ def quote_character(self): """ return self._configuration.quote_character + @property + def reference_file_schema_uri(self): + """See: + attr:`google.cloud.bigquery.job.LoadJobConfig.reference_file_schema_uri`. + """ + return self._configuration.reference_file_schema_uri + @property def skip_leading_rows(self): """See diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 57928714f..2c5b169db 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -25,4 +25,4 @@ python-dateutil==2.7.3 requests==2.21.0 Shapely==1.6.4.post2 six==1.13.0 -tqdm==4.7.4 +tqdm==4.7.4 \ No newline at end of file diff --git a/tests/system/test_client.py b/tests/system/test_client.py index c99ee1c72..152bb8144 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -97,6 +97,20 @@ ), ] +SOURCE_URIS_AVRO = [ + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro", +] +SOURCE_URIS_PARQUET = [ + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.parquet", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.parquet", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.parquet", +] +REFERENCE_FILE_SCHEMA_URI_AVRO = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro" +REFERENCE_FILE_SCHEMA_URI_PARQUET = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.parquet" + + # The VPC-SC team maintains a mirror of the GCS bucket used for code # samples. The public bucket crosses the configured security boundary. # See: https://github.com/googleapis/google-cloud-python/issues/8550 @@ -1052,6 +1066,195 @@ def test_load_table_from_file_w_explicit_location(self): table_ref, "gs://{}/letters-us.csv".format(bucket_name), location="US" ).result() + def test_create_external_table_with_reference_file_schema_uri_avro(self): + client = Config.CLIENT + dataset_id = _make_dataset_id("external_reference_file_avro") + self.temp_dataset(dataset_id) + dataset_ref = bigquery.DatasetReference(client.project, dataset_id) + table_id = "test_ref_file_avro" + table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id) + + expected_schema = [ + bigquery.SchemaField("username", "STRING", mode="NULLABLE"), + bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"), + bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"), + bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"), + ] + + # By default, the table should have the c-twitter schema because it is lexicographically last + # in the `SOURCE_URIs` list: + # a-twitter schema: (username, tweet, timestamp, likes) + # b-twitter schema: (username, tweet, timestamp) + # c-twitter schema: (username, tweet) + + # Because `referenceFileSchemaUri` is set as a-twitter, the table will have a-twitter schema + + # Create external data configuration + external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO) + external_config.source_uris = SOURCE_URIS_AVRO + external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO + + table = bigquery.Table(table_ref) + table.external_data_configuration = external_config + + table = client.create_table(table) + + # Get table created by the create_table API call + generated_table = client.get_table(table_ref) + + self.assertEqual(generated_table.schema, expected_schema) + self.assertEqual( + generated_table.external_data_configuration._properties[ + "referenceFileSchemaUri" + ], + REFERENCE_FILE_SCHEMA_URI_AVRO, + ) + + # Clean up test + self.to_delete.insert(0, generated_table) + + def test_load_table_from_uri_with_reference_file_schema_uri_avro(self): + dataset_id = _make_dataset_id("test_reference_file_avro") + self.temp_dataset(dataset_id) + client = Config.CLIENT + dataset_ref = bigquery.DatasetReference(client.project, dataset_id) + table_id = "test_ref_file_avro" + table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id) + + expected_schema = [ + bigquery.SchemaField("username", "STRING", mode="NULLABLE"), + bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"), + bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"), + bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"), + ] + + # By default, the table should have the c-twitter schema because it is lexicographically last + # in the `SOURCE_URIS` list: + # a-twitter schema: (username, tweet, timestamp, likes) + # b-twitter schema: (username, tweet, timestamp) + # c-twitter schema: (username, tweet) + + # Because `referenceFileSchemaUri` is set as a-twitter, the table will have a-twitter schema + + # Create load job configuration + load_job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.AVRO + ) + load_job_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO + + load_job = client.load_table_from_uri( + source_uris=SOURCE_URIS_AVRO, + destination=table_ref, + job_config=load_job_config, + ) + # Wait for load job to complete + result = load_job.result() + + # Get table created by the load job + generated_table = client.get_table(table_ref) + self.assertEqual(generated_table.schema, expected_schema) + self.assertEqual( + result._properties["configuration"]["load"]["referenceFileSchemaUri"], + REFERENCE_FILE_SCHEMA_URI_AVRO, + ) + + # Clean up test + self.to_delete.insert(0, generated_table) + + def test_create_external_table_with_reference_file_schema_uri_parquet(self): + client = Config.CLIENT + dataset_id = _make_dataset_id("external_table_ref_file_parquet") + self.temp_dataset(dataset_id) + dataset_ref = bigquery.DatasetReference(client.project, dataset_id) + table_id = "test_ref_file_parquet" + table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id) + + expected_schema = [ + bigquery.SchemaField("username", "STRING", mode="NULLABLE"), + bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"), + bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"), + bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"), + ] + + # By default, the table should have the c-twitter schema because it is lexicographically last + # in the `SOURCE_URIS` list: + # a-twitter schema: (username, tweet, timestamp, likes) + # b-twitter schema: (username, tweet, timestamp) + # c-twitter schema: (username, tweet) + + # Because `referenceFileSchemaUri` is set as a-twitter, the table will have a-twitter schema + + # Create external data configuration + external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.PARQUET) + external_config.source_uris = SOURCE_URIS_PARQUET + external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_PARQUET + + table = bigquery.Table(table_ref) + table.external_data_configuration = external_config + + table = client.create_table(table) + + # Get table created by the create_table API call + generated_table = client.get_table(table_ref) + self.assertEqual(generated_table.schema, expected_schema) + self.assertEqual( + generated_table.external_data_configuration._properties[ + "referenceFileSchemaUri" + ], + REFERENCE_FILE_SCHEMA_URI_PARQUET, + ) + + # Clean up test + self.to_delete.insert(0, generated_table) + + def test_load_table_from_uri_with_reference_file_schema_uri_parquet(self): + dataset_id = _make_dataset_id("test_reference_file_parquet") + self.temp_dataset(dataset_id) + client = Config.CLIENT + dataset_ref = bigquery.DatasetReference(client.project, dataset_id) + table_id = "test_ref_file_parquet" + table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id) + + expected_schema = [ + bigquery.SchemaField("username", "STRING", mode="NULLABLE"), + bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"), + bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"), + bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"), + ] + + # By default, the table should have the c-twitter schema because it is lexicographically last + # in the `SOURCE_URIS` list: + # a-twitter schema: (username, tweet, timestamp, likes) + # b-twitter schema: (username, tweet, timestamp) + # c-twitter schema: (username, tweet) + + # Because `referenceFileSchemaUri` is set as a-twitter, the table will have a-twitter schema + + # Create load job configuration + load_job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.PARQUET + ) + load_job_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_PARQUET + + load_job = client.load_table_from_uri( + source_uris=SOURCE_URIS_PARQUET, + destination=table_ref, + job_config=load_job_config, + ) + # Wait for load job to complete + result = load_job.result() + + # Get table created by the load job + generated_table = client.get_table(table_ref) + self.assertEqual(generated_table.schema, expected_schema) + self.assertEqual( + result._properties["configuration"]["load"]["referenceFileSchemaUri"], + REFERENCE_FILE_SCHEMA_URI_PARQUET, + ) + + # Clean up test + self.to_delete.insert(0, generated_table) + def _write_csv_to_storage(self, bucket_name, blob_name, header_row, data_rows): from google.cloud._testing import _NamedTemporaryFile diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index f0525c22a..ed0dc731b 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -943,7 +943,6 @@ def test_result_default_wo_state(self): conn = make_connection( _make_retriable_exception(), begun_job_resource, - _make_retriable_exception(), done_job_resource, ) client = _make_client(project=self.PROJECT, connection=conn) @@ -963,9 +962,7 @@ def test_result_default_wo_state(self): query_params={"location": "US"}, timeout=None, ) - conn.api_request.assert_has_calls( - [begin_call, begin_call, reload_call, reload_call] - ) + conn.api_request.assert_has_calls([begin_call, begin_call, reload_call]) def test_result_w_retry_wo_state(self): begun_job_resource = _make_job_resource( diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index cf2096b8b..143e1da59 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -37,6 +37,7 @@ def _setUpConstants(self): self.INPUT_BYTES = 12345 self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 + self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) @@ -47,6 +48,7 @@ def _make_resource(self, started=False, ended=False): "datasetId": self.DS_ID, "tableId": self.TABLE_ID, } + config["referenceFileSchemaUri"] = self.REFERENCE_FILE_SCHEMA_URI if ended: resource["status"] = {"state": "DONE"} @@ -136,6 +138,12 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(str(job.skip_leading_rows), config["skipLeadingRows"]) else: self.assertIsNone(job.skip_leading_rows) + if "referenceFileSchemaUri" in config: + self.assertEqual( + job.reference_file_schema_uri, config["referenceFileSchemaUri"] + ) + else: + self.assertIsNone(job.reference_file_schema_uri) if "destinationEncryptionConfiguration" in config: self.assertIsNotNone(job.destination_encryption_configuration) @@ -186,6 +194,7 @@ def test_ctor(self): self.assertIsNone(job.use_avro_logical_types) self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) + self.assertIsNone(job.reference_file_schema_uri) def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField @@ -461,6 +470,7 @@ def test_begin_w_bound_client(self): "datasetId": self.DS_ID, "tableId": self.TABLE_ID, }, + "referenceFileSchemaUri": self.REFERENCE_FILE_SCHEMA_URI, } }, }, @@ -503,6 +513,7 @@ def test_begin_w_autodetect(self): "datasetId": self.DS_ID, "tableId": self.TABLE_ID, }, + "referenceFileSchemaUri": self.REFERENCE_FILE_SCHEMA_URI, "autodetect": True, } }, @@ -585,6 +596,7 @@ def test_begin_w_alternate_client(self): config.use_avro_logical_types = True config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] + config.reference_file_schema_uri = "gs://path/to/reference" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 3ef61d738..72fe2761a 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -99,6 +99,12 @@ def test_connection_id(self): ec.connection_id = "path/to/connection" self.assertEqual(ec.connection_id, "path/to/connection") + def test_reference_file_schema_uri(self): + ec = external_config.ExternalConfig("") + self.assertIsNone(ec.reference_file_schema_uri) + ec.reference_file_schema_uri = "path/to/reference" + self.assertEqual(ec.reference_file_schema_uri, "path/to/reference") + def test_schema_None(self): ec = external_config.ExternalConfig("") ec.schema = None From 207aa506ab634bdb13256fa5bd8745ec9de23290 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Tue, 15 Nov 2022 14:57:17 -0600 Subject: [PATCH 038/536] feat: add default value expression (#1408) * feat: Adds default_value_expression to SchemaField --- google/cloud/bigquery/schema.py | 38 +++++++++++++++++++- google/cloud/bigquery/table.py | 2 +- tests/system/test_client.py | 62 +++++++++++++++++++++++++++++++++ tests/unit/test_client.py | 40 ++++++++++++++------- tests/unit/test_schema.py | 9 +++-- 5 files changed, 135 insertions(+), 16 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 1df78424d..ebf34e4cd 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -93,6 +93,30 @@ class SchemaField(object): Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type. max_length: Maximum length of fields with STRING or BYTES type. + + default_value_expression: str, Optional + Used to specify the default value of a field using a SQL expression. It can only be set for + top level fields (columns). + + You can use a struct or array expression to specify default value for the entire struct or + array. The valid SQL expressions are: + + - Literals for all data types, including STRUCT and ARRAY. + + - The following functions: + + `CURRENT_TIMESTAMP` + `CURRENT_TIME` + `CURRENT_DATE` + `CURRENT_DATETIME` + `GENERATE_UUID` + `RAND` + `SESSION_USER` + `ST_GEOPOINT` + + - Struct or array composed with the above allowed functions, for example: + + "[CURRENT_DATE(), DATE '2020-01-01'"] """ def __init__( @@ -100,6 +124,7 @@ def __init__( name: str, field_type: str, mode: str = "NULLABLE", + default_value_expression: str = None, description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE, fields: Iterable["SchemaField"] = (), policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE, @@ -115,6 +140,8 @@ def __init__( self._properties["mode"] = mode.upper() if description is not _DEFAULT_VALUE: self._properties["description"] = description + if default_value_expression is not None: + self._properties["defaultValueExpression"] = default_value_expression if precision is not _DEFAULT_VALUE: self._properties["precision"] = precision if scale is not _DEFAULT_VALUE: @@ -154,6 +181,8 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": fields = api_repr.get("fields", ()) policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE) + default_value_expression = api_repr.get("defaultValueExpression", None) + if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: policy_tags = PolicyTagList.from_api_repr(policy_tags) @@ -161,6 +190,7 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": field_type=field_type, fields=[cls.from_api_repr(f) for f in fields], mode=mode.upper(), + default_value_expression=default_value_expression, description=description, name=api_repr["name"], policy_tags=policy_tags, @@ -197,6 +227,11 @@ def is_nullable(self): """bool: whether 'mode' is 'nullable'.""" return self.mode == "NULLABLE" + @property + def default_value_expression(self): + """Optional[str] default value of a field, using an SQL expression""" + return self._properties.get("defaultValueExpression") + @property def description(self): """Optional[str]: description for the field.""" @@ -260,7 +295,7 @@ def _key(self): field_type = self.field_type.upper() if self.field_type is not None else None # Type can temporarily be set to None if the code needs a SchemaField instance, - # but has npt determined the exact type of the field yet. + # but has not determined the exact type of the field yet. if field_type is not None: if field_type == "STRING" or field_type == "BYTES": if self.max_length is not None: @@ -281,6 +316,7 @@ def _key(self): field_type, # Mode is always str, if not given it defaults to a str value self.mode.upper(), # pytype: disable=attribute-error + self.default_value_expression, self.description, self._fields, policy_tags, diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 4fd77dd21..96888d62d 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1421,7 +1421,7 @@ def get(self, key: str, default: Any = None) -> Any: >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z') None - The default value can be overrided with the ``default`` parameter. + The default value can be overridden with the ``default`` parameter. >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '') '' diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 152bb8144..25edc18e1 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -441,6 +441,68 @@ def test_create_table_with_real_custom_policy(self): list(table.schema[1].policy_tags.names), [child_policy_tag.name] ) + def test_create_table_with_default_value_expression(self): + dataset = self.temp_dataset( + _make_dataset_id("create_table_with_default_value_expression") + ) + + table_id = "test_table" + timestamp_field_name = "timestamp_field_with_default_value_expression" + + string_default_val_expression = "'FOO'" + timestamp_default_val_expression = "CURRENT_TIMESTAMP" + + schema = [ + bigquery.SchemaField( + "username", + "STRING", + default_value_expression=string_default_val_expression, + ), + bigquery.SchemaField( + timestamp_field_name, + "TIMESTAMP", + default_value_expression=timestamp_default_val_expression, + ), + ] + table_arg = Table(dataset.table(table_id), schema=schema) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + + # Fetch the created table and its metadata to verify that the default + # value expression is assigned to fields + remote_table = Config.CLIENT.get_table(table) + remote_schema = remote_table.schema + self.assertEqual(remote_schema, schema) + + for field in remote_schema: + if field.name == string_default_val_expression: + self.assertEqual("'FOO'", field.default_value_expression) + if field.name == timestamp_default_val_expression: + self.assertEqual("CURRENT_TIMESTAMP", field.default_value_expression) + + # Insert rows into the created table to verify default values are populated + # when value is not provided + NOW_SECONDS = 1448911495.484366 + NOW = datetime.datetime.utcfromtimestamp(NOW_SECONDS).replace(tzinfo=UTC) + + # Rows to insert. Row #1 will have default `TIMESTAMP` defaultValueExpression CURRENT_TIME + # Row #2 will have default `STRING` defaultValueExpression "'FOO" + ROWS = [{"username": "john_doe"}, {timestamp_field_name: NOW}] + + errors = Config.CLIENT.insert_rows(table, ROWS) + self.assertEqual(len(errors), 0) + + # Get list of inserted rows + row_1, row_2 = [row for row in list(Config.CLIENT.list_rows(table))] + + # Assert that row values are populated with default value expression + self.assertIsInstance(row_1.get(timestamp_field_name), datetime.datetime) + self.assertEqual("FOO", row_2.get("username")) + def test_create_table_w_time_partitioning_w_clustering_fields(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 30bab8fa9..f4552cda2 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -8395,9 +8395,19 @@ def test_schema_from_json_with_file_path(self): ]""" expected = [ - SchemaField("qtr", "STRING", "REQUIRED", "quarter"), - SchemaField("rep", "STRING", "NULLABLE", "sales representative"), - SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), + SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), + SchemaField( + "rep", + "STRING", + "NULLABLE", + description="sales representative", + ), + SchemaField( + "sales", + "FLOAT", + "NULLABLE", + description="total sales", + ), ] client = self._make_client() @@ -8441,9 +8451,11 @@ def test_schema_from_json_with_file_object(self): ]""" expected = [ - SchemaField("qtr", "STRING", "REQUIRED", "quarter"), - SchemaField("rep", "STRING", "NULLABLE", "sales representative"), - SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), + SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), + SchemaField( + "rep", "STRING", "NULLABLE", description="sales representative" + ), + SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"), ] client = self._make_client() @@ -8477,9 +8489,11 @@ def test_schema_to_json_with_file_path(self): ] schema_list = [ - SchemaField("qtr", "STRING", "REQUIRED", "quarter"), - SchemaField("rep", "STRING", "NULLABLE", "sales representative"), - SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), + SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), + SchemaField( + "rep", "STRING", "NULLABLE", description="sales representative" + ), + SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"), ] client = self._make_client() @@ -8521,9 +8535,11 @@ def test_schema_to_json_with_file_object(self): ] schema_list = [ - SchemaField("qtr", "STRING", "REQUIRED", "quarter"), - SchemaField("rep", "STRING", "NULLABLE", "sales representative"), - SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), + SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), + SchemaField( + "rep", "STRING", "NULLABLE", description="sales representative" + ), + SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"), ] fake_file = io.StringIO() diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 6a547cb13..c6593e1b4 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -45,8 +45,10 @@ def test_constructor_defaults(self): self.assertIsNone(field.description) self.assertEqual(field.fields, ()) self.assertIsNone(field.policy_tags) + self.assertIsNone(field.default_value_expression) def test_constructor_explicit(self): + FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field" field = self._make_one( "test", "STRING", @@ -58,10 +60,12 @@ def test_constructor_explicit(self): "projects/f/locations/g/taxonomies/h/policyTags/i", ) ), + default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION, ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") self.assertEqual(field.mode, "REQUIRED") + self.assertEqual(field.default_value_expression, FIELD_DEFAULT_VALUE_EXPRESSION) self.assertEqual(field.description, "Testing") self.assertEqual(field.fields, ()) self.assertEqual( @@ -182,6 +186,7 @@ def test_from_api_repr_defaults(self): self.assertEqual(field.field_type, "RECORD") self.assertEqual(field.mode, "NULLABLE") self.assertEqual(len(field.fields), 0) + self.assertEqual(field.default_value_expression, None) # Keys not present in API representation shouldn't be included in # _properties. @@ -527,12 +532,12 @@ def test___hash__not_equals(self): def test___repr__(self): field1 = self._make_one("field1", "STRING") - expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)" + expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None)" self.assertEqual(repr(field1), expected) def test___repr__type_not_set(self): field1 = self._make_one("field1", field_type=None) - expected = "SchemaField('field1', None, 'NULLABLE', None, (), None)" + expected = "SchemaField('field1', None, 'NULLABLE', None, None, (), None)" self.assertEqual(repr(field1), expected) def test___repr__evaluable_no_policy_tags(self): From eb49873176dee478617eb50472d44703abca53b5 Mon Sep 17 00:00:00 2001 From: Walt Askew Date: Wed, 16 Nov 2022 05:44:06 -0800 Subject: [PATCH 039/536] feat: Add More Specific Type Annotations for Row Dictionaries (#1295) The keys must be strings as they represent column names. Update type annotations to reflect this. Co-authored-by: aribray <45905583+aribray@users.noreply.github.com> --- google/cloud/bigquery/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 1200d78f9..b72505a15 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3349,10 +3349,10 @@ def query( def insert_rows( self, table: Union[Table, TableReference, str], - rows: Union[Iterable[Tuple], Iterable[Dict]], + rows: Union[Iterable[Tuple], Iterable[Mapping[str, Any]]], selected_fields: Sequence[SchemaField] = None, **kwargs, - ) -> Sequence[dict]: + ) -> Sequence[Dict[str, Any]]: """Insert rows into a table via the streaming API. See @@ -3470,7 +3470,7 @@ def insert_rows_from_dataframe( def insert_rows_json( self, table: Union[Table, TableReference, TableListItem, str], - json_rows: Sequence[Dict], + json_rows: Sequence[Mapping[str, Any]], row_ids: Union[ Iterable[Optional[str]], AutoRowIDs, None ] = AutoRowIDs.GENERATE_UUID, From cbab5acf971e67ca74ad9df1f62716903d234a1c Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 17 Nov 2022 15:08:37 -0500 Subject: [PATCH 040/536] chore(setup.py): remove python upper bound (#1413) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c8bf640c2..5fc694c6f 100644 --- a/setup.py +++ b/setup.py @@ -124,7 +124,7 @@ namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, - python_requires=">=3.7, <3.11", + python_requires=">=3.7", include_package_data=True, zip_safe=False, ) From 4e6cc67409ceb58eec44c9ff1fbeeaf5de341fd8 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Fri, 18 Nov 2022 10:56:42 -0600 Subject: [PATCH 041/536] chore(main): release 3.4.0 (#1407) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 9 +++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 869d063e5..294e5b42f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.4.0](https://github.com/googleapis/python-bigquery/compare/v3.3.6...v3.4.0) (2022-11-17) + + +### Features + +* Add `reference_file_schema_uri` to LoadJobConfig, ExternalConfig ([#1399](https://github.com/googleapis/python-bigquery/issues/1399)) ([931285f](https://github.com/googleapis/python-bigquery/commit/931285ff85842ab07a0ef2ff9db808181ea3c5e4)) +* Add default value expression ([#1408](https://github.com/googleapis/python-bigquery/issues/1408)) ([207aa50](https://github.com/googleapis/python-bigquery/commit/207aa506ab634bdb13256fa5bd8745ec9de23290)) +* Add More Specific Type Annotations for Row Dictionaries ([#1295](https://github.com/googleapis/python-bigquery/issues/1295)) ([eb49873](https://github.com/googleapis/python-bigquery/commit/eb49873176dee478617eb50472d44703abca53b5)) + ## [3.3.6](https://github.com/googleapis/python-bigquery/compare/v3.3.4...v3.3.6) (2022-11-02) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 43360a201..6b822f0c1 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.3.6" +__version__ = "3.4.0" From 0f08e9a8ff638e78006d71acd974de2dff89b5d9 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Fri, 18 Nov 2022 12:43:30 -0600 Subject: [PATCH 042/536] docs: add info about streaming quota limits to `insert_rows*` methods (#1409) * docs: add information about streaming quota limits (413: Payload Too Large) Co-authored-by: Anthonios Partheniou --- google/cloud/bigquery/client.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index b72505a15..1f3647e71 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3358,6 +3358,14 @@ def insert_rows( See https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll + BigQuery will reject insertAll payloads that exceed a defined limit (10MB). + Additionally, if a payload vastly exceeds this limit, the request is rejected + by the intermediate architecture, which returns a 413 (Payload Too Large) status code. + + + See + https://cloud.google.com/bigquery/quotas#streaming_inserts + Args: table (Union[ \ google.cloud.bigquery.table.Table, \ @@ -3424,6 +3432,13 @@ def insert_rows_from_dataframe( ) -> Sequence[Sequence[dict]]: """Insert rows into a table from a dataframe via the streaming API. + BigQuery will reject insertAll payloads that exceed a defined limit (10MB). + Additionally, if a payload vastly exceeds this limit, the request is rejected + by the intermediate architecture, which returns a 413 (Payload Too Large) status code. + + See + https://cloud.google.com/bigquery/quotas#streaming_inserts + Args: table (Union[ \ google.cloud.bigquery.table.Table, \ @@ -3485,6 +3500,13 @@ def insert_rows_json( See https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll + BigQuery will reject insertAll payloads that exceed a defined limit (10MB). + Additionally, if a payload vastly exceeds this limit, the request is rejected + by the intermediate architecture, which returns a 413 (Payload Too Large) status code. + + See + https://cloud.google.com/bigquery/quotas#streaming_inserts + Args: table (Union[ \ google.cloud.bigquery.table.Table \ From 40e4da78bb690ff4c94832321377bb1590e2eeaf Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 30 Nov 2022 10:37:11 -0500 Subject: [PATCH 043/536] chore(python): drop flake8-import-order in samples noxfile (#1424) Source-Link: https://github.com/googleapis/synthtool/commit/6ed3a831cb9ff69ef8a504c353e098ec0192ad93 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:3abfa0f1886adaf0b83f07cb117b24a639ea1cb9cffe56d43280b977033563eb Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/docker/docs/Dockerfile | 12 +- .kokoro/requirements.in | 4 +- .kokoro/requirements.txt | 354 ++++++++++++++++++--------------- samples/geography/noxfile.py | 26 +-- samples/magics/noxfile.py | 26 +-- samples/snippets/noxfile.py | 26 +-- 7 files changed, 212 insertions(+), 238 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 3815c983c..bb21147e4 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:7a40313731a7cb1454eef6b33d3446ebb121836738dc3ab3d2d3ded5268c35b6 + digest: sha256:3abfa0f1886adaf0b83f07cb117b24a639ea1cb9cffe56d43280b977033563eb diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile index 238b87b9d..f8137d0ae 100644 --- a/.kokoro/docker/docs/Dockerfile +++ b/.kokoro/docker/docs/Dockerfile @@ -60,16 +60,16 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* \ && rm -f /var/cache/apt/archives/*.deb -###################### Install python 3.8.11 +###################### Install python 3.9.13 -# Download python 3.8.11 -RUN wget https://www.python.org/ftp/python/3.8.11/Python-3.8.11.tgz +# Download python 3.9.13 +RUN wget https://www.python.org/ftp/python/3.9.13/Python-3.9.13.tgz # Extract files -RUN tar -xvf Python-3.8.11.tgz +RUN tar -xvf Python-3.9.13.tgz -# Install python 3.8.11 -RUN ./Python-3.8.11/configure --enable-optimizations +# Install python 3.9.13 +RUN ./Python-3.9.13/configure --enable-optimizations RUN make altinstall ###################### Install pip diff --git a/.kokoro/requirements.in b/.kokoro/requirements.in index 7718391a3..cbd7e77f4 100644 --- a/.kokoro/requirements.in +++ b/.kokoro/requirements.in @@ -5,4 +5,6 @@ typing-extensions twine wheel setuptools -nox \ No newline at end of file +nox +charset-normalizer<3 +click<8.1.0 diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index d15994bac..9c1b9be34 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -20,9 +20,9 @@ cachetools==5.2.0 \ --hash=sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757 \ --hash=sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db # via google-auth -certifi==2022.6.15 \ - --hash=sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d \ - --hash=sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412 +certifi==2022.9.24 \ + --hash=sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14 \ + --hash=sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382 # via requests cffi==1.15.1 \ --hash=sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5 \ @@ -93,11 +93,14 @@ cffi==1.15.1 \ charset-normalizer==2.1.1 \ --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 \ --hash=sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f - # via requests + # via + # -r requirements.in + # requests click==8.0.4 \ --hash=sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1 \ --hash=sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb # via + # -r requirements.in # gcp-docuploader # gcp-releasetool colorlog==6.7.0 \ @@ -110,29 +113,33 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==37.0.4 \ - --hash=sha256:190f82f3e87033821828f60787cfa42bff98404483577b591429ed99bed39d59 \ - --hash=sha256:2be53f9f5505673eeda5f2736bea736c40f051a739bfae2f92d18aed1eb54596 \ - --hash=sha256:30788e070800fec9bbcf9faa71ea6d8068f5136f60029759fd8c3efec3c9dcb3 \ - --hash=sha256:3d41b965b3380f10e4611dbae366f6dc3cefc7c9ac4e8842a806b9672ae9add5 \ - --hash=sha256:4c590ec31550a724ef893c50f9a97a0c14e9c851c85621c5650d699a7b88f7ab \ - --hash=sha256:549153378611c0cca1042f20fd9c5030d37a72f634c9326e225c9f666d472884 \ - --hash=sha256:63f9c17c0e2474ccbebc9302ce2f07b55b3b3fcb211ded18a42d5764f5c10a82 \ - --hash=sha256:6bc95ed67b6741b2607298f9ea4932ff157e570ef456ef7ff0ef4884a134cc4b \ - --hash=sha256:7099a8d55cd49b737ffc99c17de504f2257e3787e02abe6d1a6d136574873441 \ - --hash=sha256:75976c217f10d48a8b5a8de3d70c454c249e4b91851f6838a4e48b8f41eb71aa \ - --hash=sha256:7bc997818309f56c0038a33b8da5c0bfbb3f1f067f315f9abd6fc07ad359398d \ - --hash=sha256:80f49023dd13ba35f7c34072fa17f604d2f19bf0989f292cedf7ab5770b87a0b \ - --hash=sha256:91ce48d35f4e3d3f1d83e29ef4a9267246e6a3be51864a5b7d2247d5086fa99a \ - --hash=sha256:a958c52505c8adf0d3822703078580d2c0456dd1d27fabfb6f76fe63d2971cd6 \ - --hash=sha256:b62439d7cd1222f3da897e9a9fe53bbf5c104fff4d60893ad1355d4c14a24157 \ - --hash=sha256:b7f8dd0d4c1f21759695c05a5ec8536c12f31611541f8904083f3dc582604280 \ - --hash=sha256:d204833f3c8a33bbe11eda63a54b1aad7aa7456ed769a982f21ec599ba5fa282 \ - --hash=sha256:e007f052ed10cc316df59bc90fbb7ff7950d7e2919c9757fd42a2b8ecf8a5f67 \ - --hash=sha256:f2dcb0b3b63afb6df7fd94ec6fbddac81b5492513f7b0436210d390c14d46ee8 \ - --hash=sha256:f721d1885ecae9078c3f6bbe8a88bc0786b6e749bf32ccec1ef2b18929a05046 \ - --hash=sha256:f7a6de3e98771e183645181b3627e2563dcde3ce94a9e42a3f427d2255190327 \ - --hash=sha256:f8c0a6e9e1dd3eb0414ba320f85da6b0dcbd543126e30fcc546e7372a7fbf3b9 +cryptography==38.0.3 \ + --hash=sha256:068147f32fa662c81aebab95c74679b401b12b57494872886eb5c1139250ec5d \ + --hash=sha256:06fc3cc7b6f6cca87bd56ec80a580c88f1da5306f505876a71c8cfa7050257dd \ + --hash=sha256:25c1d1f19729fb09d42e06b4bf9895212292cb27bb50229f5aa64d039ab29146 \ + --hash=sha256:402852a0aea73833d982cabb6d0c3bb582c15483d29fb7085ef2c42bfa7e38d7 \ + --hash=sha256:4e269dcd9b102c5a3d72be3c45d8ce20377b8076a43cbed6f660a1afe365e436 \ + --hash=sha256:5419a127426084933076132d317911e3c6eb77568a1ce23c3ac1e12d111e61e0 \ + --hash=sha256:554bec92ee7d1e9d10ded2f7e92a5d70c1f74ba9524947c0ba0c850c7b011828 \ + --hash=sha256:5e89468fbd2fcd733b5899333bc54d0d06c80e04cd23d8c6f3e0542358c6060b \ + --hash=sha256:65535bc550b70bd6271984d9863a37741352b4aad6fb1b3344a54e6950249b55 \ + --hash=sha256:6ab9516b85bebe7aa83f309bacc5f44a61eeb90d0b4ec125d2d003ce41932d36 \ + --hash=sha256:6addc3b6d593cd980989261dc1cce38263c76954d758c3c94de51f1e010c9a50 \ + --hash=sha256:728f2694fa743a996d7784a6194da430f197d5c58e2f4e278612b359f455e4a2 \ + --hash=sha256:785e4056b5a8b28f05a533fab69febf5004458e20dad7e2e13a3120d8ecec75a \ + --hash=sha256:78cf5eefac2b52c10398a42765bfa981ce2372cbc0457e6bf9658f41ec3c41d8 \ + --hash=sha256:7f836217000342d448e1c9a342e9163149e45d5b5eca76a30e84503a5a96cab0 \ + --hash=sha256:8d41a46251bf0634e21fac50ffd643216ccecfaf3701a063257fe0b2be1b6548 \ + --hash=sha256:984fe150f350a3c91e84de405fe49e688aa6092b3525f407a18b9646f6612320 \ + --hash=sha256:9b24bcff7853ed18a63cfb0c2b008936a9554af24af2fb146e16d8e1aed75748 \ + --hash=sha256:b1b35d9d3a65542ed2e9d90115dfd16bbc027b3f07ee3304fc83580f26e43249 \ + --hash=sha256:b1b52c9e5f8aa2b802d48bd693190341fae201ea51c7a167d69fc48b60e8a959 \ + --hash=sha256:bbf203f1a814007ce24bd4d51362991d5cb90ba0c177a9c08825f2cc304d871f \ + --hash=sha256:be243c7e2bfcf6cc4cb350c0d5cdf15ca6383bbcb2a8ef51d3c9411a9d4386f0 \ + --hash=sha256:bfbe6ee19615b07a98b1d2287d6a6073f734735b49ee45b11324d85efc4d5cbd \ + --hash=sha256:c46837ea467ed1efea562bbeb543994c2d1f6e800785bd5a2c98bc096f5cb220 \ + --hash=sha256:dfb4f4dd568de1b6af9f4cda334adf7d72cf5bc052516e1b2608b683375dd95c \ + --hash=sha256:ed7b00096790213e09eb11c97cc6e2b757f15f3d2f85833cd2d3ec3fe37c1722 # via # gcp-releasetool # secretstorage @@ -148,23 +155,23 @@ filelock==3.8.0 \ --hash=sha256:55447caa666f2198c5b6b13a26d2084d26fa5b115c00d065664b2124680c4edc \ --hash=sha256:617eb4e5eedc82fc5f47b6d61e4d11cb837c56cb4544e39081099fa17ad109d4 # via virtualenv -gcp-docuploader==0.6.3 \ - --hash=sha256:ba8c9d76b3bbac54b0311c503a373b00edc2dc02d6d54ea9507045adb8e870f7 \ - --hash=sha256:c0f5aaa82ce1854a386197e4e359b120ad6d4e57ae2c812fce42219a3288026b +gcp-docuploader==0.6.4 \ + --hash=sha256:01486419e24633af78fd0167db74a2763974765ee8078ca6eb6964d0ebd388af \ + --hash=sha256:70861190c123d907b3b067da896265ead2eeb9263969d6955c9e0bb091b5ccbf # via -r requirements.in -gcp-releasetool==1.8.7 \ - --hash=sha256:3d2a67c9db39322194afb3b427e9cb0476ce8f2a04033695f0aeb63979fc2b37 \ - --hash=sha256:5e4d28f66e90780d77f3ecf1e9155852b0c3b13cbccb08ab07e66b2357c8da8d +gcp-releasetool==1.10.0 \ + --hash=sha256:72a38ca91b59c24f7e699e9227c90cbe4dd71b789383cb0164b088abae294c83 \ + --hash=sha256:8c7c99320208383d4bb2b808c6880eb7a81424afe7cdba3c8d84b25f4f0e097d # via -r requirements.in -google-api-core==2.8.2 \ - --hash=sha256:06f7244c640322b508b125903bb5701bebabce8832f85aba9335ec00b3d02edc \ - --hash=sha256:93c6a91ccac79079ac6bbf8b74ee75db970cc899278b97d53bc012f35908cf50 +google-api-core==2.10.2 \ + --hash=sha256:10c06f7739fe57781f87523375e8e1a3a4674bf6392cd6131a3222182b971320 \ + --hash=sha256:34f24bd1d5f72a8c4519773d99ca6bf080a6c4e041b4e9f024fe230191dda62e # via # google-cloud-core # google-cloud-storage -google-auth==2.11.0 \ - --hash=sha256:be62acaae38d0049c21ca90f27a23847245c9f161ff54ede13af2cb6afecbac9 \ - --hash=sha256:ed65ecf9f681832298e29328e1ef0a3676e3732b2e56f41532d45f70a22de0fb +google-auth==2.14.1 \ + --hash=sha256:ccaa901f31ad5cbb562615eb8b664b3dd0bf5404a67618e642307f00613eda4d \ + --hash=sha256:f5d8701633bebc12e0deea4df8abd8aff31c28b355360597f7f2ee60f2e4d016 # via # gcp-releasetool # google-api-core @@ -174,76 +181,102 @@ google-cloud-core==2.3.2 \ --hash=sha256:8417acf6466be2fa85123441696c4badda48db314c607cf1e5d543fa8bdc22fe \ --hash=sha256:b9529ee7047fd8d4bf4a2182de619154240df17fbe60ead399078c1ae152af9a # via google-cloud-storage -google-cloud-storage==2.5.0 \ - --hash=sha256:19a26c66c317ce542cea0830b7e787e8dac2588b6bfa4d3fd3b871ba16305ab0 \ - --hash=sha256:382f34b91de2212e3c2e7b40ec079d27ee2e3dbbae99b75b1bcd8c63063ce235 +google-cloud-storage==2.6.0 \ + --hash=sha256:104ca28ae61243b637f2f01455cc8a05e8f15a2a18ced96cb587241cdd3820f5 \ + --hash=sha256:4ad0415ff61abdd8bb2ae81c1f8f7ec7d91a1011613f2db87c614c550f97bfe9 # via gcp-docuploader -google-crc32c==1.3.0 \ - --hash=sha256:04e7c220798a72fd0f08242bc8d7a05986b2a08a0573396187fd32c1dcdd58b3 \ - --hash=sha256:05340b60bf05b574159e9bd940152a47d38af3fb43803ffe71f11d704b7696a6 \ - --hash=sha256:12674a4c3b56b706153a358eaa1018c4137a5a04635b92b4652440d3d7386206 \ - --hash=sha256:127f9cc3ac41b6a859bd9dc4321097b1a4f6aa7fdf71b4f9227b9e3ebffb4422 \ - --hash=sha256:13af315c3a0eec8bb8b8d80b8b128cb3fcd17d7e4edafc39647846345a3f003a \ - --hash=sha256:1926fd8de0acb9d15ee757175ce7242e235482a783cd4ec711cc999fc103c24e \ - --hash=sha256:226f2f9b8e128a6ca6a9af9b9e8384f7b53a801907425c9a292553a3a7218ce0 \ - --hash=sha256:276de6273eb074a35bc598f8efbc00c7869c5cf2e29c90748fccc8c898c244df \ - --hash=sha256:318f73f5484b5671f0c7f5f63741ab020a599504ed81d209b5c7129ee4667407 \ - --hash=sha256:3bbce1be3687bbfebe29abdb7631b83e6b25da3f4e1856a1611eb21854b689ea \ - --hash=sha256:42ae4781333e331a1743445931b08ebdad73e188fd554259e772556fc4937c48 \ - --hash=sha256:58be56ae0529c664cc04a9c76e68bb92b091e0194d6e3c50bea7e0f266f73713 \ - --hash=sha256:5da2c81575cc3ccf05d9830f9e8d3c70954819ca9a63828210498c0774fda1a3 \ - --hash=sha256:6311853aa2bba4064d0c28ca54e7b50c4d48e3de04f6770f6c60ebda1e975267 \ - --hash=sha256:650e2917660e696041ab3dcd7abac160b4121cd9a484c08406f24c5964099829 \ - --hash=sha256:6a4db36f9721fdf391646685ecffa404eb986cbe007a3289499020daf72e88a2 \ - --hash=sha256:779cbf1ce375b96111db98fca913c1f5ec11b1d870e529b1dc7354b2681a8c3a \ - --hash=sha256:7f6fe42536d9dcd3e2ffb9d3053f5d05221ae3bbcefbe472bdf2c71c793e3183 \ - --hash=sha256:891f712ce54e0d631370e1f4997b3f182f3368179198efc30d477c75d1f44942 \ - --hash=sha256:95c68a4b9b7828ba0428f8f7e3109c5d476ca44996ed9a5f8aac6269296e2d59 \ - --hash=sha256:96a8918a78d5d64e07c8ea4ed2bc44354e3f93f46a4866a40e8db934e4c0d74b \ - --hash=sha256:9c3cf890c3c0ecfe1510a452a165431b5831e24160c5fcf2071f0f85ca5a47cd \ - --hash=sha256:9f58099ad7affc0754ae42e6d87443299f15d739b0ce03c76f515153a5cda06c \ - --hash=sha256:a0b9e622c3b2b8d0ce32f77eba617ab0d6768b82836391e4f8f9e2074582bf02 \ - --hash=sha256:a7f9cbea4245ee36190f85fe1814e2d7b1e5f2186381b082f5d59f99b7f11328 \ - --hash=sha256:bab4aebd525218bab4ee615786c4581952eadc16b1ff031813a2fd51f0cc7b08 \ - --hash=sha256:c124b8c8779bf2d35d9b721e52d4adb41c9bfbde45e6a3f25f0820caa9aba73f \ - --hash=sha256:c9da0a39b53d2fab3e5467329ed50e951eb91386e9d0d5b12daf593973c3b168 \ - --hash=sha256:ca60076c388728d3b6ac3846842474f4250c91efbfe5afa872d3ffd69dd4b318 \ - --hash=sha256:cb6994fff247987c66a8a4e550ef374671c2b82e3c0d2115e689d21e511a652d \ - --hash=sha256:d1c1d6236feab51200272d79b3d3e0f12cf2cbb12b208c835b175a21efdb0a73 \ - --hash=sha256:dd7760a88a8d3d705ff562aa93f8445ead54f58fd482e4f9e2bafb7e177375d4 \ - --hash=sha256:dda4d8a3bb0b50f540f6ff4b6033f3a74e8bf0bd5320b70fab2c03e512a62812 \ - --hash=sha256:e0f1ff55dde0ebcfbef027edc21f71c205845585fffe30d4ec4979416613e9b3 \ - --hash=sha256:e7a539b9be7b9c00f11ef16b55486141bc2cdb0c54762f84e3c6fc091917436d \ - --hash=sha256:eb0b14523758e37802f27b7f8cd973f5f3d33be7613952c0df904b68c4842f0e \ - --hash=sha256:ed447680ff21c14aaceb6a9f99a5f639f583ccfe4ce1a5e1d48eb41c3d6b3217 \ - --hash=sha256:f52a4ad2568314ee713715b1e2d79ab55fab11e8b304fd1462ff5cccf4264b3e \ - --hash=sha256:fbd60c6aaa07c31d7754edbc2334aef50601b7f1ada67a96eb1eb57c7c72378f \ - --hash=sha256:fc28e0db232c62ca0c3600884933178f0825c99be4474cdd645e378a10588125 \ - --hash=sha256:fe31de3002e7b08eb20823b3735b97c86c5926dd0581c7710a680b418a8709d4 \ - --hash=sha256:fec221a051150eeddfdfcff162e6db92c65ecf46cb0f7bb1bf812a1520ec026b \ - --hash=sha256:ff71073ebf0e42258a42a0b34f2c09ec384977e7f6808999102eedd5b49920e3 +google-crc32c==1.5.0 \ + --hash=sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a \ + --hash=sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876 \ + --hash=sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c \ + --hash=sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289 \ + --hash=sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298 \ + --hash=sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02 \ + --hash=sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f \ + --hash=sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2 \ + --hash=sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a \ + --hash=sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb \ + --hash=sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210 \ + --hash=sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5 \ + --hash=sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee \ + --hash=sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c \ + --hash=sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a \ + --hash=sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314 \ + --hash=sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd \ + --hash=sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65 \ + --hash=sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37 \ + --hash=sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4 \ + --hash=sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13 \ + --hash=sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894 \ + --hash=sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31 \ + --hash=sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e \ + --hash=sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709 \ + --hash=sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740 \ + --hash=sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc \ + --hash=sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d \ + --hash=sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c \ + --hash=sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c \ + --hash=sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d \ + --hash=sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906 \ + --hash=sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61 \ + --hash=sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57 \ + --hash=sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c \ + --hash=sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a \ + --hash=sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438 \ + --hash=sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946 \ + --hash=sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7 \ + --hash=sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96 \ + --hash=sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091 \ + --hash=sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae \ + --hash=sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d \ + --hash=sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88 \ + --hash=sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2 \ + --hash=sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd \ + --hash=sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541 \ + --hash=sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728 \ + --hash=sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178 \ + --hash=sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968 \ + --hash=sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346 \ + --hash=sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8 \ + --hash=sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93 \ + --hash=sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7 \ + --hash=sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273 \ + --hash=sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462 \ + --hash=sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94 \ + --hash=sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd \ + --hash=sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e \ + --hash=sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57 \ + --hash=sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b \ + --hash=sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9 \ + --hash=sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a \ + --hash=sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100 \ + --hash=sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325 \ + --hash=sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183 \ + --hash=sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556 \ + --hash=sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4 # via google-resumable-media -google-resumable-media==2.3.3 \ - --hash=sha256:27c52620bd364d1c8116eaac4ea2afcbfb81ae9139fb3199652fcac1724bfb6c \ - --hash=sha256:5b52774ea7a829a8cdaa8bd2d4c3d4bc660c91b30857ab2668d0eb830f4ea8c5 +google-resumable-media==2.4.0 \ + --hash=sha256:2aa004c16d295c8f6c33b2b4788ba59d366677c0a25ae7382436cb30f776deaa \ + --hash=sha256:8d5518502f92b9ecc84ac46779bd4f09694ecb3ba38a3e7ca737a86d15cbca1f # via google-cloud-storage -googleapis-common-protos==1.56.4 \ - --hash=sha256:8eb2cbc91b69feaf23e32452a7ae60e791e09967d81d4fcc7fc388182d1bd394 \ - --hash=sha256:c25873c47279387cfdcbdafa36149887901d36202cb645a0e4f29686bf6e4417 +googleapis-common-protos==1.57.0 \ + --hash=sha256:27a849d6205838fb6cc3c1c21cb9800707a661bb21c6ce7fb13e99eb1f8a0c46 \ + --hash=sha256:a9f4a1d7f6d9809657b7f1316a1aa527f6664891531bcfcc13b6696e685f443c # via google-api-core -idna==3.3 \ - --hash=sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff \ - --hash=sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d +idna==3.4 \ + --hash=sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4 \ + --hash=sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 # via requests -importlib-metadata==4.12.0 \ - --hash=sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670 \ - --hash=sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23 +importlib-metadata==5.0.0 \ + --hash=sha256:da31db32b304314d044d3c12c79bd59e307889b287ad12ff387b3500835fc2ab \ + --hash=sha256:ddb0e35065e8938f867ed4928d0ae5bf2a53b7773871bfe6bcc7e4fcdc7dea43 # via # -r requirements.in + # keyring # twine -jaraco-classes==3.2.2 \ - --hash=sha256:6745f113b0b588239ceb49532aa09c3ebb947433ce311ef2f8e3ad64ebb74594 \ - --hash=sha256:e6ef6fd3fcf4579a7a019d87d1e56a883f4e4c35cfe925f86731abc58804e647 +jaraco-classes==3.2.3 \ + --hash=sha256:2353de3288bc6b82120752201c6b1c1a14b058267fa424ed5ce5984e3b922158 \ + --hash=sha256:89559fa5c1d3c34eff6f631ad80bb21f378dbcbb35dd161fd2c6b93f5be2f98a # via keyring jeepney==0.8.0 \ --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ @@ -255,9 +288,9 @@ jinja2==3.1.2 \ --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \ --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 # via gcp-releasetool -keyring==23.9.0 \ - --hash=sha256:4c32a31174faaee48f43a7e2c7e9c3216ec5e95acf22a2bebfb4a1d05056ee44 \ - --hash=sha256:98f060ec95ada2ab910c195a2d4317be6ef87936a766b239c46aa3c7aac4f0db +keyring==23.11.0 \ + --hash=sha256:3dd30011d555f1345dec2c262f0153f2f0ca6bca041fb1dc4588349bb4c0ac1e \ + --hash=sha256:ad192263e2cdd5f12875dedc2da13534359a7e760e77f8d04b50968a821c2361 # via # gcp-releasetool # twine @@ -303,9 +336,9 @@ markupsafe==2.1.1 \ --hash=sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a \ --hash=sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7 # via jinja2 -more-itertools==8.14.0 \ - --hash=sha256:1bc4f91ee5b1b31ac7ceacc17c09befe6a40a503907baf9c839c229b5095cfd2 \ - --hash=sha256:c09443cd3d5438b8dafccd867a6bc1cb0894389e90cb53d227456b0b0bccb750 +more-itertools==9.0.0 \ + --hash=sha256:250e83d7e81d0c87ca6bd942e6aeab8cc9daa6096d12c5308f3f92fa5e5c1f41 \ + --hash=sha256:5a6257e40878ef0520b1803990e3e22303a41b5714006c32a3fd8304b26ea1ab # via jaraco-classes nox==2022.8.7 \ --hash=sha256:1b894940551dc5c389f9271d197ca5d655d40bdc6ccf93ed6880e4042760a34b \ @@ -321,34 +354,33 @@ pkginfo==1.8.3 \ --hash=sha256:848865108ec99d4901b2f7e84058b6e7660aae8ae10164e015a6dcf5b242a594 \ --hash=sha256:a84da4318dd86f870a9447a8c98340aa06216bfc6f2b7bdc4b8766984ae1867c # via twine -platformdirs==2.5.2 \ - --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ - --hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 +platformdirs==2.5.4 \ + --hash=sha256:1006647646d80f16130f052404c6b901e80ee4ed6bef6792e1f238a8969106f7 \ + --hash=sha256:af0276409f9a02373d540bf8480021a048711d572745aef4b7842dad245eba10 # via virtualenv -protobuf==3.20.2 \ - --hash=sha256:03d76b7bd42ac4a6e109742a4edf81ffe26ffd87c5993126d894fe48a120396a \ - --hash=sha256:09e25909c4297d71d97612f04f41cea8fa8510096864f2835ad2f3b3df5a5559 \ - --hash=sha256:18e34a10ae10d458b027d7638a599c964b030c1739ebd035a1dfc0e22baa3bfe \ - --hash=sha256:291fb4307094bf5ccc29f424b42268640e00d5240bf0d9b86bf3079f7576474d \ - --hash=sha256:2c0b040d0b5d5d207936ca2d02f00f765906622c07d3fa19c23a16a8ca71873f \ - --hash=sha256:384164994727f274cc34b8abd41a9e7e0562801361ee77437099ff6dfedd024b \ - --hash=sha256:3cb608e5a0eb61b8e00fe641d9f0282cd0eedb603be372f91f163cbfbca0ded0 \ - --hash=sha256:5d9402bf27d11e37801d1743eada54372f986a372ec9679673bfcc5c60441151 \ - --hash=sha256:712dca319eee507a1e7df3591e639a2b112a2f4a62d40fe7832a16fd19151750 \ - --hash=sha256:7a5037af4e76c975b88c3becdf53922b5ffa3f2cddf657574a4920a3b33b80f3 \ - --hash=sha256:8228e56a865c27163d5d1d1771d94b98194aa6917bcfb6ce139cbfa8e3c27334 \ - --hash=sha256:84a1544252a933ef07bb0b5ef13afe7c36232a774affa673fc3636f7cee1db6c \ - --hash=sha256:84fe5953b18a383fd4495d375fe16e1e55e0a3afe7b4f7b4d01a3a0649fcda9d \ - --hash=sha256:9c673c8bfdf52f903081816b9e0e612186684f4eb4c17eeb729133022d6032e3 \ - --hash=sha256:9f876a69ca55aed879b43c295a328970306e8e80a263ec91cf6e9189243c613b \ - --hash=sha256:a9e5ae5a8e8985c67e8944c23035a0dff2c26b0f5070b2f55b217a1c33bbe8b1 \ - --hash=sha256:b4fdb29c5a7406e3f7ef176b2a7079baa68b5b854f364c21abe327bbeec01cdb \ - --hash=sha256:c184485e0dfba4dfd451c3bd348c2e685d6523543a0f91b9fd4ae90eb09e8422 \ - --hash=sha256:c9cdf251c582c16fd6a9f5e95836c90828d51b0069ad22f463761d27c6c19019 \ - --hash=sha256:e39cf61bb8582bda88cdfebc0db163b774e7e03364bbf9ce1ead13863e81e359 \ - --hash=sha256:e8fbc522303e09036c752a0afcc5c0603e917222d8bedc02813fd73b4b4ed804 \ - --hash=sha256:f34464ab1207114e73bba0794d1257c150a2b89b7a9faf504e00af7c9fd58978 \ - --hash=sha256:f52dabc96ca99ebd2169dadbe018824ebda08a795c7684a0b7d203a290f3adb0 +protobuf==3.20.3 \ + --hash=sha256:03038ac1cfbc41aa21f6afcbcd357281d7521b4157926f30ebecc8d4ea59dcb7 \ + --hash=sha256:28545383d61f55b57cf4df63eebd9827754fd2dc25f80c5253f9184235db242c \ + --hash=sha256:2e3427429c9cffebf259491be0af70189607f365c2f41c7c3764af6f337105f2 \ + --hash=sha256:398a9e0c3eaceb34ec1aee71894ca3299605fa8e761544934378bbc6c97de23b \ + --hash=sha256:44246bab5dd4b7fbd3c0c80b6f16686808fab0e4aca819ade6e8d294a29c7050 \ + --hash=sha256:447d43819997825d4e71bf5769d869b968ce96848b6479397e29fc24c4a5dfe9 \ + --hash=sha256:67a3598f0a2dcbc58d02dd1928544e7d88f764b47d4a286202913f0b2801c2e7 \ + --hash=sha256:74480f79a023f90dc6e18febbf7b8bac7508420f2006fabd512013c0c238f454 \ + --hash=sha256:819559cafa1a373b7096a482b504ae8a857c89593cf3a25af743ac9ecbd23480 \ + --hash=sha256:899dc660cd599d7352d6f10d83c95df430a38b410c1b66b407a6b29265d66469 \ + --hash=sha256:8c0c984a1b8fef4086329ff8dd19ac77576b384079247c770f29cc8ce3afa06c \ + --hash=sha256:9aae4406ea63d825636cc11ffb34ad3379335803216ee3a856787bcf5ccc751e \ + --hash=sha256:a7ca6d488aa8ff7f329d4c545b2dbad8ac31464f1d8b1c87ad1346717731e4db \ + --hash=sha256:b6cc7ba72a8850621bfec987cb72623e703b7fe2b9127a161ce61e61558ad905 \ + --hash=sha256:bf01b5720be110540be4286e791db73f84a2b721072a3711efff6c324cdf074b \ + --hash=sha256:c02ce36ec760252242a33967d51c289fd0e1c0e6e5cc9397e2279177716add86 \ + --hash=sha256:d9e4432ff660d67d775c66ac42a67cf2453c27cb4d738fc22cb53b5d84c135d4 \ + --hash=sha256:daa564862dd0d39c00f8086f88700fdbe8bc717e993a21e90711acfed02f2402 \ + --hash=sha256:de78575669dddf6099a8a0f46a27e82a1783c557ccc38ee620ed8cc96d3be7d7 \ + --hash=sha256:e64857f395505ebf3d2569935506ae0dfc4a15cb80dc25261176c784662cdcc4 \ + --hash=sha256:f4bd856d702e5b0d96a00ec6b307b0f51c1982c2bf9c0052cf9019e9a544ba99 \ + --hash=sha256:f4c42102bc82a51108e449cbb32b19b180022941c727bac0cfd50170341f16ee # via # gcp-docuploader # gcp-releasetool @@ -377,9 +409,9 @@ pygments==2.13.0 \ # via # readme-renderer # rich -pyjwt==2.4.0 \ - --hash=sha256:72d1d253f32dbd4f5c88eaf1fdc62f3a19f676ccbadb9dbc5d07e951b2b26daf \ - --hash=sha256:d42908208c699b3b973cbeb01a969ba6a96c821eefb1c5bfe4c390c01d67abba +pyjwt==2.6.0 \ + --hash=sha256:69285c7e31fc44f68a1feb309e948e0df53259d579295e6cfe2b1792329f05fd \ + --hash=sha256:d83c3d892a77bbb74d3e1a2cfa90afaadb60945205d1095d9221f04466f64c14 # via gcp-releasetool pyparsing==3.0.9 \ --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ @@ -392,9 +424,9 @@ python-dateutil==2.8.2 \ --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 # via gcp-releasetool -readme-renderer==37.0 \ - --hash=sha256:07b7ea234e03e58f77cc222e206e6abb8f4c0435becce5104794ee591f9301c5 \ - --hash=sha256:9fa416704703e509eeb900696751c908ddeb2011319d93700d8f18baff887a69 +readme-renderer==37.3 \ + --hash=sha256:cd653186dfc73055656f090f227f5cb22a046d7f71a841dfa305f55c9a513273 \ + --hash=sha256:f67a16caedfa71eef48a31b39708637a6f4664c4394801a7b0d6432d13907343 # via twine requests==2.28.1 \ --hash=sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983 \ @@ -405,17 +437,17 @@ requests==2.28.1 \ # google-cloud-storage # requests-toolbelt # twine -requests-toolbelt==0.9.1 \ - --hash=sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f \ - --hash=sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0 +requests-toolbelt==0.10.1 \ + --hash=sha256:18565aa58116d9951ac39baa288d3adb5b3ff975c4f25eee78555d89e8f247f7 \ + --hash=sha256:62e09f7ff5ccbda92772a29f394a49c3ad6cb181d568b1337626b2abb628a63d # via twine rfc3986==2.0.0 \ --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c # via twine -rich==12.5.1 \ - --hash=sha256:2eb4e6894cde1e017976d2975ac210ef515d7548bc595ba20e195fb9628acdeb \ - --hash=sha256:63a5c5ce3673d3d5fbbf23cd87e11ab84b6b451436f1b7f19ec54b6bc36ed7ca +rich==12.6.0 \ + --hash=sha256:a4eb26484f2c82589bd9a17c73d32a010b1e29d89f1604cd9bf3a2097b81bb5e \ + --hash=sha256:ba3a3775974105c221d31141f2c116f4fd65c5ceb0698657a11e9f295ec93fd0 # via twine rsa==4.9 \ --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ @@ -437,9 +469,9 @@ twine==4.0.1 \ --hash=sha256:42026c18e394eac3e06693ee52010baa5313e4811d5a11050e7d48436cf41b9e \ --hash=sha256:96b1cf12f7ae611a4a40b6ae8e9570215daff0611828f5fe1f37a16255ab24a0 # via -r requirements.in -typing-extensions==4.3.0 \ - --hash=sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02 \ - --hash=sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6 +typing-extensions==4.4.0 \ + --hash=sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa \ + --hash=sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e # via -r requirements.in urllib3==1.26.12 \ --hash=sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e \ @@ -447,25 +479,25 @@ urllib3==1.26.12 \ # via # requests # twine -virtualenv==20.16.4 \ - --hash=sha256:014f766e4134d0008dcaa1f95bafa0fb0f575795d07cae50b1bee514185d6782 \ - --hash=sha256:035ed57acce4ac35c82c9d8802202b0e71adac011a511ff650cbcf9635006a22 +virtualenv==20.16.7 \ + --hash=sha256:8691e3ff9387f743e00f6bb20f70121f5e4f596cae754531f2b3b3a1b1ac696e \ + --hash=sha256:efd66b00386fdb7dbe4822d172303f40cd05e50e01740b19ea42425cbe653e29 # via nox webencodings==0.5.1 \ --hash=sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 \ --hash=sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923 # via bleach -wheel==0.37.1 \ - --hash=sha256:4bdcd7d840138086126cd09254dc6195fb4fc6f01c050a1d7236f2630db1d22a \ - --hash=sha256:e9a504e793efbca1b8e0e9cb979a249cf4a0a7b5b8c9e8b65a5e39d49529c1c4 +wheel==0.38.4 \ + --hash=sha256:965f5259b566725405b05e7cf774052044b1ed30119b5d586b2703aafe8719ac \ + --hash=sha256:b60533f3f5d530e971d6737ca6d58681ee434818fab630c83a734bb10c083ce8 # via -r requirements.in -zipp==3.8.1 \ - --hash=sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2 \ - --hash=sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009 +zipp==3.10.0 \ + --hash=sha256:4fcb6f278987a6605757302a6e40e896257570d11c51628968ccb2a47e80c6c1 \ + --hash=sha256:7a7262fd930bd3e36c50b9a64897aec3fafff3dfdeec9623ae22b40e93f99bb8 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==65.2.0 \ - --hash=sha256:7f4bc85450898a09f76ebf28b72fa25bc7111f6c7d665d514a60bba9c75ef2a9 \ - --hash=sha256:a3ca5857c89f82f5c9410e8508cb32f4872a3bafd4aa7ae122a24ca33bccc750 +setuptools==65.5.1 \ + --hash=sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31 \ + --hash=sha256:e197a19aa8ec9722928f2206f8de752def0e4c9fc6953527360d1c36d94ddb2f # via -r requirements.in diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index b053ca568..e8283c38d 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -18,7 +18,7 @@ import os from pathlib import Path import sys -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, Optional import nox @@ -109,22 +109,6 @@ def get_pytest_env_vars() -> Dict[str, str]: # -def _determine_local_import_names(start_dir: str) -> List[str]: - """Determines all import names that should be considered "local". - - This is used when running the linter to insure that import order is - properly checked. - """ - file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] - return [ - basename - for basename, extension in file_ext_pairs - if extension == ".py" - or os.path.isdir(os.path.join(start_dir, basename)) - and basename not in ("__pycache__") - ] - - # Linting with flake8. # # We ignore the following rules: @@ -139,7 +123,6 @@ def _determine_local_import_names(start_dir: str) -> List[str]: "--show-source", "--builtin=gettext", "--max-complexity=20", - "--import-order-style=google", "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", "--max-line-length=88", @@ -149,14 +132,11 @@ def _determine_local_import_names(start_dir: str) -> List[str]: @nox.session def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: - session.install("flake8", "flake8-import-order") + session.install("flake8") else: - session.install("flake8", "flake8-import-order", "flake8-annotations") + session.install("flake8", "flake8-annotations") - local_names = _determine_local_import_names(".") args = FLAKE8_COMMON_ARGS + [ - "--application-import-names", - ",".join(local_names), ".", ] session.run("flake8", *args) diff --git a/samples/magics/noxfile.py b/samples/magics/noxfile.py index b053ca568..e8283c38d 100644 --- a/samples/magics/noxfile.py +++ b/samples/magics/noxfile.py @@ -18,7 +18,7 @@ import os from pathlib import Path import sys -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, Optional import nox @@ -109,22 +109,6 @@ def get_pytest_env_vars() -> Dict[str, str]: # -def _determine_local_import_names(start_dir: str) -> List[str]: - """Determines all import names that should be considered "local". - - This is used when running the linter to insure that import order is - properly checked. - """ - file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] - return [ - basename - for basename, extension in file_ext_pairs - if extension == ".py" - or os.path.isdir(os.path.join(start_dir, basename)) - and basename not in ("__pycache__") - ] - - # Linting with flake8. # # We ignore the following rules: @@ -139,7 +123,6 @@ def _determine_local_import_names(start_dir: str) -> List[str]: "--show-source", "--builtin=gettext", "--max-complexity=20", - "--import-order-style=google", "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", "--max-line-length=88", @@ -149,14 +132,11 @@ def _determine_local_import_names(start_dir: str) -> List[str]: @nox.session def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: - session.install("flake8", "flake8-import-order") + session.install("flake8") else: - session.install("flake8", "flake8-import-order", "flake8-annotations") + session.install("flake8", "flake8-annotations") - local_names = _determine_local_import_names(".") args = FLAKE8_COMMON_ARGS + [ - "--application-import-names", - ",".join(local_names), ".", ] session.run("flake8", *args) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index b053ca568..e8283c38d 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -18,7 +18,7 @@ import os from pathlib import Path import sys -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, Optional import nox @@ -109,22 +109,6 @@ def get_pytest_env_vars() -> Dict[str, str]: # -def _determine_local_import_names(start_dir: str) -> List[str]: - """Determines all import names that should be considered "local". - - This is used when running the linter to insure that import order is - properly checked. - """ - file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] - return [ - basename - for basename, extension in file_ext_pairs - if extension == ".py" - or os.path.isdir(os.path.join(start_dir, basename)) - and basename not in ("__pycache__") - ] - - # Linting with flake8. # # We ignore the following rules: @@ -139,7 +123,6 @@ def _determine_local_import_names(start_dir: str) -> List[str]: "--show-source", "--builtin=gettext", "--max-complexity=20", - "--import-order-style=google", "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", "--max-line-length=88", @@ -149,14 +132,11 @@ def _determine_local_import_names(start_dir: str) -> List[str]: @nox.session def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: - session.install("flake8", "flake8-import-order") + session.install("flake8") else: - session.install("flake8", "flake8-import-order", "flake8-annotations") + session.install("flake8", "flake8-annotations") - local_names = _determine_local_import_names(".") args = FLAKE8_COMMON_ARGS + [ - "--application-import-names", - ",".join(local_names), ".", ] session.run("flake8", *args) From 589c8bd7c91e73981d398d78e5b53a433caa6ff9 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 8 Dec 2022 14:34:37 -0500 Subject: [PATCH 044/536] build(deps): bump certifi from 2022.9.24 to 2022.12.7 in /synthtool/gcp/templates/python_library/.kokoro (#1432) Source-Link: https://github.com/googleapis/synthtool/commit/b4fe62efb5114b6738ad4b13d6f654f2bf4b7cc0 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:3bf87e47c2173d7eed42714589dc4da2c07c3268610f1e47f8e1a30decbfc7f1 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/requirements.txt | 6 +++--- .pre-commit-config.yaml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index bb21147e4..fccaa8e84 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3abfa0f1886adaf0b83f07cb117b24a639ea1cb9cffe56d43280b977033563eb + digest: sha256:3bf87e47c2173d7eed42714589dc4da2c07c3268610f1e47f8e1a30decbfc7f1 diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 9c1b9be34..05dc4672e 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -20,9 +20,9 @@ cachetools==5.2.0 \ --hash=sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757 \ --hash=sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db # via google-auth -certifi==2022.9.24 \ - --hash=sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14 \ - --hash=sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382 +certifi==2022.12.7 \ + --hash=sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3 \ + --hash=sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18 # via requests cffi==1.15.1 \ --hash=sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5 \ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 46d237160..5405cc8ff 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,7 +25,7 @@ repos: rev: 22.3.0 hooks: - id: black -- repo: https://gitlab.com/pycqa/flake8 +- repo: https://github.com/pycqa/flake8 rev: 3.9.2 hooks: - id: flake8 From e1aa9218ad22f85c9a6cab8b61d013779376a582 Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Thu, 8 Dec 2022 15:53:38 -0800 Subject: [PATCH 045/536] deps: update dependencies (#1282) * update dependencies * deps: pyarrow extras * clean up comments * add test pyarrow skips * replace storage checks * update tests * update tests * Update setup.py * update system tests * update verify_pandas_imports * add pyarrow guards * add datetime check * change pyarrow import * update * add pyarrow skips * fix types * lint * Update google/cloud/bigquery/client.py Co-authored-by: Tim Swast * update pyarrow version * update test * lint * update pyarrow req * update noxfile * remove bignum check * remove comments * add test importorskip * update test * update test * update dependency * change version * update imports Co-authored-by: Anthonios Partheniou Co-authored-by: Tim Swast --- docs/snippets.py | 5 + google/cloud/bigquery/__init__.py | 5 + google/cloud/bigquery/_helpers.py | 74 +++++++++- google/cloud/bigquery/_pandas_helpers.py | 127 +++++++++------- google/cloud/bigquery/client.py | 55 ++++++- google/cloud/bigquery/exceptions.py | 25 ++++ google/cloud/bigquery/job/query.py | 6 +- google/cloud/bigquery/magics/magics.py | 11 ++ google/cloud/bigquery/table.py | 39 ++++- setup.py | 25 +++- testing/constraints-3.7.txt | 4 +- tests/system/test_client.py | 24 +++- tests/system/test_pandas.py | 27 ++-- tests/unit/job/test_query_pandas.py | 32 ++++- tests/unit/test__helpers.py | 71 +++++++++ tests/unit/test__pandas_helpers.py | 117 +++++++++++++-- tests/unit/test_client.py | 176 ++++++++++++++++++++++- tests/unit/test_dbapi__helpers.py | 6 + tests/unit/test_dbapi_connection.py | 22 ++- tests/unit/test_dbapi_cursor.py | 25 +++- tests/unit/test_magics.py | 77 +++++++++- tests/unit/test_table.py | 176 +++++++++++++++++++++-- tests/unit/test_table_pandas.py | 2 +- 23 files changed, 1013 insertions(+), 118 deletions(-) create mode 100644 google/cloud/bigquery/exceptions.py diff --git a/docs/snippets.py b/docs/snippets.py index 238fd52c3..05e4fa378 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -31,6 +31,11 @@ except (ImportError, AttributeError): pandas = None +try: + import pyarrow +except (ImportError, AttributeError): + pyarrow = None + from google.api_core.exceptions import InternalServerError from google.api_core.exceptions import ServiceUnavailable from google.api_core.exceptions import TooManyRequests diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 5a4520476..ebd5b3109 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -42,6 +42,8 @@ from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlTypeNames +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery.exceptions import LegacyPyarrowError from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -195,6 +197,9 @@ "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", + # Custom exceptions + "LegacyBigQueryStorageError", + "LegacyPyarrowError", ] diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index b59bc86d3..014a721a8 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -20,7 +20,7 @@ import math import re import os -from typing import Optional, Union +from typing import Any, Optional, Union from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -32,6 +32,11 @@ import packaging.version +from google.cloud.bigquery.exceptions import ( + LegacyBigQueryStorageError, + LegacyPyarrowError, +) + _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" _TIMEONLY_W_MICROS = "%H:%M:%S.%f" @@ -50,6 +55,10 @@ r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" ) +_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") + +_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") + _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST" @@ -83,7 +92,7 @@ def installed_version(self) -> packaging.version.Version: getattr(bigquery_storage, "__version__", "0.0.0") ) - return self._installed_version + return self._installed_version # type: ignore @property def is_read_session_optional(self) -> bool: @@ -93,6 +102,29 @@ def is_read_session_optional(self) -> bool: """ return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION + def verify_version(self): + """Verify that a recent enough version of BigQuery Storage extra is + installed. + + The function assumes that google-cloud-bigquery-storage extra is + installed, and should thus be used in places where this assumption + holds. + + Because `pip` can install an outdated version of this extra despite the + constraints in `setup.py`, the calling code can use this helper to + verify the version compatibility at runtime. + + Raises: + LegacyBigQueryStorageError: + If the google-cloud-bigquery-storage package is outdated. + """ + if self.installed_version < _MIN_BQ_STORAGE_VERSION: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, please upgrade " + f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})." + ) + raise LegacyBigQueryStorageError(msg) + class PyarrowVersions: """Version comparisons for pyarrow package.""" @@ -120,6 +152,44 @@ def installed_version(self) -> packaging.version.Version: def use_compliant_nested_type(self) -> bool: return self.installed_version.major >= 4 + def try_import(self, raise_if_error: bool = False) -> Any: + """Verify that a recent enough version of pyarrow extra is + installed. + + The function assumes that pyarrow extra is installed, and should thus + be used in places where this assumption holds. + + Because `pip` can install an outdated version of this extra despite the + constraints in `setup.py`, the calling code can use this helper to + verify the version compatibility at runtime. + + Returns: + The ``pyarrow`` module or ``None``. + + Raises: + LegacyPyarrowError: + If the pyarrow package is outdated and ``raise_if_error`` is ``True``. + """ + try: + import pyarrow + except ImportError as exc: # pragma: NO COVER + if raise_if_error: + raise LegacyPyarrowError( + f"pyarrow package not found. Install pyarrow version >= {_MIN_PYARROW_VERSION}." + ) from exc + return None + + if self.installed_version < _MIN_PYARROW_VERSION: + if raise_if_error: + msg = ( + "Dependency pyarrow is outdated, please upgrade " + f"it to version >= {_MIN_PYARROW_VERSION} (version found: {self.installed_version})." + ) + raise LegacyPyarrowError(msg) + return None + + return pyarrow + BQ_STORAGE_VERSIONS = BQStorageVersions() PYARROW_VERSIONS = PyarrowVersions() diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 0d05f53a3..3d7e7d793 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -22,6 +22,11 @@ import queue import warnings +from packaging import version + +from google.cloud.bigquery import _helpers +from google.cloud.bigquery import schema + try: import pandas # type: ignore @@ -43,9 +48,7 @@ db_dtypes_import_exception = exc date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype - -import pyarrow # type: ignore -import pyarrow.parquet # type: ignore +pyarrow = _helpers.PYARROW_VERSIONS.try_import() try: # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` @@ -77,10 +80,6 @@ def _to_wkb(v): # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. _ARROW_COMPRESSION_SUPPORT = True -from google.cloud.bigquery import _helpers -from google.cloud.bigquery import schema - - _LOGGER = logging.getLogger(__name__) _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. @@ -141,52 +140,65 @@ def pyarrow_timestamp(): return pyarrow.timestamp("us", tz="UTC") -# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py -# When modifying it be sure to update it there as well. -BQ_TO_ARROW_SCALARS = { - "BIGNUMERIC": pyarrow_bignumeric, - "BOOL": pyarrow.bool_, - "BOOLEAN": pyarrow.bool_, - "BYTES": pyarrow.binary, - "DATE": pyarrow.date32, - "DATETIME": pyarrow_datetime, - "FLOAT": pyarrow.float64, - "FLOAT64": pyarrow.float64, - "GEOGRAPHY": pyarrow.string, - "INT64": pyarrow.int64, - "INTEGER": pyarrow.int64, - "NUMERIC": pyarrow_numeric, - "STRING": pyarrow.string, - "TIME": pyarrow_time, - "TIMESTAMP": pyarrow_timestamp, -} -ARROW_SCALAR_IDS_TO_BQ = { - # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes - pyarrow.bool_().id: "BOOL", - pyarrow.int8().id: "INT64", - pyarrow.int16().id: "INT64", - pyarrow.int32().id: "INT64", - pyarrow.int64().id: "INT64", - pyarrow.uint8().id: "INT64", - pyarrow.uint16().id: "INT64", - pyarrow.uint32().id: "INT64", - pyarrow.uint64().id: "INT64", - pyarrow.float16().id: "FLOAT64", - pyarrow.float32().id: "FLOAT64", - pyarrow.float64().id: "FLOAT64", - pyarrow.time32("ms").id: "TIME", - pyarrow.time64("ns").id: "TIME", - pyarrow.timestamp("ns").id: "TIMESTAMP", - pyarrow.date32().id: "DATE", - pyarrow.date64().id: "DATETIME", # because millisecond resolution - pyarrow.binary().id: "BYTES", - pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact scale and precision don't matter, see below. - pyarrow.decimal128(38, scale=9).id: "NUMERIC", - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal256 instances. - pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", -} +if pyarrow: + # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py + # When modifying it be sure to update it there as well. + BQ_TO_ARROW_SCALARS = { + "BOOL": pyarrow.bool_, + "BOOLEAN": pyarrow.bool_, + "BYTES": pyarrow.binary, + "DATE": pyarrow.date32, + "DATETIME": pyarrow_datetime, + "FLOAT": pyarrow.float64, + "FLOAT64": pyarrow.float64, + "GEOGRAPHY": pyarrow.string, + "INT64": pyarrow.int64, + "INTEGER": pyarrow.int64, + "NUMERIC": pyarrow_numeric, + "STRING": pyarrow.string, + "TIME": pyarrow_time, + "TIMESTAMP": pyarrow_timestamp, + } + ARROW_SCALAR_IDS_TO_BQ = { + # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes + pyarrow.bool_().id: "BOOL", + pyarrow.int8().id: "INT64", + pyarrow.int16().id: "INT64", + pyarrow.int32().id: "INT64", + pyarrow.int64().id: "INT64", + pyarrow.uint8().id: "INT64", + pyarrow.uint16().id: "INT64", + pyarrow.uint32().id: "INT64", + pyarrow.uint64().id: "INT64", + pyarrow.float16().id: "FLOAT64", + pyarrow.float32().id: "FLOAT64", + pyarrow.float64().id: "FLOAT64", + pyarrow.time32("ms").id: "TIME", + pyarrow.time64("ns").id: "TIME", + pyarrow.timestamp("ns").id: "TIMESTAMP", + pyarrow.date32().id: "DATE", + pyarrow.date64().id: "DATETIME", # because millisecond resolution + pyarrow.binary().id: "BYTES", + pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. + pyarrow.decimal128(38, scale=9).id: "NUMERIC", + } + + if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): + BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + _BIGNUMERIC_SUPPORT = True + else: + _BIGNUMERIC_SUPPORT = False # pragma: NO COVER + +else: # pragma: NO COVER + BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER + ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER + _BIGNUMERIC_SUPPORT = False # pragma: NO COVER + + BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { "GEOGRAPHY": { b"ARROW:extension:name": b"google:sqlType:geography", @@ -480,6 +492,13 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # If schema detection was not successful for all columns, also try with # pyarrow, if available. if unknown_type_fields: + if not pyarrow: + msg = "Could not determine the type of columns: {}".format( + ", ".join(field.name for field in unknown_type_fields) + ) + warnings.warn(msg) + return None # We cannot detect the schema in full. + # The augment_schema() helper itself will also issue unknown type # warnings if detection still fails for any of the fields. bq_schema_out = augment_schema(dataframe, bq_schema_out) @@ -654,6 +673,8 @@ def dataframe_to_parquet( This argument is ignored for ``pyarrow`` versions earlier than ``4.0.0``. """ + pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) + import pyarrow.parquet # type: ignore kwargs = ( diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 1f3647e71..1885ab67e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -27,6 +27,7 @@ import json import math import os +import packaging.version import tempfile import typing from typing import ( @@ -44,6 +45,13 @@ import uuid import warnings +try: + import pyarrow # type: ignore + + _PYARROW_VERSION = packaging.version.parse(pyarrow.__version__) +except ImportError: # pragma: NO COVER + pyarrow = None + from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload # type: ignore from google.resumable_media.requests import ResumableUpload @@ -56,9 +64,14 @@ import google.cloud._helpers # type: ignore from google.cloud import exceptions # pytype: disable=import-error from google.cloud.client import ClientWithProject # type: ignore # pytype: disable=import-error -from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( - DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, -) + +try: + from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( + DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, + ) +except ImportError: + DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore + from google.cloud.bigquery import _job_helpers from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id @@ -67,6 +80,7 @@ from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._helpers import _get_bigquery_host +from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _DEFAULT_HOST from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers @@ -75,6 +89,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums from google.cloud.bigquery.enums import AutoRowIDs +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( @@ -144,6 +159,9 @@ TIMEOUT_HEADER = "X-Server-Timeout" +# https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 +_PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) + class Project(object): """Wrapper for resource describing a BigQuery project. @@ -519,8 +537,20 @@ def _ensure_bqstorage_client( Returns: A BigQuery Storage API client. """ - from google.cloud import bigquery_storage + try: + from google.cloud import bigquery_storage # type: ignore + except ImportError: + warnings.warn( + "Cannot create BigQuery Storage client, the dependency " + "google-cloud-bigquery-storage is not installed." + ) + return None + try: + BQ_STORAGE_VERSIONS.verify_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return None if bqstorage_client is None: bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=self._credentials, @@ -2529,6 +2559,9 @@ def load_table_from_dataframe( google.cloud.bigquery.job.LoadJob: A new load job. Raises: + ValueError: + If a usable parquet engine cannot be found. This method + requires :mod:`pyarrow` to be installed. TypeError: If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` class. @@ -2566,6 +2599,10 @@ def load_table_from_dataframe( ) ) + if pyarrow is None and job_config.source_format == job.SourceFormat.PARQUET: + # pyarrow is now the only supported parquet engine. + raise ValueError("This method requires pyarrow to be installed") + if location is None: location = self.location @@ -2621,6 +2658,16 @@ def load_table_from_dataframe( try: if job_config.source_format == job.SourceFormat.PARQUET: + if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: + msg = ( + "Loading dataframe data in PARQUET format with pyarrow " + f"{_PYARROW_VERSION} can result in data corruption. It is " + "therefore *strongly* advised to use a different pyarrow " + "version or a different source format. " + "See: https://github.com/googleapis/python-bigquery/issues/781" + ) + warnings.warn(msg, category=RuntimeWarning) + if job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py new file mode 100644 index 000000000..2bab97fea --- /dev/null +++ b/google/cloud/bigquery/exceptions.py @@ -0,0 +1,25 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class BigQueryError(Exception): + """Base class for all custom exceptions defined by the BigQuery client.""" + + +class LegacyBigQueryStorageError(BigQueryError): + """Raised when too old a version of BigQuery Storage extra is detected at runtime.""" + + +class LegacyPyarrowError(BigQueryError): + """Raised when too old a version of pyarrow package is detected at runtime.""" diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index b0286deae..e32e74129 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1593,6 +1593,10 @@ def to_arrow( headers from the query results. The column headers are derived from the destination table's schema. + Raises: + ValueError: + If the :mod:`pyarrow` library cannot be imported. + .. versionadded:: 1.17.0 """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) @@ -1694,7 +1698,7 @@ def to_dataframe( # that should only exist here in the QueryJob method. def to_geodataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 613cc1b58..f92f77541 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -747,6 +747,17 @@ def _make_bqstorage_client(client, use_bqstorage_api, client_options): if not use_bqstorage_api: return None + try: + from google.cloud import bigquery_storage # type: ignore # noqa: F401 + except ImportError as err: + customized_error = ImportError( + "The default BigQuery Storage API client cannot be used, install " + "the missing google-cloud-bigquery-storage and pyarrow packages " + "to use it. Alternatively, use the classic REST API by specifying " + "the --use_rest_api magic option." + ) + raise customized_error from err + try: from google.api_core.gapic_v1 import client_info as gapic_client_info except ImportError as err: diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 96888d62d..a2110a9fb 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -29,7 +29,10 @@ except ImportError: # pragma: NO COVER pandas = None -import pyarrow # type: ignore +try: + import pyarrow # type: ignore +except ImportError: # pragma: NO COVER + pyarrow = None try: import geopandas # type: ignore @@ -52,6 +55,7 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -63,8 +67,9 @@ # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. import pandas - import geopandas - from google.cloud import bigquery_storage + import pyarrow + import geopandas # type: ignore + from google.cloud import bigquery_storage # type: ignore from google.cloud.bigquery.dataset import DatasetReference @@ -72,6 +77,10 @@ "The geopandas library is not installed, please install " "geopandas to use the to_geodataframe() function." ) +_NO_PYARROW_ERROR = ( + "The pyarrow library is not installed, please install " + "pyarrow to use the to_arrow() function." +) _NO_SHAPELY_ERROR = ( "The shapely library is not installed, please install " "shapely to use the geography_as_object option." @@ -1585,6 +1594,17 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): if self.max_results is not None: return False + try: + from google.cloud import bigquery_storage # noqa: F401 + except ImportError: + return False + + try: + _helpers.BQ_STORAGE_VERSIONS.verify_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return False + return True def _get_next_page_response(self): @@ -1654,7 +1674,7 @@ def _to_page_iterable( def to_arrow_iterable( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore ) -> Iterator["pyarrow.RecordBatch"]: """[Beta] Create an iterable of class:`pyarrow.RecordBatch`, to process the table as a stream. @@ -1761,8 +1781,15 @@ def to_arrow( headers from the query results. The column headers are derived from the destination table's schema. + Raises: + ValueError: If the :mod:`pyarrow` library cannot be imported. + + .. versionadded:: 1.17.0 """ + if pyarrow is None: + raise ValueError(_NO_PYARROW_ERROR) + self._maybe_warn_max_results(bqstorage_client) if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): @@ -2041,7 +2068,7 @@ def __can_cast_timestamp_ns(column): # changes to job.QueryJob.to_geodataframe() def to_geodataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, @@ -2195,6 +2222,8 @@ def to_arrow( Returns: pyarrow.Table: An empty :class:`pyarrow.Table`. """ + if pyarrow is None: + raise ValueError(_NO_PYARROW_ERROR) return pyarrow.Table.from_arrays(()) def to_dataframe( diff --git a/setup.py b/setup.py index 5fc694c6f..9e1bfbbce 100644 --- a/setup.py +++ b/setup.py @@ -34,8 +34,7 @@ # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 "google-api-core[grpc] >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", - "proto-plus >= 1.22.0, <2.0.0dev", + "proto-plus >= 1.15.0, <2.0.0dev", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 @@ -44,16 +43,30 @@ "packaging >= 14.3, <22.0.0dev", "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", - "pyarrow >= 3.0.0, < 11.0dev", "requests >= 2.21.0, < 3.0.0dev", ] +pyarrow_dependency = "pyarrow >= 3.0.0" extras = { # Keep the no-op bqstorage extra for backward compatibility. # See: https://github.com/googleapis/python-bigquery/issues/757 - "bqstorage": [], - "pandas": ["pandas>=1.0.0", "db-dtypes>=0.3.0,<2.0.0dev"], + "bqstorage": [ + "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", + # Due to an issue in pip's dependency resolver, the `grpc` extra is not + # installed, even though `google-cloud-bigquery-storage` specifies it + # as `google-api-core[grpc]`. We thus need to explicitly specify it here. + # See: https://github.com/googleapis/python-bigquery/issues/83 The + # grpc.Channel.close() method isn't added until 1.32.0. + # https://github.com/grpc/grpc/pull/15254 + "grpcio >= 1.47.0, < 2.0dev", + pyarrow_dependency, + ], + "pandas": [ + "pandas>=1.1.0", + pyarrow_dependency, + "db-dtypes>=0.3.0,<2.0.0dev", + ], "ipywidgets": ["ipywidgets==7.7.1"], - "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <2.0dev"], "ipython": ["ipython>=7.0.1,!=8.1.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 2c5b169db..149d6c496 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -23,6 +23,6 @@ protobuf==3.19.5 pyarrow==3.0.0 python-dateutil==2.7.3 requests==2.21.0 -Shapely==1.6.4.post2 +Shapely==1.8.4 six==1.13.0 -tqdm==4.7.4 \ No newline at end of file +tqdm==4.7.4 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 25edc18e1..575898209 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -42,14 +42,11 @@ from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums -from google.cloud import bigquery_storage from google.cloud import storage from google.cloud.datacatalog_v1 import types as datacatalog_types from google.cloud.datacatalog_v1 import PolicyTagManagerClient import psutil import pytest -import pyarrow -import pyarrow.types from test_utils.retry import RetryErrors from test_utils.retry import RetryInstanceState from test_utils.retry import RetryResult @@ -57,6 +54,16 @@ from . import helpers +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None + +try: + import pyarrow + import pyarrow.types +except ImportError: # pragma: NO COVER + pyarrow = None JOB_TIMEOUT = 120 # 2 minutes DATA_PATH = pathlib.Path(__file__).parent.parent / "data" @@ -1738,6 +1745,10 @@ def test_dbapi_fetchall_from_script(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials @@ -1796,6 +1807,9 @@ def test_dbapi_dry_run_query(self): self.assertEqual(list(rows), []) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_dbapi_connection_does_not_leak_sockets(self): current_process = psutil.Process() conn_count_start = len(current_process.connections()) @@ -2263,6 +2277,10 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(found[7], e_favtime) self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_nested_table_to_arrow(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 34e4243c4..91305b450 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -27,7 +27,7 @@ import pytest from google.cloud import bigquery -from google.cloud import bigquery_storage + from google.cloud.bigquery import enums from . import helpers @@ -36,6 +36,9 @@ pandas = pytest.importorskip("pandas", minversion="0.23.0") numpy = pytest.importorskip("numpy") +bigquery_storage = pytest.importorskip( + "google.cloud.bigquery_storage", minversion="2.0.0" +) PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") @@ -373,10 +376,10 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), ) num_rows = 100 @@ -390,10 +393,10 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): ("geo_col", nulls), ("int_col", nulls), ("num_col", nulls), - ("bignum_col", nulls), ("str_col", nulls), ("time_col", nulls), ("ts_col", nulls), + ("bignum_col", nulls), ] df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -469,10 +472,10 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), ) df_data = [ @@ -502,14 +505,6 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id decimal.Decimal("99999999999999999999999999999.999999999"), ], ), - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ), ("str_col", ["abc", None, "def"]), ( "time_col", @@ -525,6 +520,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ] df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index a45401664..a2444efdd 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -17,12 +17,15 @@ import json import mock -import pyarrow import pytest -from google.cloud import bigquery_storage -import google.cloud.bigquery_storage_v1.reader -import google.cloud.bigquery_storage_v1.services.big_query_read.client + +try: + from google.cloud import bigquery_storage + import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.services.big_query_read.client +except (ImportError, AttributeError): # pragma: NO COVER + bigquery_storage = None try: import pandas @@ -47,6 +50,12 @@ pandas = pytest.importorskip("pandas") +try: + import pyarrow + import pyarrow.types +except ImportError: # pragma: NO COVER + pyarrow = None + @pytest.fixture def table_read_options_kwarg(): @@ -89,6 +98,9 @@ def test__contains_order_by(query, expected): assert not mut._contains_order_by(query) +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) @pytest.mark.parametrize( "query", ( @@ -179,6 +191,7 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): ) +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow(): from google.cloud.bigquery.job import QueryJob as target_class @@ -265,6 +278,7 @@ def test_to_arrow(): ] +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow_max_results_no_progress_bar(): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class @@ -300,6 +314,7 @@ def test_to_arrow_max_results_no_progress_bar(): assert tbl.num_rows == 2 +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") def test_to_arrow_w_tqdm_w_query_plan(tqdm_mock): @@ -356,6 +371,7 @@ def test_to_arrow_w_tqdm_w_query_plan(tqdm_mock): ) +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") def test_to_arrow_w_tqdm_w_pending_status(tqdm_mock): @@ -408,6 +424,7 @@ def test_to_arrow_w_tqdm_w_pending_status(tqdm_mock): ) +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") def test_to_arrow_w_tqdm_wo_query_plan(tqdm_mock): @@ -510,6 +527,9 @@ def test_to_dataframe_ddl_query(): assert len(df) == 0 +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_to_dataframe_bqstorage(table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class @@ -584,6 +604,9 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): bqstorage_client.read_rows.assert_called_once_with(stream_id) +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_to_dataframe_bqstorage_no_pyarrow_compression(): from google.cloud.bigquery.job import QueryJob as target_class @@ -629,6 +652,7 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 2e714c707..4fb86f665 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -19,7 +19,18 @@ import mock +try: + from google.cloud import bigquery_storage # type: ignore +except ImportError: # pragma: NO COVER + bigquery_storage = None +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + + +@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") class TestBQStorageVersions(unittest.TestCase): def tearDown(self): from google.cloud.bigquery import _helpers @@ -32,6 +43,37 @@ def _object_under_test(self): return _helpers.BQStorageVersions() + def _call_fut(self): + from google.cloud.bigquery import _helpers + + _helpers.BQ_STORAGE_VERSIONS._installed_version = None + return _helpers.BQ_STORAGE_VERSIONS.verify_version() + + def test_raises_no_error_w_recent_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): + try: + self._call_fut() + except LegacyBigQueryStorageError: # pragma: NO COVER + self.fail("Legacy error raised with a non-legacy dependency version.") + + def test_raises_error_w_legacy_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): + with self.assertRaises(LegacyBigQueryStorageError): + self._call_fut() + + def test_raises_error_w_unknown_bqstorage_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: + del fake_module.__version__ + error_pattern = r"version found: 0.0.0" + with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): + self._call_fut() + def test_installed_version_returns_cached(self): versions = self._object_under_test() versions._installed_version = object() @@ -58,6 +100,7 @@ def test_is_read_session_optional_false(self): assert not versions.is_read_session_optional +@unittest.skipIf(pyarrow is None, "Requires `pyarrow`") class TestPyarrowVersions(unittest.TestCase): def tearDown(self): from google.cloud.bigquery import _helpers @@ -70,6 +113,34 @@ def _object_under_test(self): return _helpers.PyarrowVersions() + def _call_try_import(self, **kwargs): + from google.cloud.bigquery import _helpers + + _helpers.PYARROW_VERSIONS._installed_version = None + return _helpers.PYARROW_VERSIONS.try_import(**kwargs) + + def test_try_import_raises_no_error_w_recent_pyarrow(self): + from google.cloud.bigquery.exceptions import LegacyPyarrowError + + with mock.patch("pyarrow.__version__", new="5.0.0"): + try: + pyarrow = self._call_try_import(raise_if_error=True) + self.assertIsNotNone(pyarrow) + except LegacyPyarrowError: # pragma: NO COVER + self.fail("Legacy error raised with a non-legacy dependency version.") + + def test_try_import_returns_none_w_legacy_pyarrow(self): + with mock.patch("pyarrow.__version__", new="2.0.0"): + pyarrow = self._call_try_import() + self.assertIsNone(pyarrow) + + def test_try_import_raises_error_w_legacy_pyarrow(self): + from google.cloud.bigquery.exceptions import LegacyPyarrowError + + with mock.patch("pyarrow.__version__", new="2.0.0"): + with self.assertRaises(LegacyPyarrowError): + self._call_try_import(raise_if_error=True) + def test_installed_version_returns_cached(self): versions = self._object_under_test() versions._installed_version = object() diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 1a3f918eb..885cd318c 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -30,9 +30,6 @@ except ImportError: # pragma: NO COVER pandas = None -import pyarrow -import pyarrow.types - try: import geopandas except ImportError: # pragma: NO COVER @@ -41,10 +38,28 @@ import pytest from google import api_core -from google.cloud import bigquery_storage + +from google.cloud.bigquery import exceptions from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT + +pyarrow = _helpers.PYARROW_VERSIONS.try_import() +if pyarrow: + import pyarrow.parquet + import pyarrow.types +else: # pragma: NO COVER + # Mock out pyarrow when missing, because methods from pyarrow.types are + # used in test parameterization. + pyarrow = mock.Mock() + +try: + from google.cloud import bigquery_storage + + _helpers.BQ_STORAGE_VERSIONS.verify_version() +except ImportError: # pragma: NO COVER + bigquery_storage = None PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -55,6 +70,12 @@ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") +skip_if_no_bignumeric = pytest.mark.skipif( + not _BIGNUMERIC_SUPPORT, + reason="BIGNUMERIC support requires pyarrow>=3.0.0", +) + + @pytest.fixture def module_under_test(): from google.cloud.bigquery import _pandas_helpers @@ -110,6 +131,7 @@ def all_(*functions): return functools.partial(do_all, functions) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_is_datetime(): assert is_datetime(pyarrow.timestamp("us", tz=None)) assert not is_datetime(pyarrow.timestamp("ms", tz=None)) @@ -142,7 +164,12 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), - ("BIGNUMERIC", "NULLABLE", is_bignumeric), + pytest.param( + "BIGNUMERIC", + "NULLABLE", + is_bignumeric, + marks=skip_if_no_bignumeric, + ), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -221,10 +248,11 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), - ( + pytest.param( "BIGNUMERIC", "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), + marks=skip_if_no_bignumeric, ), ( "BOOLEAN", @@ -280,6 +308,7 @@ def test_all_(): ("UNKNOWN_TYPE", "REPEATED", is_none), ], ) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_type): field = schema.SchemaField("ignored_name", bq_type, mode=bq_mode) actual = module_under_test.bq_to_arrow_data_type(field) @@ -287,6 +316,7 @@ def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_t @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -334,6 +364,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -381,6 +412,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): assert actual.value_type.equals(expected_value_type) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -417,7 +449,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), - ( + pytest.param( "BIGNUMERIC", [ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), @@ -479,6 +511,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): series = pandas.Series(rows, dtype="object") bq_field = schema.SchemaField("field_name", bq_type) @@ -513,6 +546,7 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): rows = [pandas.Timestamp(row) for row in rows] series = pandas.Series(rows) @@ -523,6 +557,7 @@ def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_arrays(module_under_test): rows = [[1, 2, 3], [], [4, 5, 6]] series = pandas.Series(rows, dtype="object") @@ -534,6 +569,7 @@ def test_bq_to_arrow_array_w_arrays(module_under_test): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): rows = [ {"int_col": 123, "string_col": "abc"}, @@ -555,6 +591,7 @@ def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_special_floats(module_under_test): bq_field = schema.SchemaField("field_name", "FLOAT64") rows = [float("-inf"), float("nan"), float("inf"), None] @@ -622,6 +659,7 @@ def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): assert array.to_pylist() == list(series) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -647,6 +685,7 @@ def test_get_column_or_index_not_found(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_get_column_or_index_with_multiindex_not_found(module_under_test): dataframe = pandas.DataFrame( {"column_name": [1, 2, 3, 4, 5, 6]}, @@ -984,6 +1023,7 @@ def test_dataframe_to_arrow_with_multiindex(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_required_fields(module_under_test): bq_schema = ( schema.SchemaField("field01", "STRING", mode="REQUIRED"), @@ -1040,6 +1080,7 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_unknown_type(module_under_test): bq_schema = ( schema.SchemaField("field00", "UNKNOWN_TYPE"), @@ -1072,6 +1113,7 @@ def test_dataframe_to_arrow_with_unknown_type(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, @@ -1093,6 +1135,19 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): + mock_pyarrow_import = mock.Mock() + mock_pyarrow_import.side_effect = exceptions.LegacyPyarrowError( + "pyarrow not installed" + ) + monkeypatch.setattr(_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import) + + with pytest.raises(exceptions.LegacyPyarrowError): + module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_extra_fields(module_under_test): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -1104,6 +1159,7 @@ def test_dataframe_to_parquet_w_extra_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_missing_fields(module_under_test): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -1115,6 +1171,7 @@ def test_dataframe_to_parquet_w_missing_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_compression_method(module_under_test): bq_schema = (schema.SchemaField("field00", "STRING"),) dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) @@ -1134,6 +1191,34 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): + dataframe = pandas.DataFrame( + data=[ + {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)}, + {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)}, + ] + ) + + no_pyarrow_patch = mock.patch(module_under_test.__name__ + ".pyarrow", None) + + with no_pyarrow_patch, warnings.catch_warnings(record=True) as warned: + detected_schema = module_under_test.dataframe_to_bq_schema( + dataframe, bq_schema=[] + ) + + assert detected_schema is None + + # a warning should also be issued + expected_warnings = [ + warning for warning in warned if "could not determine" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + msg = str(expected_warnings[0]) + assert "execution_date" in msg and "created_at" in msg + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1163,6 +1248,7 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1249,6 +1335,7 @@ def test__first_array_valid_no_arrays_with_valid_items(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_succeeds(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1315,6 +1402,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_repeated_fields(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1427,6 +1515,7 @@ def test_augment_schema_type_detection_fails_array_data(module_under_test): assert "all_none_array" in warning_msg and "empty_array" in warning_msg +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): pandas = pytest.importorskip("pandas") @@ -1457,6 +1546,9 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test__download_table_bqstorage_stream_includes_read_session( monkeypatch, module_under_test ): @@ -1487,7 +1579,8 @@ def test__download_table_bqstorage_stream_includes_read_session( @pytest.mark.skipif( - not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + bigquery_storage is None + or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", ) def test__download_table_bqstorage_stream_omits_read_session( @@ -1527,6 +1620,9 @@ def test__download_table_bqstorage_stream_omits_read_session( (7, {"max_queue_size": None}, 7, 0), # infinite queue size ], ) +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test__download_table_bqstorage( module_under_test, stream_count, @@ -1577,6 +1673,7 @@ def fake_download_stream( assert queue_used.maxsize == expected_maxsize +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1612,6 +1709,7 @@ def test_download_arrow_row_iterator_unknown_field_type(module_under_test): assert col.to_pylist() == [2.2, 22.22, 222.222] +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_known_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1646,6 +1744,7 @@ def test_download_arrow_row_iterator_known_field_type(module_under_test): assert col.to_pylist() == ["2.2", "22.22", "222.222"] +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1712,6 +1811,7 @@ def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test assert isinstance(dataframe, pandas.DataFrame) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_field_type_override(module_under_test): # When loading pandas data, we may need to override the type # decision based on data contents, because GEOGRAPHY data can be @@ -1744,6 +1844,7 @@ def test_bq_to_arrow_field_type_override(module_under_test): ), ], ) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_field_metadata(module_under_test, field_type, metadata): assert ( module_under_test.bq_to_arrow_field( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f4552cda2..22f7286db 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -27,6 +27,7 @@ import warnings import mock +import packaging import requests import pytest import pkg_resources @@ -53,15 +54,25 @@ msg = "Error importing from opentelemetry, is the installed version compatible?" raise ImportError(msg) from exc +try: + import pyarrow +except (ImportError, AttributeError): # pragma: NO COVER + pyarrow = None + import google.api_core.exceptions from google.api_core import client_info import google.cloud._helpers from google.cloud import bigquery -from google.cloud import bigquery_storage + from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from google.cloud.bigquery import ParquetOptions +try: + from google.cloud import bigquery_storage +except (ImportError, AttributeError): # pragma: NO COVER + bigquery_storage = None +from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -614,6 +625,9 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_ensure_bqstorage_client_creating_new_instance(self): mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() @@ -636,6 +650,55 @@ def test_ensure_bqstorage_client_creating_new_instance(self): client_info=mock.sentinel.client_info, ) + def test_ensure_bqstorage_client_missing_dependency(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + def fail_bqstorage_import(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist + ) + + no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) + + with no_bqstorage, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client() + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning + for warning in warned + if "not installed" in str(warning) + and "google-cloud-bigquery-storage" in str(warning) + ] + assert matching_warnings, "Missing dependency warning not raised." + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_obsolete_dependency(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + patcher = mock.patch( + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client() + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_ensure_bqstorage_client_existing_client_check_passes(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -647,6 +710,29 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self): self.assertIs(bqstorage_client, mock_storage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_existing_client_check_fails(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + mock_storage_client = mock.sentinel.mock_storage_client + + patcher = mock.patch( + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + def test_create_routine_w_minimal_resource(self): from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference @@ -6833,6 +6919,7 @@ def test_load_table_from_file_w_invalid_job_config(self): assert "Expected an instance of LoadJobConfig" in err_msg @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6928,6 +7015,7 @@ def test_load_table_from_dataframe(self): assert "description" not in field @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_client_location(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6972,6 +7060,7 @@ def test_load_table_from_dataframe_w_client_location(self): assert sent_config.source_format == job.SourceFormat.PARQUET @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7026,6 +7115,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7081,6 +7171,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_parquet_options_none(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7132,6 +7223,7 @@ def test_load_table_from_dataframe_w_parquet_options_none(self): assert sent_config.parquet_options.enable_list_inference is True @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_none(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7191,6 +7283,7 @@ def test_load_table_from_dataframe_w_list_inference_none(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_false(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7251,6 +7344,7 @@ def test_load_table_from_dataframe_w_list_inference_false(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(self): from google.cloud.bigquery import job @@ -7270,6 +7364,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(sel assert "Got unexpected source_format:" in str(exc.value) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7370,6 +7465,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7429,6 +7525,7 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): assert sent_config.schema is None @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_index_and_auto_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7490,6 +7587,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): assert sent_schema == expected_sent_schema @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_unknown_table(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -7528,6 +7626,7 @@ def test_load_table_from_dataframe_unknown_table(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7575,6 +7674,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) + # @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7619,6 +7719,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7678,6 +7779,7 @@ def test_load_table_from_dataframe_struct_fields(self): assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_array_fields(self): """Test that a DataFrame with array columns can be uploaded correctly. @@ -7742,6 +7844,7 @@ def test_load_table_from_dataframe_array_fields(self): assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_array_fields_w_auto_schema(self): """Test that a DataFrame with array columns can be uploaded correctly. @@ -7804,6 +7907,7 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): assert sent_config.schema == expected_schema @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7887,6 +7991,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema_extra_types(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7923,6 +8028,7 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "unknown_col" in message @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7955,6 +8061,74 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): assert call_args.kwargs.get("parquet_compression") == "LZ4" @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) + to_parquet_patch = mock.patch.object( + dataframe, "to_parquet", wraps=dataframe.to_parquet + ) + + with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch: + with pytest.raises(ValueError): + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + location=self.LOCATION, + parquet_compression="gzip", + ) + + def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): + pytest.importorskip("pandas", reason="Requires `pandas`") + pytest.importorskip("pyarrow", reason="Requires `pyarrow`") + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + pyarrow_version_patch = mock.patch( + "google.cloud.bigquery.client._PYARROW_VERSION", + packaging.version.parse("2.0.0"), # A known bad version of pyarrow. + ) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch, get_table_patch, pyarrow_version_patch: + with warnings.catch_warnings(record=True) as warned: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + location=self.LOCATION, + ) + + expected_warnings = [ + warning for warning in warned if "pyarrow" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + assert issubclass(expected_warnings[0].category, RuntimeWarning) + msg = str(expected_warnings[0].message) + assert "pyarrow 2.0.0" in msg + assert "data corruption" in msg + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): """Test that a DataFrame with null columns can be uploaded if a BigQuery schema is specified. diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 7cc1f11c3..fae0c17e9 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -21,6 +21,11 @@ import pytest +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + import google.cloud._helpers from google.cloud.bigquery import query, table from google.cloud.bigquery.dbapi import _helpers @@ -210,6 +215,7 @@ def test_empty_iterable(self): result = _helpers.to_bq_table_rows(rows_iterable) self.assertEqual(list(result), []) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_non_empty_iterable(self): rows_iterable = [ dict( diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index e96ab55d7..67777f923 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -17,7 +17,10 @@ import mock -from google.cloud import bigquery_storage +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None class TestConnection(unittest.TestCase): @@ -37,6 +40,8 @@ def _mock_client(self): return mock_client def _mock_bqstorage_client(self): + # Assumption: bigquery_storage exists. It's the test's responisbility to + # not use this helper or skip itself if bqstorage is not installed. mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client._transport = mock.Mock(spec=["channel"]) mock_client._transport.grpc_channel = mock.Mock(spec=["close"]) @@ -53,6 +58,9 @@ def test_ctor_wo_bqstorage_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, None) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_ctor_w_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection @@ -82,6 +90,9 @@ def test_connect_wo_client(self, mock_client): self.assertIsNotNone(connection._client) self.assertIsNotNone(connection._bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_connect_w_client(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -97,6 +108,9 @@ def test_connect_w_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_connect_w_both_clients(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -130,6 +144,9 @@ def test_raises_error_if_closed(self): ): getattr(connection, method)() + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_close_closes_all_created_bigquery_clients(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() @@ -152,6 +169,9 @@ def test_close_closes_all_created_bigquery_clients(self): self.assertTrue(client.close.called) self.assertTrue(bqstorage_client._transport.grpc_channel.close.called) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_close_does_not_close_bigquery_clients_passed_to_it(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index d672c0f6c..b550bbce0 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -18,8 +18,17 @@ import pytest +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + from google.api_core import exceptions -from google.cloud import bigquery_storage + +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None from tests.unit.helpers import _to_pyarrow @@ -269,6 +278,10 @@ def test_fetchall_w_row(self): self.assertEqual(len(rows), 1) self.assertEqual(rows[0], (1,)) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -322,6 +335,9 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): self.assertEqual(sorted_row_data, expected_row_data) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_fetchall_w_bqstorage_client_fetch_no_rows(self): from google.cloud.bigquery import dbapi @@ -344,6 +360,9 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): # check the data returned self.assertEqual(rows, []) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -375,6 +394,10 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): # the default client was not used mock_client.list_rows.assert_not_called() + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_no_arrow_compression(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index fdfb16d16..c0aa5d85e 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -76,6 +76,19 @@ def ipython_ns_cleanup(): del ip.user_ns[name] +@pytest.fixture(scope="session") +def missing_bq_storage(): + """Provide a patcher that can make the bigquery storage import to fail.""" + + def fail_if(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist + ) + + return maybe_fail_import(predicate=fail_if) + + @pytest.fixture(scope="session") def missing_grpcio_lib(): """Provide a patcher that can make the gapic library import to fail.""" @@ -310,6 +323,9 @@ def test__make_bqstorage_client_false(): assert got is None +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -321,6 +337,53 @@ def test__make_bqstorage_client_true(): assert isinstance(got, bigquery_storage.BigQueryReadClient) +def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + + with pytest.raises(ImportError) as exc_context, missing_bq_storage: + magics._make_bqstorage_client(test_client, True, {}) + + error_msg = str(exc_context.value) + assert "google-cloud-bigquery-storage" in error_msg + assert "pyarrow" in error_msg + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__make_bqstorage_client_true_obsolete_dependency(): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + + patcher = mock.patch( + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + got = magics._make_bqstorage_client(test_client, True, {}) + + assert got is None + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): credentials_mock = mock.create_autospec( @@ -376,6 +439,9 @@ def test_extension_load(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_bigquery_magic_without_optional_arguments(monkeypatch): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -538,9 +604,10 @@ def test_bigquery_magic_clears_display_in_non_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): - pandas = pytest.importorskip("pandas") - ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") mock_credentials = mock.create_autospec( @@ -603,6 +670,9 @@ def warning_match(warning): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_bigquery_magic_with_rest_client_requested(monkeypatch): pandas = pytest.importorskip("pandas") @@ -830,6 +900,9 @@ def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip = IPython.get_ipython() diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index f542c7523..a79b98881 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -22,18 +22,33 @@ import warnings import mock -import pyarrow -import pyarrow.types +import pkg_resources import pytest import google.api_core.exceptions +from test_utils.imports import maybe_fail_import from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.dataset import DatasetReference -from google.cloud import bigquery_storage -from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( - grpc as big_query_read_grpc_transport, -) +try: + from google.cloud import bigquery_storage + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) +except ImportError: # pragma: NO COVER + bigquery_storage = None + big_query_read_grpc_transport = None + +from google.cloud.bigquery import _helpers + +pyarrow = _helpers.PYARROW_VERSIONS.try_import() +PYARROW_VERSION = pkg_resources.parse_version("0.0.1") + +if pyarrow: + import pyarrow.types + + PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) try: import pandas @@ -52,7 +67,7 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None -from google.cloud.bigquery.dataset import DatasetReference +PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") def _mock_client(): @@ -1902,12 +1917,20 @@ def test_total_rows_eq_zero(self): row_iterator = self._make_one() self.assertEqual(row_iterator.total_rows, 0) + @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) + def test_to_arrow_error_if_pyarrow_is_none(self): + row_iterator = self._make_one() + with self.assertRaises(ValueError): + row_iterator.to_arrow() + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): row_iterator = self._make_one() tbl = row_iterator.to_arrow() self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 0) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_iterable(self): row_iterator = self._make_one() arrow_iter = row_iterator.to_arrow_iterable() @@ -2192,6 +2215,49 @@ def test__validate_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) + def test__validate_bqstorage_returns_false_if_missing_dependency(self): + iterator = self._make_one(first_page_response=None) # not cached + + def fail_bqstorage_import(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist + ) + + no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) + + with no_bqstorage: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + iterator = self._make_one(first_page_response=None) # not cached + + patcher = mock.patch( + "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_iterable(self): from google.cloud.bigquery.schema import SchemaField @@ -2292,6 +2358,10 @@ def test_to_arrow_iterable(self): [[{"name": "Bepples Phlyntstone", "age": 0}, {"name": "Dino", "age": 4}]], ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2367,6 +2437,7 @@ def test_to_arrow_iterable_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField @@ -2448,6 +2519,7 @@ def test_to_arrow(self): ], ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_nulls(self): from google.cloud.bigquery.schema import SchemaField @@ -2480,6 +2552,7 @@ def test_to_arrow_w_nulls(self): self.assertEqual(names, ["Donkey", "Diddy", "Dixie", None]) self.assertEqual(ages, [32, 29, None, 111]) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_unknown_type(self): from google.cloud.bigquery.schema import SchemaField @@ -2522,6 +2595,7 @@ def test_to_arrow_w_unknown_type(self): warning = warned[0] self.assertTrue("sport" in str(warning)) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_empty_table(self): from google.cloud.bigquery.schema import SchemaField @@ -2560,6 +2634,10 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[0].name, "name") self.assertEqual(child_field.type.value_type[1].name, "age") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -2600,6 +2678,10 @@ def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): ) mock_client._ensure_bqstorage_client.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -2636,6 +2718,10 @@ def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2713,6 +2799,10 @@ def test_to_arrow_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2740,6 +2830,7 @@ def test_to_arrow_w_bqstorage_creates_client(self): mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): from google.cloud.bigquery.schema import SchemaField @@ -2755,17 +2846,25 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() - mock_client._ensure_bqstorage_client.return_value = None row_iterator = self._make_one(mock_client, api_request, path, schema) - tbl = row_iterator.to_arrow(create_bqstorage_client=True) + def mock_verify_version(): + raise _helpers.LegacyBigQueryStorageError("no bqstorage") - # The client attempted to create a BQ Storage client, and even though - # that was not possible, results were still returned without errors. - mock_client._ensure_bqstorage_client.assert_called_once() + with mock.patch( + "google.cloud.bigquery._helpers.BQ_STORAGE_VERSIONS.verify_version", + mock_verify_version, + ): + tbl = row_iterator.to_arrow(create_bqstorage_client=True) + + mock_client._ensure_bqstorage_client.assert_not_called() self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2802,6 +2901,7 @@ def test_to_arrow_w_bqstorage_no_streams(self): self.assertEqual(actual_table.schema[1].name, "colC") self.assertEqual(actual_table.schema[2].name, "colB") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") @mock.patch("tqdm.notebook.tqdm") @@ -2936,6 +3036,10 @@ def test_to_dataframe_iterable_with_dtypes(self): self.assertEqual(df_2["age"][0], 33) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3100,6 +3204,7 @@ def test_to_dataframe(self): self.assertEqual(df.age.dtype.name, "Int64") @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -3127,6 +3232,7 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -3575,6 +3681,9 @@ def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3603,6 +3712,9 @@ def test_to_dataframe_w_bqstorage_creates_client(self): bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3628,7 +3740,11 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_logs_session(self): from google.cloud.bigquery.table import Table @@ -3650,6 +3766,10 @@ def test_to_dataframe_w_bqstorage_logs_session(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_empty_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3701,6 +3821,10 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): self.assertTrue(got.empty) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_nonempty(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3777,6 +3901,10 @@ def test_to_dataframe_w_bqstorage_nonempty(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3827,6 +3955,10 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): self.assertTrue(got.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm") def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): @@ -3902,6 +4034,10 @@ def blocking_to_arrow(*args, **kwargs): tqdm_mock().close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4018,6 +4154,9 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): self.assertTrue(df.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_raises_auth_error(self): from google.cloud.bigquery import table as mut @@ -4036,6 +4175,9 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): with pytest.raises(google.api_core.exceptions.Forbidden): row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_partition(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4053,6 +4195,9 @@ def test_to_dataframe_w_bqstorage_partition(self): with pytest.raises(ValueError): row_iterator.to_dataframe(bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_snapshot(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4071,6 +4216,10 @@ def test_to_dataframe_w_bqstorage_snapshot(self): row_iterator.to_dataframe(bqstorage_client) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4773,6 +4922,9 @@ def test_set_expiration_w_none(self): assert time_partitioning._properties["expirationMs"] is None +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) @pytest.mark.parametrize( "table_path", ( diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 943baa326..5778467a5 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -16,12 +16,12 @@ import decimal from unittest import mock -import pyarrow import pytest from google.cloud import bigquery pandas = pytest.importorskip("pandas") +pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data" From 89f8e9b84b013eac18f94a73e8533c14e607c68a Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 12 Dec 2022 09:23:02 -0600 Subject: [PATCH 046/536] chore(main): release 3.4.1 (#1416) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 12 ++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 294e5b42f..d6f0abc85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.4.1](https://github.com/googleapis/python-bigquery/compare/v3.4.0...v3.4.1) (2022-12-09) + + +### Documentation + +* Add info about streaming quota limits to `insert_rows*` methods ([#1409](https://github.com/googleapis/python-bigquery/issues/1409)) ([0f08e9a](https://github.com/googleapis/python-bigquery/commit/0f08e9a8ff638e78006d71acd974de2dff89b5d9)) + + +### Dependencies + +* make pyarrow and BQ Storage optional dependencies ([e1aa921](https://github.com/googleapis/python-bigquery/commit/e1aa9218ad22f85c9a6cab8b61d013779376a582)) + ## [3.4.0](https://github.com/googleapis/python-bigquery/compare/v3.3.6...v3.4.0) (2022-11-17) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 6b822f0c1..71133df01 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.4.0" +__version__ = "3.4.1" From 8ad2e5bc1c04bf16fffe4c8773e722b68117c916 Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Tue, 13 Dec 2022 16:57:22 -0600 Subject: [PATCH 047/536] docs: created samples for load table and create table from schema file (#1436) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: created samples for load table and create table from schema file * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Apply suggestions from code review Co-authored-by: Tim Swast * Update samples/snippets/create_table_schema_from_json.py Co-authored-by: Tim Swast Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- .../snippets/create_table_schema_from_json.py | 42 +++++++++++++ .../create_table_schema_from_json_test.py | 32 ++++++++++ samples/snippets/dataset_access_test.py | 2 +- samples/snippets/delete_job.py | 2 +- .../snippets/load_table_schema_from_json.py | 60 +++++++++++++++++++ .../load_table_schema_from_json_test.py | 32 ++++++++++ samples/snippets/materialized_view.py | 1 + samples/snippets/quickstart_test.py | 1 - samples/snippets/schema.json | 20 +++++++ samples/snippets/schema_us_states.json | 12 ++++ samples/snippets/user_credentials_test.py | 1 - 11 files changed, 201 insertions(+), 4 deletions(-) create mode 100644 samples/snippets/create_table_schema_from_json.py create mode 100644 samples/snippets/create_table_schema_from_json_test.py create mode 100644 samples/snippets/load_table_schema_from_json.py create mode 100644 samples/snippets/load_table_schema_from_json_test.py create mode 100644 samples/snippets/schema.json create mode 100644 samples/snippets/schema_us_states.json diff --git a/samples/snippets/create_table_schema_from_json.py b/samples/snippets/create_table_schema_from_json.py new file mode 100644 index 000000000..b866e2ebe --- /dev/null +++ b/samples/snippets/create_table_schema_from_json.py @@ -0,0 +1,42 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pathlib + + +def create_table(table_id: str) -> None: + orig_table_id = table_id + current_directory = pathlib.Path(__file__).parent + orig_schema_path = str(current_directory / "schema.json") + # [START bigquery_schema_file_create] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you want to create. + table_id = "your-project.your_dataset.your_table_name" + # TODO(dev): Change schema_path variable to the path of your schema file. + schema_path = "path/to/schema.json" + # [END bigquery_schema_file_create] + table_id = orig_table_id + schema_path = orig_schema_path + + # [START bigquery_schema_file_create] + # To load a schema file use the schema_from_json method. + schema = client.schema_from_json(schema_path) + + table = bigquery.Table(table_id, schema=schema) + table = client.create_table(table) # API request + print(f"Created table {table_id}.") + # [END bigquery_schema_file_create] diff --git a/samples/snippets/create_table_schema_from_json_test.py b/samples/snippets/create_table_schema_from_json_test.py new file mode 100644 index 000000000..e99b92672 --- /dev/null +++ b/samples/snippets/create_table_schema_from_json_test.py @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import create_table_schema_from_json + +if typing.TYPE_CHECKING: + import pytest + + +def test_create_table( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + + create_table_schema_from_json.create_table(random_table_id) + + out, _ = capsys.readouterr() + assert "Created" in out + assert random_table_id in out diff --git a/samples/snippets/dataset_access_test.py b/samples/snippets/dataset_access_test.py index 4d1a70eb1..cc6a9af61 100644 --- a/samples/snippets/dataset_access_test.py +++ b/samples/snippets/dataset_access_test.py @@ -18,8 +18,8 @@ import update_dataset_access if typing.TYPE_CHECKING: - import pytest from google.cloud import bigquery + import pytest def test_dataset_access_permissions( diff --git a/samples/snippets/delete_job.py b/samples/snippets/delete_job.py index 7c8640baf..2aeb53849 100644 --- a/samples/snippets/delete_job.py +++ b/samples/snippets/delete_job.py @@ -17,8 +17,8 @@ def delete_job_metadata(job_id: str, location: str) -> None: orig_job_id = job_id orig_location = location # [START bigquery_delete_job] - from google.cloud import bigquery from google.api_core import exceptions + from google.cloud import bigquery # TODO(developer): Set the job ID to the ID of the job whose metadata you # wish to delete. diff --git a/samples/snippets/load_table_schema_from_json.py b/samples/snippets/load_table_schema_from_json.py new file mode 100644 index 000000000..3f1f85430 --- /dev/null +++ b/samples/snippets/load_table_schema_from_json.py @@ -0,0 +1,60 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pathlib + + +def load_table(table_id: str) -> None: + orig_uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + orig_table_id = table_id + current_directory = pathlib.Path(__file__).parent + orig_schema_path = str(current_directory / "schema_us_states.json") + # [START bigquery_schema_file_load] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change uri variable to the path of your data file. + uri = "gs://your-bucket/path/to/your-file.csv" + # TODO(dev): Change table_id to the full name of the table you want to create. + table_id = "your-project.your_dataset.your_table" + # TODO(dev): Change schema_path variable to the path of your schema file. + schema_path = "path/to/schema.json" + # [END bigquery_schema_file_load] + uri = orig_uri + table_id = orig_table_id + schema_path = orig_schema_path + # [START bigquery_schema_file_load] + # To load a schema file use the schema_from_json method. + schema = client.schema_from_json(schema_path) + + job_config = bigquery.LoadJobConfig( + # To use the schema you loaded pass it into the + # LoadJobConfig constructor. + schema=schema, + skip_leading_rows=1, + ) + + # Pass the job_config object to the load_table_from_file, + # load_table_from_json, or load_table_from_uri method + # to use the schema on a new table. + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) # Make an API request. + print(f"Loaded {destination_table.num_rows} rows to {table_id}.") + # [END bigquery_schema_file_load] diff --git a/samples/snippets/load_table_schema_from_json_test.py b/samples/snippets/load_table_schema_from_json_test.py new file mode 100644 index 000000000..267a6786c --- /dev/null +++ b/samples/snippets/load_table_schema_from_json_test.py @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import load_table_schema_from_json + +if typing.TYPE_CHECKING: + import pytest + + +def test_load_table( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + + load_table_schema_from_json.load_table(random_table_id) + + out, _ = capsys.readouterr() + assert "Loaded" in out + assert random_table_id in out diff --git a/samples/snippets/materialized_view.py b/samples/snippets/materialized_view.py index adb3688a4..a47ee5b81 100644 --- a/samples/snippets/materialized_view.py +++ b/samples/snippets/materialized_view.py @@ -60,6 +60,7 @@ def update_materialized_view( # [START bigquery_update_materialized_view] import datetime + from google.cloud import bigquery bigquery_client = bigquery.Client() diff --git a/samples/snippets/quickstart_test.py b/samples/snippets/quickstart_test.py index b0bad5ee5..610c63c3b 100644 --- a/samples/snippets/quickstart_test.py +++ b/samples/snippets/quickstart_test.py @@ -20,7 +20,6 @@ import quickstart - # Must match the dataset listed in quickstart.py (there's no easy way to # extract this). DATASET_ID = "my_new_dataset" diff --git a/samples/snippets/schema.json b/samples/snippets/schema.json new file mode 100644 index 000000000..bd2164dad --- /dev/null +++ b/samples/snippets/schema.json @@ -0,0 +1,20 @@ +[ + { + "name": "qtr", + "type": "STRING", + "mode": "REQUIRED", + "description": "quarter" + }, + { + "name": "rep", + "type": "STRING", + "mode": "NULLABLE", + "description": "sales representative" + }, + { + "name": "sales", + "type": "FLOAT", + "mode": "NULLABLE", + "defaultValueExpression": "2.55" + } +] diff --git a/samples/snippets/schema_us_states.json b/samples/snippets/schema_us_states.json new file mode 100644 index 000000000..7f2ccc277 --- /dev/null +++ b/samples/snippets/schema_us_states.json @@ -0,0 +1,12 @@ +[ + { + "name": "name", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "post_abbr", + "type": "STRING", + "mode": "NULLABLE" + } +] diff --git a/samples/snippets/user_credentials_test.py b/samples/snippets/user_credentials_test.py index e2794e83b..df8a6354d 100644 --- a/samples/snippets/user_credentials_test.py +++ b/samples/snippets/user_credentials_test.py @@ -21,7 +21,6 @@ from user_credentials import main - PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] MockType = Union[mock.mock.MagicMock, mock.mock.AsyncMock] From b8502a6641b653610643aeb38992d330823feb94 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 14 Dec 2022 10:49:41 -0600 Subject: [PATCH 048/536] chore: remove code generation for BQML proto files (#1294) * chore: remove code generation for BQML proto files * remove protos from owlbot config Co-authored-by: Anthonios Partheniou Co-authored-by: aribray <45905583+aribray@users.noreply.github.com> --- .github/.OwlBot.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/.OwlBot.yaml b/.github/.OwlBot.yaml index e54051157..8b142686c 100644 --- a/.github/.OwlBot.yaml +++ b/.github/.OwlBot.yaml @@ -18,9 +18,5 @@ docker: deep-remove-regex: - /owl-bot-staging -deep-copy-regex: - - source: /google/cloud/bigquery/(v.*)/.*-py/(.*) - dest: /owl-bot-staging/$1/$2 - begin-after-commit-hash: f2de93abafa306b2ebadf1d10d947db8bcf2bf15 From 093cc6852ada29898c4a4d047fd216544ef15bba Mon Sep 17 00:00:00 2001 From: Jared Chapman Date: Mon, 19 Dec 2022 13:56:07 -0600 Subject: [PATCH 049/536] docs: create sample to write schema file from table (#1439) * docs: create sample to write schema file from table * Apply suggestions from code review Co-authored-by: Tim Swast --- samples/snippets/get_table_make_schema.py | 47 +++++++++++++++++++ .../snippets/get_table_make_schema_test.py | 36 ++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 samples/snippets/get_table_make_schema.py create mode 100644 samples/snippets/get_table_make_schema_test.py diff --git a/samples/snippets/get_table_make_schema.py b/samples/snippets/get_table_make_schema.py new file mode 100644 index 000000000..f870b42de --- /dev/null +++ b/samples/snippets/get_table_make_schema.py @@ -0,0 +1,47 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_table_make_schema(table_id: str, schema_path: str) -> None: + orig_table_id = table_id + orig_schema_path = schema_path + # [START bigquery_schema_file_get] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change the table_id variable to the full name of the + # table you want to get schema from. + table_id = "your-project.your_dataset.your_table_name" + + # TODO(dev): Change schema_path variable to the path + # of your schema file. + schema_path = "path/to/schema.json" + # [END bigquery_schema_file_get] + table_id = orig_table_id + schema_path = orig_schema_path + # [START bigquery_schema_file_get] + table = client.get_table(table_id) # Make an API request. + + # Write a schema file to schema_path with the schema_to_json method. + client.schema_to_json(table.schema, schema_path) + + with open(schema_path, "r", encoding="utf-8") as schema_file: + schema_contents = schema_file.read() + + # View table properties + print(f"Got table '{table.project}.{table.dataset_id}.{table.table_id}'.") + print(f"Table schema: {schema_contents}") + + # [END bigquery_schema_file_get] diff --git a/samples/snippets/get_table_make_schema_test.py b/samples/snippets/get_table_make_schema_test.py new file mode 100644 index 000000000..424f16e39 --- /dev/null +++ b/samples/snippets/get_table_make_schema_test.py @@ -0,0 +1,36 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import get_table_make_schema + +if typing.TYPE_CHECKING: + import pathlib + + import pytest + + +def test_get_table_make_schema( + capsys: "pytest.CaptureFixture[str]", + table_id: str, + tmp_path: "pathlib.Path", +) -> None: + schema_path = str(tmp_path / "test_schema.json") + + get_table_make_schema.get_table_make_schema(table_id, schema_path) + + out, _ = capsys.readouterr() + assert "Got table" in out + assert table_id in out From a0976318fc5ad1620a68250c3e059e2a51d4946d Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Tue, 20 Dec 2022 14:28:30 -0600 Subject: [PATCH 050/536] docs: revise sample for nested schema (#1446) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: revise sample for nested schema * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * added TODO Co-authored-by: Owl Bot --- docs/snippets.py | 2 + samples/snippets/nested_repeated_schema.py | 54 +++++++++++++++++++ .../snippets/nested_repeated_schema_test.py | 32 +++++++++++ 3 files changed, 88 insertions(+) create mode 100644 samples/snippets/nested_repeated_schema.py create mode 100644 samples/snippets/nested_repeated_schema_test.py diff --git a/docs/snippets.py b/docs/snippets.py index 05e4fa378..b9860e4da 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -118,6 +118,8 @@ def test_create_client_default_credentials(): assert client is not None +# TODO(Mattix23): After code sample from https://github.com/googleapis/python-bigquery/pull/1446 +# is updated from cloud.google.com delete this. def test_create_table_nested_repeated_schema(client, to_delete): dataset_id = "create_table_nested_repeated_{}".format(_millis()) project = client.project diff --git a/samples/snippets/nested_repeated_schema.py b/samples/snippets/nested_repeated_schema.py new file mode 100644 index 000000000..5d55860cc --- /dev/null +++ b/samples/snippets/nested_repeated_schema.py @@ -0,0 +1,54 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def nested_schema(table_id: str) -> None: + orig_table_id = table_id + # [START bigquery_nested_repeated_schema] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you want to create. + table_id = "your-project.your_dataset.your_table_name" + + schema = [ + bigquery.SchemaField("id", "STRING", mode="NULLABLE"), + bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("dob", "DATE", mode="NULLABLE"), + bigquery.SchemaField( + "addresses", + "RECORD", + mode="REPEATED", + fields=[ + bigquery.SchemaField("status", "STRING", mode="NULLABLE"), + bigquery.SchemaField("address", "STRING", mode="NULLABLE"), + bigquery.SchemaField("city", "STRING", mode="NULLABLE"), + bigquery.SchemaField("state", "STRING", mode="NULLABLE"), + bigquery.SchemaField("zip", "STRING", mode="NULLABLE"), + bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"), + ], + ), + ] + # [END bigquery_nested_repeated_schema] + + table_id = orig_table_id + + # [START bigquery_nested_repeated_schema] + table = bigquery.Table(table_id, schema=schema) + table = client.create_table(table) # API request + + print(f"Created table {table.project}.{table.dataset_id}.{table.table_id}.") + # [END bigquery_nested_repeated_schema] diff --git a/samples/snippets/nested_repeated_schema_test.py b/samples/snippets/nested_repeated_schema_test.py new file mode 100644 index 000000000..0386fc8fb --- /dev/null +++ b/samples/snippets/nested_repeated_schema_test.py @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import nested_repeated_schema + +if typing.TYPE_CHECKING: + import pytest + + +def test_create_table( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + + nested_repeated_schema.nested_schema(random_table_id) + + out, _ = capsys.readouterr() + assert "Created" in out + assert random_table_id in out From ed1de4f2968cb8ebe3146ca39ee5c2e3e025903b Mon Sep 17 00:00:00 2001 From: Jared Chapman Date: Fri, 6 Jan 2023 12:09:26 -0600 Subject: [PATCH 051/536] chore: install all sample dependencies when type checking samples (#1455) --- google/__init__.py | 2 +- google/cloud/bigquery/opentelemetry_tracing.py | 6 +++--- noxfile.py | 14 ++++++++++++-- samples/mypy.ini | 1 - 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/google/__init__.py b/google/__init__.py index 8fcc60e2b..8e60d8439 100644 --- a/google/__init__.py +++ b/google/__init__.py @@ -21,4 +21,4 @@ except ImportError: import pkgutil - __path__ = pkgutil.extend_path(__path__, __name__) + __path__ = pkgutil.extend_path(__path__, __name__) # type: ignore diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index 2345fd1bb..3d0a66ba8 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -18,9 +18,9 @@ logger = logging.getLogger(__name__) try: - from opentelemetry import trace - from opentelemetry.instrumentation.utils import http_status_to_status_code - from opentelemetry.trace.status import Status + from opentelemetry import trace # type: ignore + from opentelemetry.instrumentation.utils import http_status_to_status_code # type: ignore + from opentelemetry.trace.status import Status # type: ignore HAS_OPENTELEMETRY = True _warned_telemetry = True diff --git a/noxfile.py b/noxfile.py index a91e60a5f..139093acc 100644 --- a/noxfile.py +++ b/noxfile.py @@ -200,12 +200,22 @@ def mypy_samples(session): """Run type checks with mypy.""" session.install("-e", ".[all]") - session.install("ipython", "pytest") + session.install("pytest") + for requirements_path in CURRENT_DIRECTORY.glob("samples/*/requirements.txt"): + session.install("-r", requirements_path) session.install(MYPY_VERSION) # Just install the dependencies' type info directly, since "mypy --install-types" # might require an additional pass. - session.install("types-mock", "types-pytz") + session.install( + "types-mock", + "types-pytz", + "types-protobuf", + "types-python-dateutil", + "types-requests", + "types-setuptools", + ) + session.install("typing-extensions") # for TypedDict in pre-3.8 Python versions session.run( diff --git a/samples/mypy.ini b/samples/mypy.ini index 29757e47d..3f7eb6647 100644 --- a/samples/mypy.ini +++ b/samples/mypy.ini @@ -2,7 +2,6 @@ # Should match DEFAULT_PYTHON_VERSION from root noxfile.py python_version = 3.8 exclude = noxfile\.py -strict = True warn_unused_configs = True [mypy-google.auth,google.oauth2,geojson,google_auth_oauthlib,IPython.*] From 14ae1f20538ea00829a1325f91f5e8524234bd0c Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Fri, 6 Jan 2023 12:42:38 -0600 Subject: [PATCH 052/536] docs: revise label table code samples (#1451) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: revise label table code samples * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * added TODO to clean up snippets.py Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- docs/snippets.py | 3 +++ samples/snippets/label_table.py | 37 ++++++++++++++++++++++++++++ samples/snippets/label_table_test.py | 32 ++++++++++++++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 samples/snippets/label_table.py create mode 100644 samples/snippets/label_table_test.py diff --git a/docs/snippets.py b/docs/snippets.py index b9860e4da..a0c01870a 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -250,6 +250,9 @@ def test_manage_table_labels(client, to_delete): table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) table = client.create_table(table) + # TODO(Mattix23): After code sample from https://github.com/googleapis/python-bigquery/pull/1451 + # is updated from cloud.google.com delete this. + # [START bigquery_label_table] # from google.cloud import bigquery # client = bigquery.Client() diff --git a/samples/snippets/label_table.py b/samples/snippets/label_table.py new file mode 100644 index 000000000..5fce08d62 --- /dev/null +++ b/samples/snippets/label_table.py @@ -0,0 +1,37 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def label_table(table_id: str) -> None: + orig_table_id = table_id + # [START bigquery_label_table] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you want to create. + table_id = "your-project.your_dataset.your_table_name" + + # [END bigquery_label_table] + table_id = orig_table_id + # [START bigquery_label_table] + table = client.get_table(table_id) # API request + + labels = {"color": "green"} + table.labels = labels + + table = client.update_table(table, ["labels"]) # API request + + print(f"Added {table.labels} to {table_id}.") + # [END bigquery_label_table] diff --git a/samples/snippets/label_table_test.py b/samples/snippets/label_table_test.py new file mode 100644 index 000000000..a77fb4b75 --- /dev/null +++ b/samples/snippets/label_table_test.py @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import label_table + +if typing.TYPE_CHECKING: + import pytest + + +def test_label_table( + capsys: "pytest.CaptureFixture[str]", + table_id: str, +) -> None: + + label_table.label_table(table_id) + + out, _ = capsys.readouterr() + assert "color" in out + assert table_id in out From 57740e49af7418449aec73a6fdd307fcb588c655 Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Fri, 6 Jan 2023 14:25:01 -0600 Subject: [PATCH 053/536] docs: revise create table cmek sample (#1452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: revise create table cmek sample * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- docs/snippets.py | 2 +- samples/snippets/create_table_cmek.py | 46 ++++++++++++++++++++++ samples/snippets/create_table_cmek_test.py | 37 +++++++++++++++++ 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 samples/snippets/create_table_cmek.py create mode 100644 samples/snippets/create_table_cmek_test.py diff --git a/docs/snippets.py b/docs/snippets.py index a0c01870a..3c4967a59 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -168,7 +168,7 @@ def test_create_table_cmek(client, to_delete): dataset = bigquery.Dataset(dataset_ref) client.create_dataset(dataset) to_delete.append(dataset) - + # TODO(Mattix23): When sample is updated in cloud.google.com, delete this one. # [START bigquery_create_table_cmek] # from google.cloud import bigquery # client = bigquery.Client() diff --git a/samples/snippets/create_table_cmek.py b/samples/snippets/create_table_cmek.py new file mode 100644 index 000000000..011c56d4e --- /dev/null +++ b/samples/snippets/create_table_cmek.py @@ -0,0 +1,46 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_table_cmek(table_id: str, kms_key_name: str) -> None: + orig_table_id = table_id + orig_key_name = kms_key_name + # [START bigquery_create_table_cmek] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you want to create. + table_id = "your-project.your_dataset.your_table_name" + + # Set the encryption key to use for the table. + # TODO: Replace this key with a key you have created in Cloud KMS. + kms_key_name = "projects/your-project/locations/us/keyRings/test/cryptoKeys/test" + + # [END bigquery_create_table_cmek] + + table_id = orig_table_id + kms_key_name = orig_key_name + + # [START bigquery_create_table_cmek] + table = bigquery.Table(table_id) + table.encryption_configuration = bigquery.EncryptionConfiguration( + kms_key_name=kms_key_name + ) + table = client.create_table(table) # API request + + print(f"Created {table_id}.") + print(f"Key: {table.encryption_configuration.kms_key_name}.") + + # [END bigquery_create_table_cmek] diff --git a/samples/snippets/create_table_cmek_test.py b/samples/snippets/create_table_cmek_test.py new file mode 100644 index 000000000..429baf3fd --- /dev/null +++ b/samples/snippets/create_table_cmek_test.py @@ -0,0 +1,37 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import create_table_cmek + +if typing.TYPE_CHECKING: + import pytest + + +def test_create_table( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + + kms_key_name = ( + "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/test" + ) + + create_table_cmek.create_table_cmek(random_table_id, kms_key_name) + + out, _ = capsys.readouterr() + assert "Created" in out + assert random_table_id in out + assert kms_key_name in out From 72b25c52bc4b9a92c4cb187b6230b280d4af905c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sat, 7 Jan 2023 06:59:36 -0800 Subject: [PATCH 054/536] chore(python): add support for python 3.11 [autoapprove] (#1454) * chore(python): add support for python 3.11 Source-Link: https://github.com/googleapis/synthtool/commit/7197a001ffb6d8ce7b0b9b11c280f0c536c1033a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:c43f1d918bcf817d337aa29ff833439494a158a0831508fda4ec75dc4c0d0320 * update unit test python versions * modify test to cater for python 3.11 * require grpcio >= 1.49.1 for python 3.11 Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 4 +- .kokoro/samples/python3.11/common.cfg | 40 ++++++++++++++++++++ .kokoro/samples/python3.11/continuous.cfg | 6 +++ .kokoro/samples/python3.11/periodic-head.cfg | 11 ++++++ .kokoro/samples/python3.11/periodic.cfg | 6 +++ .kokoro/samples/python3.11/presubmit.cfg | 6 +++ CONTRIBUTING.rst | 6 ++- noxfile.py | 2 +- samples/geography/noxfile.py | 2 +- samples/magics/noxfile.py | 2 +- samples/snippets/noxfile.py | 2 +- setup.py | 2 + tests/unit/line_arg_parser/test_parser.py | 2 +- 13 files changed, 82 insertions(+), 9 deletions(-) create mode 100644 .kokoro/samples/python3.11/common.cfg create mode 100644 .kokoro/samples/python3.11/continuous.cfg create mode 100644 .kokoro/samples/python3.11/periodic-head.cfg create mode 100644 .kokoro/samples/python3.11/periodic.cfg create mode 100644 .kokoro/samples/python3.11/presubmit.cfg diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index fccaa8e84..889f77dfa 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,4 +1,4 @@ -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3bf87e47c2173d7eed42714589dc4da2c07c3268610f1e47f8e1a30decbfc7f1 + digest: sha256:c43f1d918bcf817d337aa29ff833439494a158a0831508fda4ec75dc4c0d0320 diff --git a/.kokoro/samples/python3.11/common.cfg b/.kokoro/samples/python3.11/common.cfg new file mode 100644 index 000000000..f5adc8703 --- /dev/null +++ b/.kokoro/samples/python3.11/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.11" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-311" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.11/continuous.cfg b/.kokoro/samples/python3.11/continuous.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.11/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.11/periodic-head.cfg b/.kokoro/samples/python3.11/periodic-head.cfg new file mode 100644 index 000000000..5aa01bab5 --- /dev/null +++ b/.kokoro/samples/python3.11/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.11/periodic.cfg b/.kokoro/samples/python3.11/periodic.cfg new file mode 100644 index 000000000..71cd1e597 --- /dev/null +++ b/.kokoro/samples/python3.11/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/.kokoro/samples/python3.11/presubmit.cfg b/.kokoro/samples/python3.11/presubmit.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.11/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index d06598b31..5dc30a1f8 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8, 3.9 and 3.10 on both UNIX and Windows. + 3.7, 3.8, 3.9, 3.10 and 3.11 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests. - To run a single unit test:: - $ nox -s unit-3.10 -- -k + $ nox -s unit-3.11 -- -k .. note:: @@ -225,11 +225,13 @@ We support: - `Python 3.8`_ - `Python 3.9`_ - `Python 3.10`_ +- `Python 3.11`_ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ +.. _Python 3.11: https://docs.python.org/3.11/ Supported versions can be found in our ``noxfile.py`` `config`_. diff --git a/noxfile.py b/noxfile.py index 139093acc..5f08c016b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -37,7 +37,7 @@ DEFAULT_PYTHON_VERSION = "3.8" SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.10"] -UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] +UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() # 'docfx' is excluded since it only needs to run in 'docs-presubmit' diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index e8283c38d..1224cbe21 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/magics/noxfile.py b/samples/magics/noxfile.py index e8283c38d..1224cbe21 100644 --- a/samples/magics/noxfile.py +++ b/samples/magics/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index e8283c38d..1224cbe21 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/setup.py b/setup.py index 9e1bfbbce..fd1687193 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ release_status = "Development Status :: 5 - Production/Stable" dependencies = [ "grpcio >= 1.47.0, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/1262 + "grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'", # NOTE: Maintainers, please do not require google-api-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 @@ -58,6 +59,7 @@ # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.47.0, < 2.0dev", + "grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'", pyarrow_dependency, ], "pandas": [ diff --git a/tests/unit/line_arg_parser/test_parser.py b/tests/unit/line_arg_parser/test_parser.py index 3f9e9ff41..b170d536a 100644 --- a/tests/unit/line_arg_parser/test_parser.py +++ b/tests/unit/line_arg_parser/test_parser.py @@ -44,7 +44,7 @@ def test_consume_unexpected_eol(parser_class): fake_lexer = [Token(TokenType.EOL, lexeme="", pos=0)] parser = parser_class(fake_lexer) - with pytest.raises(ParseError, match=r"Unexpected end of input.*expected COLON.*"): + with pytest.raises(ParseError, match=r"Unexpected end of input.*expected.*COLON.*"): parser.consume(TokenType.COLON) From 14a2186d4f99e529eb16490b9fc41a46d17e7f18 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 9 Jan 2023 17:59:30 +0000 Subject: [PATCH 055/536] chore(deps): update dependency certifi to v2022.12.7 [security] (#1434) Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 798de6bb5..1e7905b31 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==22.1.0 -certifi==2022.9.24 +certifi==2022.12.7 cffi==1.15.1 charset-normalizer==2.1.1 click==8.1.3 From 1287625e3372128f075a3f3a093062eef4083285 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 9 Jan 2023 19:06:20 +0000 Subject: [PATCH 056/536] chore(deps): update all dependencies (#1390) * chore(deps): update all dependencies * update dependency db-dtypes * update dependency google-cloud-bigquery * revert Co-authored-by: Chalmer Lowe Co-authored-by: aribray <45905583+aribray@users.noreply.github.com> Co-authored-by: Anthonios Partheniou --- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 10 +++++----- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 10 +++++----- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 12 ++++++------ 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 6f722c66e..980c425b9 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.1.3 +pytest==7.2.0 mock==4.0.3 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 1e7905b31..fc6976a0e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -6,14 +6,14 @@ click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.0.4 +db-dtypes==1.0.5 Fiona==1.8.22 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.1; python_version >= '3.8' google-api-core==2.10.2 -google-auth==2.13.0 -google-cloud-bigquery==3.3.5 +google-auth==2.14.0 +google-cloud-bigquery==3.3.6 google-cloud-bigquery-storage==2.16.2 google-cloud-core==2.3.2 google-crc32c==1.5.0 @@ -28,13 +28,13 @@ packaging==21.3 pandas===1.3.5; python_version == '3.7' pandas==1.5.1; python_version >= '3.8' proto-plus==1.22.1 -pyarrow==9.0.0 +pyarrow==10.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.5 +pytz==2022.6 PyYAML==6.0 requests==2.28.1 rsa==4.9 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 7902c72ef..e5173c344 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.1.3 +pytest==7.2.0 mock==4.0.3 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index bdd026ce5..7b5291365 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,15 +1,15 @@ -db-dtypes==1.0.4 +db-dtypes==1.0.5 google-cloud-bigquery-storage==2.16.2 -google-auth-oauthlib==0.5.3 +google-auth-oauthlib==0.7.0 grpcio==1.50.0 ipywidgets==8.0.2 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.5.0; python_version >= '3.9' +ipython==8.6.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.1; python_version >= '3.8' -pyarrow==9.0.0 -pytz==2022.5 +pyarrow==10.0.0 +pytz==2022.6 typing-extensions==4.4.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 7902c72ef..e5173c344 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.1.3 +pytest==7.2.0 mock==4.0.3 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index ebf892279..82e7452df 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ -db-dtypes==1.0.4 -google-cloud-bigquery==3.3.5 +db-dtypes==1.0.5 +google-cloud-bigquery==3.3.6 google-cloud-bigquery-storage==2.16.2 -google-auth-oauthlib==0.5.3 +google-auth-oauthlib==0.7.0 grpcio==1.50.0 ipywidgets==8.0.2 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.5.0; python_version >= '3.9' +ipython==8.6.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.1; python_version >= '3.8' -pyarrow==9.0.0 -pytz==2022.5 +pyarrow==10.0.0 +pytz==2022.6 typing-extensions==4.4.0 From e885f3c8a76e2672a3a7876c3916fc5ce8d4591a Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Tue, 10 Jan 2023 12:03:42 -0600 Subject: [PATCH 057/536] chore: delete duplicate code from snippets.py (#1458) * chore: delete duplicate code samples from snippets.py * fixed reference to code sample --- docs/snippets.py | 93 +-------------------------------------- docs/usage/encryption.rst | 2 +- 2 files changed, 2 insertions(+), 93 deletions(-) diff --git a/docs/snippets.py b/docs/snippets.py index 3c4967a59..efc551310 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -118,80 +118,6 @@ def test_create_client_default_credentials(): assert client is not None -# TODO(Mattix23): After code sample from https://github.com/googleapis/python-bigquery/pull/1446 -# is updated from cloud.google.com delete this. -def test_create_table_nested_repeated_schema(client, to_delete): - dataset_id = "create_table_nested_repeated_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_nested_repeated_schema] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = client.project - # dataset_ref = bigquery.DatasetReference(project, 'my_dataset') - - schema = [ - bigquery.SchemaField("id", "STRING", mode="NULLABLE"), - bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"), - bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"), - bigquery.SchemaField("dob", "DATE", mode="NULLABLE"), - bigquery.SchemaField( - "addresses", - "RECORD", - mode="REPEATED", - fields=[ - bigquery.SchemaField("status", "STRING", mode="NULLABLE"), - bigquery.SchemaField("address", "STRING", mode="NULLABLE"), - bigquery.SchemaField("city", "STRING", mode="NULLABLE"), - bigquery.SchemaField("state", "STRING", mode="NULLABLE"), - bigquery.SchemaField("zip", "STRING", mode="NULLABLE"), - bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"), - ], - ), - ] - table_ref = dataset_ref.table("my_table") - table = bigquery.Table(table_ref, schema=schema) - table = client.create_table(table) # API request - - print("Created table {}".format(table.full_table_id)) - # [END bigquery_nested_repeated_schema] - - -def test_create_table_cmek(client, to_delete): - dataset_id = "create_table_cmek_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - # TODO(Mattix23): When sample is updated in cloud.google.com, delete this one. - # [START bigquery_create_table_cmek] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - table_ref = dataset.table("my_table") - table = bigquery.Table(table_ref) - - # Set the encryption key to use for the table. - # TODO: Replace this key with a key you have created in Cloud KMS. - kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( - "cloud-samples-tests", "us", "test", "test" - ) - table.encryption_configuration = bigquery.EncryptionConfiguration( - kms_key_name=kms_key_name - ) - - table = client.create_table(table) # API request - - assert table.encryption_configuration.kms_key_name == kms_key_name - # [END bigquery_create_table_cmek] - - def test_create_partitioned_table(client, to_delete): dataset_id = "create_table_partitioned_{}".format(_millis()) project = client.project @@ -248,27 +174,10 @@ def test_manage_table_labels(client, to_delete): to_delete.append(dataset) table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table = client.create_table(table) - - # TODO(Mattix23): After code sample from https://github.com/googleapis/python-bigquery/pull/1451 - # is updated from cloud.google.com delete this. - # [START bigquery_label_table] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = client.project - # dataset_ref = bigquery.DatasetReference(project, dataset_id) - # table_ref = dataset_ref.table('my_table') - # table = client.get_table(table_ref) # API request - - assert table.labels == {} labels = {"color": "green"} table.labels = labels - - table = client.update_table(table, ["labels"]) # API request - - assert table.labels == labels - # [END bigquery_label_table] + table = client.create_table(table) # [START bigquery_get_table_labels] # from google.cloud import bigquery diff --git a/docs/usage/encryption.rst b/docs/usage/encryption.rst index 6652f0565..3e6d5aacc 100644 --- a/docs/usage/encryption.rst +++ b/docs/usage/encryption.rst @@ -10,7 +10,7 @@ in the BigQuery documentation for more details. Create a new table, using a customer-managed encryption key from Cloud KMS to encrypt it. -.. literalinclude:: ../snippets.py +.. literalinclude:: ../samples/snippets/create_table_cmek.py :language: python :dedent: 4 :start-after: [START bigquery_create_table_cmek] From 2e80215db476e63b2529cb04a5891f5146a0c67c Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 11 Jan 2023 18:37:16 +0000 Subject: [PATCH 058/536] chore(deps): update all dependencies (#1456) * chore(deps): update all dependencies * revert packaging * revert charset-normalizer Co-authored-by: Anthonios Partheniou --- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 30 ++++++++++++------------- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 18 +++++++-------- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 20 ++++++++--------- 6 files changed, 37 insertions(+), 37 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 980c425b9..1e6b7c5ea 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ pytest==7.2.0 -mock==4.0.3 +mock==5.0.1 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index fc6976a0e..e3ba3102e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ -attrs==22.1.0 +attrs==22.2.0 certifi==2022.12.7 cffi==1.15.1 charset-normalizer==2.1.1 @@ -10,36 +10,36 @@ db-dtypes==1.0.5 Fiona==1.8.22 geojson==2.5.0 geopandas===0.10.2; python_version == '3.7' -geopandas==0.12.1; python_version >= '3.8' -google-api-core==2.10.2 -google-auth==2.14.0 -google-cloud-bigquery==3.3.6 -google-cloud-bigquery-storage==2.16.2 +geopandas==0.12.2; python_version >= '3.8' +google-api-core==2.11.0 +google-auth==2.16.0 +google-cloud-bigquery==3.4.1 +google-cloud-bigquery-storage==2.17.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.0 -googleapis-common-protos==1.56.4 -grpcio==1.50.0 +googleapis-common-protos==1.58.0 +grpcio==1.51.1 idna==3.4 -libcst==0.4.7 +libcst==0.4.9 munch==2.5.0 mypy-extensions==0.4.3 packaging==21.3 pandas===1.3.5; python_version == '3.7' -pandas==1.5.1; python_version >= '3.8' -proto-plus==1.22.1 -pyarrow==10.0.0 +pandas==1.5.2; python_version >= '3.8' +proto-plus==1.22.2 +pyarrow==10.0.1 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.6 +pytz==2022.7 PyYAML==6.0 requests==2.28.1 rsa==4.9 -Shapely==1.8.5.post1 +Shapely==2.0.0 six==1.16.0 typing-extensions==4.4.0 typing-inspect==0.8.0 -urllib3==1.26.12 +urllib3==1.26.13 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index e5173c344..56aa0f432 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 pytest==7.2.0 -mock==4.0.3 +mock==5.0.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 7b5291365..133370fbe 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,15 +1,15 @@ db-dtypes==1.0.5 -google-cloud-bigquery-storage==2.16.2 -google-auth-oauthlib==0.7.0 -grpcio==1.50.0 -ipywidgets==8.0.2 +google-cloud-bigquery-storage==2.17.0 +google-auth-oauthlib==0.8.0 +grpcio==1.51.1 +ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.6.0; python_version >= '3.9' +ipython==8.8.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.1; python_version >= '3.8' +matplotlib==3.6.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.1; python_version >= '3.8' -pyarrow==10.0.0 -pytz==2022.6 +pandas==1.5.2; python_version >= '3.8' +pyarrow==10.0.1 +pytz==2022.7 typing-extensions==4.4.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index e5173c344..56aa0f432 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 pytest==7.2.0 -mock==4.0.3 +mock==5.0.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 82e7452df..27bbb3631 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ db-dtypes==1.0.5 -google-cloud-bigquery==3.3.6 -google-cloud-bigquery-storage==2.16.2 -google-auth-oauthlib==0.7.0 -grpcio==1.50.0 -ipywidgets==8.0.2 +google-cloud-bigquery==3.4.1 +google-cloud-bigquery-storage==2.17.0 +google-auth-oauthlib==0.8.0 +grpcio==1.51.1 +ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.6.0; python_version >= '3.9' +ipython==8.8.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.1; python_version >= '3.8' +matplotlib==3.6.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.1; python_version >= '3.8' -pyarrow==10.0.0 -pytz==2022.6 +pandas==1.5.2; python_version >= '3.8' +pyarrow==10.0.1 +pytz==2022.7 typing-extensions==4.4.0 From 4e192bd9793bdb501eff063b683784b6d2774a6b Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 12 Jan 2023 00:18:41 +0000 Subject: [PATCH 059/536] chore(deps): update all dependencies (#1460) * chore(deps): update all dependencies * revert Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e3ba3102e..316758ee9 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -14,7 +14,7 @@ geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 google-auth==2.16.0 google-cloud-bigquery==3.4.1 -google-cloud-bigquery-storage==2.17.0 +google-cloud-bigquery-storage==2.18.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.0 @@ -42,4 +42,4 @@ Shapely==2.0.0 six==1.16.0 typing-extensions==4.4.0 typing-inspect==0.8.0 -urllib3==1.26.13 +urllib3==1.26.14 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 133370fbe..1ae7c4974 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.5 -google-cloud-bigquery-storage==2.17.0 +google-cloud-bigquery-storage==2.18.0 google-auth-oauthlib==0.8.0 grpcio==1.51.1 ipywidgets==8.0.4 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 27bbb3631..23e31a200 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.0.5 google-cloud-bigquery==3.4.1 -google-cloud-bigquery-storage==2.17.0 +google-cloud-bigquery-storage==2.18.0 google-auth-oauthlib==0.8.0 grpcio==1.51.1 ipywidgets==8.0.4 From 52451a192bebf59d683f0e295cf38b8c06757315 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 12 Jan 2023 17:23:39 +0000 Subject: [PATCH 060/536] chore(deps): update all dependencies (#1461) * chore(deps): update all dependencies * revert packaging Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 316758ee9..0f475e0fd 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,7 +1,7 @@ attrs==22.2.0 certifi==2022.12.7 cffi==1.15.1 -charset-normalizer==2.1.1 +charset-normalizer==3.0.1 click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 @@ -36,7 +36,7 @@ pyparsing==3.0.9 python-dateutil==2.8.2 pytz==2022.7 PyYAML==6.0 -requests==2.28.1 +requests==2.28.2 rsa==4.9 Shapely==2.0.0 six==1.16.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 1ae7c4974..463829c8f 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -7,7 +7,7 @@ ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.8.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.2; python_version >= '3.8' +matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.2; python_version >= '3.8' pyarrow==10.0.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 23e31a200..1ecf3fc45 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -8,7 +8,7 @@ ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.8.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.2; python_version >= '3.8' +matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.2; python_version >= '3.8' pyarrow==10.0.1 From 730a1dec8be49df26a3d805ebd4ad185ba72170d Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 12 Jan 2023 16:06:04 -0500 Subject: [PATCH 061/536] fix: Add support for python 3.11 (#1463) * fix: Add support for python 3.11 * use python 3.11 for system tests --- ...{prerelease-deps-3.10.cfg => prerelease-deps-3.11.cfg} | 2 +- ...{prerelease-deps-3.10.cfg => prerelease-deps-3.11.cfg} | 2 +- .../presubmit/{snippets-3.10.cfg => snippets-3.11.cfg} | 2 +- .kokoro/presubmit/{system-3.10.cfg => system-3.11.cfg} | 2 +- noxfile.py | 8 ++++---- setup.py | 1 + 6 files changed, 9 insertions(+), 8 deletions(-) rename .kokoro/continuous/{prerelease-deps-3.10.cfg => prerelease-deps-3.11.cfg} (77%) rename .kokoro/presubmit/{prerelease-deps-3.10.cfg => prerelease-deps-3.11.cfg} (77%) rename .kokoro/presubmit/{snippets-3.10.cfg => snippets-3.11.cfg} (81%) rename .kokoro/presubmit/{system-3.10.cfg => system-3.11.cfg} (82%) diff --git a/.kokoro/continuous/prerelease-deps-3.10.cfg b/.kokoro/continuous/prerelease-deps-3.11.cfg similarity index 77% rename from .kokoro/continuous/prerelease-deps-3.10.cfg rename to .kokoro/continuous/prerelease-deps-3.11.cfg index 339980bdd..1e19f1239 100644 --- a/.kokoro/continuous/prerelease-deps-3.10.cfg +++ b/.kokoro/continuous/prerelease-deps-3.11.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "prerelease_deps-3.10" + value: "prerelease_deps-3.11" } diff --git a/.kokoro/presubmit/prerelease-deps-3.10.cfg b/.kokoro/presubmit/prerelease-deps-3.11.cfg similarity index 77% rename from .kokoro/presubmit/prerelease-deps-3.10.cfg rename to .kokoro/presubmit/prerelease-deps-3.11.cfg index 339980bdd..1e19f1239 100644 --- a/.kokoro/presubmit/prerelease-deps-3.10.cfg +++ b/.kokoro/presubmit/prerelease-deps-3.11.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "prerelease_deps-3.10" + value: "prerelease_deps-3.11" } diff --git a/.kokoro/presubmit/snippets-3.10.cfg b/.kokoro/presubmit/snippets-3.11.cfg similarity index 81% rename from .kokoro/presubmit/snippets-3.10.cfg rename to .kokoro/presubmit/snippets-3.11.cfg index dde182fb9..74af6dd07 100644 --- a/.kokoro/presubmit/snippets-3.10.cfg +++ b/.kokoro/presubmit/snippets-3.11.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "snippets-3.10" + value: "snippets-3.11" } diff --git a/.kokoro/presubmit/system-3.10.cfg b/.kokoro/presubmit/system-3.11.cfg similarity index 82% rename from .kokoro/presubmit/system-3.10.cfg rename to .kokoro/presubmit/system-3.11.cfg index 30956a3ab..5ff31a603 100644 --- a/.kokoro/presubmit/system-3.10.cfg +++ b/.kokoro/presubmit/system-3.11.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "system-3.10" + value: "system-3.11" } diff --git a/noxfile.py b/noxfile.py index 5f08c016b..381876462 100644 --- a/noxfile.py +++ b/noxfile.py @@ -36,7 +36,7 @@ ) DEFAULT_PYTHON_VERSION = "3.8" -SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.10"] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.11"] UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() @@ -80,7 +80,7 @@ def default(session, install_extras=True): constraints_path, ) - if install_extras and session.python == "3.10": + if install_extras and session.python == "3.11": install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" elif install_extras: install_target = ".[all]" @@ -185,7 +185,7 @@ def system(session): # Data Catalog needed for the column ACL test with a real Policy Tag. session.install("google-cloud-datacatalog", "-c", constraints_path) - if session.python == "3.10": + if session.python == "3.11": extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" @@ -244,7 +244,7 @@ def snippets(session): session.install("google-cloud-storage", "-c", constraints_path) session.install("grpcio", "-c", constraints_path) - if session.python == "3.10": + if session.python == "3.11": extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" diff --git a/setup.py b/setup.py index fd1687193..77a9bb53c 100644 --- a/setup.py +++ b/setup.py @@ -131,6 +131,7 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Operating System :: OS Independent", "Topic :: Internet", ], From 60881296a35067e7aa025d92b2425572f10fd4ec Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 13 Jan 2023 11:41:25 -0600 Subject: [PATCH 062/536] deps: remove upper bound on packaging dependency (#1440) * deps: remove upper bound on packaging dependency Towards #1435 * install prerelease version of packaging * bump minimum packaging version Co-authored-by: Anthonios Partheniou --- noxfile.py | 5 +++++ setup.py | 2 +- testing/constraints-3.7.txt | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 381876462..f6283abf9 100644 --- a/noxfile.py +++ b/noxfile.py @@ -300,6 +300,11 @@ def prerelease_deps(session): "--upgrade", "pandas", ) + session.install( + "--pre", + "--upgrade", + "git+https://github.com/pypa/packaging.git", + ) session.install( "--pre", diff --git a/setup.py b/setup.py index 77a9bb53c..81cb2e5db 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ # https://github.com/googleapis/google-cloud-python/issues/10566 "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", - "packaging >= 14.3, <22.0.0dev", + "packaging >= 20.0.0", "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", "requests >= 2.21.0, < 3.0.0dev", diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 149d6c496..3701c7343 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -17,6 +17,7 @@ ipython==7.0.1 opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 +packaging==20.0.0 pandas==1.1.0 proto-plus==1.22.0 protobuf==3.19.5 From b5ccbfe4eee91d7f481d9708084cd29d0c85e666 Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Fri, 13 Jan 2023 14:07:38 -0600 Subject: [PATCH 063/536] =?UTF-8?q?docs:=20revise=20get=20table=20labels?= =?UTF-8?q?=20code=20sample,=20add=20TODO=20to=20clean=20up=20snipp?= =?UTF-8?q?=E2=80=A6=20(#1464)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: revise get table labels code sample, add TODO to clean up snippets.py * added a test with labels * Update samples/snippets/get_table_labels_test.py Co-authored-by: Tim Swast --- docs/snippets.py | 2 + samples/snippets/get_table_labels.py | 39 ++++++++++++++++++ samples/snippets/get_table_labels_test.py | 50 +++++++++++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 samples/snippets/get_table_labels.py create mode 100644 samples/snippets/get_table_labels_test.py diff --git a/docs/snippets.py b/docs/snippets.py index efc551310..85856eb3e 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -179,6 +179,8 @@ def test_manage_table_labels(client, to_delete): table.labels = labels table = client.create_table(table) + # TODO(Mattix23): After code sample is updated from cloud.google.com delete this + # [START bigquery_get_table_labels] # from google.cloud import bigquery # client = bigquery.Client() diff --git a/samples/snippets/get_table_labels.py b/samples/snippets/get_table_labels.py new file mode 100644 index 000000000..8cfbd4ee2 --- /dev/null +++ b/samples/snippets/get_table_labels.py @@ -0,0 +1,39 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_table_labels(table_id: str) -> None: + orig_table_id = table_id + # [START bigquery_get_table_labels] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you want to create. + table_id = "your-project.your_dataset.your_table_name" + + # [END bigquery_get_table_labels] + table_id = orig_table_id + + # [START bigquery_get_table_labels] + table = client.get_table(table_id) # API Request + + # View table labels + print(f"Table ID: {table_id}.") + if table.labels: + for label, value in table.labels.items(): + print(f"\t{label}: {value}") + else: + print("\tTable has no labels defined.") + # [END bigquery_get_table_labels] diff --git a/samples/snippets/get_table_labels_test.py b/samples/snippets/get_table_labels_test.py new file mode 100644 index 000000000..f922e728c --- /dev/null +++ b/samples/snippets/get_table_labels_test.py @@ -0,0 +1,50 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from google.cloud import bigquery + +import get_table_labels + +if typing.TYPE_CHECKING: + import pytest + + +def test_get_table_labels( + capsys: "pytest.CaptureFixture[str]", + table_id: str, + bigquery_client: bigquery.Client, +) -> None: + table = bigquery_client.get_table(table_id) + table.labels = {"color": "green"} + bigquery_client.update_table(table, ["labels"]) + + get_table_labels.get_table_labels(table_id) + + out, _ = capsys.readouterr() + assert table_id in out + assert "color" in out + + +def test_get_table_labels_no_label( + capsys: "pytest.CaptureFixture[str]", + table_id: str, +) -> None: + + get_table_labels.get_table_labels(table_id) + + out, _ = capsys.readouterr() + assert "no labels defined" in out + assert table_id in out From fe38ffaf28d642682163dff5318311a2d088fe7c Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 18 Jan 2023 08:37:29 -0600 Subject: [PATCH 064/536] chore(main): release 3.4.2 (#1441) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 23 +++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6f0abc85..0a5b4d179 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,29 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.4.2](https://github.com/googleapis/python-bigquery/compare/v3.4.1...v3.4.2) (2023-01-13) + + +### Bug Fixes + +* Add support for python 3.11 ([#1463](https://github.com/googleapis/python-bigquery/issues/1463)) ([730a1de](https://github.com/googleapis/python-bigquery/commit/730a1dec8be49df26a3d805ebd4ad185ba72170d)) +* Require grpcio >= 1.49.1 for python 3.11 ([72b25c5](https://github.com/googleapis/python-bigquery/commit/72b25c52bc4b9a92c4cb187b6230b280d4af905c)) + + +### Dependencies + +* Remove upper bound on packaging dependency ([#1440](https://github.com/googleapis/python-bigquery/issues/1440)) ([6088129](https://github.com/googleapis/python-bigquery/commit/60881296a35067e7aa025d92b2425572f10fd4ec)) + + +### Documentation + +* Create sample to write schema file from table ([#1439](https://github.com/googleapis/python-bigquery/issues/1439)) ([093cc68](https://github.com/googleapis/python-bigquery/commit/093cc6852ada29898c4a4d047fd216544ef15bba)) +* Created samples for load table and create table from schema file ([#1436](https://github.com/googleapis/python-bigquery/issues/1436)) ([8ad2e5b](https://github.com/googleapis/python-bigquery/commit/8ad2e5bc1c04bf16fffe4c8773e722b68117c916)) +* Revise create table cmek sample ([#1452](https://github.com/googleapis/python-bigquery/issues/1452)) ([57740e4](https://github.com/googleapis/python-bigquery/commit/57740e49af7418449aec73a6fdd307fcb588c655)) +* Revise get table labels code sample, add TODO to clean up snipp… ([#1464](https://github.com/googleapis/python-bigquery/issues/1464)) ([b5ccbfe](https://github.com/googleapis/python-bigquery/commit/b5ccbfe4eee91d7f481d9708084cd29d0c85e666)) +* Revise label table code samples ([#1451](https://github.com/googleapis/python-bigquery/issues/1451)) ([14ae1f2](https://github.com/googleapis/python-bigquery/commit/14ae1f20538ea00829a1325f91f5e8524234bd0c)) +* Revise sample for nested schema ([#1446](https://github.com/googleapis/python-bigquery/issues/1446)) ([a097631](https://github.com/googleapis/python-bigquery/commit/a0976318fc5ad1620a68250c3e059e2a51d4946d)) + ## [3.4.1](https://github.com/googleapis/python-bigquery/compare/v3.4.0...v3.4.1) (2022-12-09) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 71133df01..d38bb4619 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.4.1" +__version__ = "3.4.2" From 6ef9fb9d5cc49b244be7476c439bccaa8e443bb9 Mon Sep 17 00:00:00 2001 From: shollyman Date: Wed, 18 Jan 2023 09:57:21 -0800 Subject: [PATCH 065/536] testing: remove test_create_table_with_policy (#1470) This is superseded by a version that tests using real tags in test_create_table_with_real_custom_policy Fixes: https://github.com/googleapis/python-bigquery/issues/1468 --- tests/system/test_client.py | 51 ------------------------------------- 1 file changed, 51 deletions(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 575898209..14a9b04d4 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -335,57 +335,6 @@ def test_create_table(self): self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_id) - def test_create_table_with_policy(self): - from google.cloud.bigquery.schema import PolicyTagList - - dataset = self.temp_dataset(_make_dataset_id("create_table_with_policy")) - table_id = "test_table" - policy_1 = PolicyTagList( - names=[ - "projects/{}/locations/us/taxonomies/1/policyTags/2".format( - Config.CLIENT.project - ), - ] - ) - policy_2 = PolicyTagList( - names=[ - "projects/{}/locations/us/taxonomies/3/policyTags/4".format( - Config.CLIENT.project - ), - ] - ) - - schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField( - "secret_int", "INTEGER", mode="REQUIRED", policy_tags=policy_1 - ), - ] - table_arg = Table(dataset.table(table_id), schema=schema) - self.assertFalse(_table_exists(table_arg)) - - table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) - self.to_delete.insert(0, table) - - self.assertTrue(_table_exists(table)) - self.assertEqual(policy_1, table.schema[1].policy_tags) - - # Amend the schema to replace the policy tags - new_schema = table.schema[:] - old_field = table.schema[1] - new_schema[1] = bigquery.SchemaField( - name=old_field.name, - field_type=old_field.field_type, - mode=old_field.mode, - description=old_field.description, - fields=old_field.fields, - policy_tags=policy_2, - ) - - table.schema = new_schema - table2 = Config.CLIENT.update_table(table, ["schema"]) - self.assertEqual(policy_2, table2.schema[1].policy_tags) - def test_create_table_with_real_custom_policy(self): from google.cloud.bigquery.schema import PolicyTagList From bdfe888f44c7114954e3b15f3139ad3a9ade83c2 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 18 Jan 2023 19:20:16 +0000 Subject: [PATCH 066/536] chore(deps): update dependency packaging to v23 (#1462) * chore(deps): update dependency packaging to v23 * bump google-cloud-bigquery Co-authored-by: Anthonios Partheniou Co-authored-by: aribray <45905583+aribray@users.noreply.github.com> --- samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 0f475e0fd..994d579aa 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 google-auth==2.16.0 -google-cloud-bigquery==3.4.1 +google-cloud-bigquery==3.4.2 google-cloud-bigquery-storage==2.18.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 @@ -24,7 +24,7 @@ idna==3.4 libcst==0.4.9 munch==2.5.0 mypy-extensions==0.4.3 -packaging==21.3 +packaging==23.0 pandas===1.3.5; python_version == '3.7' pandas==1.5.2; python_version >= '3.8' proto-plus==1.22.2 From 0dab7d25ace4b63d2984485e7b0c5bb38f20476f Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Wed, 18 Jan 2023 13:53:49 -0600 Subject: [PATCH 067/536] =?UTF-8?q?docs:=20revise=20delete=20label=20table?= =?UTF-8?q?=20code=20sample,=20add=20TODO=20to=20clean=20up=20sni=E2=80=A6?= =?UTF-8?q?=20(#1466)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: revise delete label table code sample, add TODO to clean up snippets.py * changed name of test function to align with file name Co-authored-by: aribray <45905583+aribray@users.noreply.github.com> Co-authored-by: Anthonios Partheniou --- docs/snippets.py | 2 + samples/snippets/delete_label_table.py | 43 +++++++++++++++++++++ samples/snippets/delete_label_table_test.py | 34 ++++++++++++++++ 3 files changed, 79 insertions(+) create mode 100644 samples/snippets/delete_label_table.py create mode 100644 samples/snippets/delete_label_table_test.py diff --git a/docs/snippets.py b/docs/snippets.py index 85856eb3e..eca2b1353 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -203,6 +203,8 @@ def test_manage_table_labels(client, to_delete): # [END bigquery_get_table_labels] assert table.labels == labels + # TODO(Mattix23): After code sample is updated from cloud.google.com delete this + # [START bigquery_delete_label_table] # from google.cloud import bigquery # client = bigquery.Client() diff --git a/samples/snippets/delete_label_table.py b/samples/snippets/delete_label_table.py new file mode 100644 index 000000000..0e9eaaf8f --- /dev/null +++ b/samples/snippets/delete_label_table.py @@ -0,0 +1,43 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + + +def delete_label_table(table_id: str, label_key: str) -> bigquery.Table: + orig_table_id = table_id + orig_label_key = label_key + # [START bigquery_delete_label_table] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you wish to delete from. + table_id = "your-project.your_dataset.your_table_name" + # TODO(dev): Change label_key to the name of the label you want to remove. + label_key = "color" + # [END bigquery_delete_label_table] + table_id = orig_table_id + label_key = orig_label_key + # [START bigquery_delete_label_table] + table = client.get_table(table_id) # API request + + # To delete a label from a table, set its value to None + table.labels[label_key] = None + + table = client.update_table(table, ["labels"]) # API request + + print(f"Deleted label '{label_key}' from {table_id}.") + # [END bigquery_delete_label_table] + return table diff --git a/samples/snippets/delete_label_table_test.py b/samples/snippets/delete_label_table_test.py new file mode 100644 index 000000000..54acae77f --- /dev/null +++ b/samples/snippets/delete_label_table_test.py @@ -0,0 +1,34 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import delete_label_table + +if typing.TYPE_CHECKING: + import pytest + + +def test_delete_label_table( + capsys: "pytest.CaptureFixture[str]", + table_id: str, +) -> None: + + table = delete_label_table.delete_label_table(table_id, "color") + + out, _ = capsys.readouterr() + assert "Deleted" in out + assert "color" in out + assert table_id in out + assert table.labels is None or "color" not in table.labels From f0ace2ac2307ef359511a235f80f5ce9e46264c1 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Wed, 18 Jan 2023 16:29:06 -0600 Subject: [PATCH 068/536] docs: adds snippet for creating table with external data config (#1420) * docs: add samples for creating table with external data configuration and creating an external table definition Co-authored-by: Anthonios Partheniou --- docs/usage/tables.rst | 9 +++ ...reate_table_external_data_configuration.py | 66 +++++++++++++++++++ samples/tests/conftest.py | 18 ++++- ...reate_table_external_data_configuration.py | 32 +++++++++ 4 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 samples/create_table_external_data_configuration.py create mode 100644 samples/tests/test_create_table_external_data_configuration.py diff --git a/docs/usage/tables.rst b/docs/usage/tables.rst index d924fe214..105e93637 100644 --- a/docs/usage/tables.rst +++ b/docs/usage/tables.rst @@ -58,6 +58,15 @@ Create an empty table with the :start-after: [START bigquery_create_table] :end-before: [END bigquery_create_table] +Create a table using an external data source with the +:func:`~google.cloud.bigquery.client.Client.create_table` method: + +.. literalinclude:: ../samples/create_table_external_data_configuration.py + :language: python + :dedent: 4 + :start-after: [START bigquery_create_table_external_data_configuration] + :end-before: [END bigquery_create_table_external_data_configuration] + Create a clustered table with the :func:`~google.cloud.bigquery.client.Client.create_table` method: diff --git a/samples/create_table_external_data_configuration.py b/samples/create_table_external_data_configuration.py new file mode 100644 index 000000000..068f91555 --- /dev/null +++ b/samples/create_table_external_data_configuration.py @@ -0,0 +1,66 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_table_external_data_configuration( + table_id: str, +) -> None: + """Create a table using an external data source""" + orig_table_id = table_id + # [START bigquery_create_table_external_data_configuration] + # [START bigquery_create_external_table_definition] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + table_id = "your-project.your_dataset.your_table_name" + # [END bigquery_create_table_external_data_configuration] + table_id = orig_table_id + # [START bigquery_create_table_external_data_configuration] + + # TODO(developer): Set the external source format of your table. + # Note that the set of allowed values for external data sources is + # different than the set used for loading data (see :class:`~google.cloud.bigquery.job.SourceFormat`). + external_source_format = "AVRO" + + # TODO(developer): Set the source_uris to point to your data in Google Cloud + source_uris = [ + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro", + ] + + # Create ExternalConfig object with external source format + external_config = bigquery.ExternalConfig(external_source_format) + # Set source_uris that point to your data in Google Cloud + external_config.source_uris = source_uris + + # TODO(developer) You have the option to set a reference_file_schema_uri, which points to + # a reference file for the table schema + reference_file_schema_uri = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro" + + external_config.reference_file_schema_uri = reference_file_schema_uri + # [END bigquery_create_external_table_definition] + + table = bigquery.Table(table_id) + # Set the external data configuration of the table + table.external_data_configuration = external_config + table = client.create_table(table) # Make an API request. + + print( + f"Created table with external source format {table.external_data_configuration.source_format}" + ) + # [END bigquery_create_table_external_data_configuration] diff --git a/samples/tests/conftest.py b/samples/tests/conftest.py index b7a2ad587..99bd2e367 100644 --- a/samples/tests/conftest.py +++ b/samples/tests/conftest.py @@ -13,7 +13,7 @@ # limitations under the License. import datetime -from typing import Iterator +from typing import Iterator, List import uuid import google.auth @@ -47,6 +47,22 @@ def random_table_id(dataset_id: str) -> str: return "{}.{}".format(dataset_id, random_table_id) +@pytest.fixture +def avro_source_uris() -> List[str]: + avro_source_uris = [ + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro", + ] + return avro_source_uris + + +@pytest.fixture +def reference_file_schema_uri() -> str: + reference_file_schema_uri = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro" + return reference_file_schema_uri + + @pytest.fixture def random_dataset_id(client: bigquery.Client) -> Iterator[str]: now = datetime.datetime.now() diff --git a/samples/tests/test_create_table_external_data_configuration.py b/samples/tests/test_create_table_external_data_configuration.py new file mode 100644 index 000000000..bf4cf17d4 --- /dev/null +++ b/samples/tests/test_create_table_external_data_configuration.py @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from .. import create_table_external_data_configuration + +if typing.TYPE_CHECKING: + import pytest + + +def test_create_table_external_data_configuration( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + + create_table_external_data_configuration.create_table_external_data_configuration( + random_table_id + ) + out, _ = capsys.readouterr() + assert "Created table with external source format AVRO" in out From 7fe04dbe0e2811a5cfea08a0e1809437410c3ffa Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 23 Jan 2023 15:47:40 +0000 Subject: [PATCH 069/536] chore(deps): update all dependencies (#1471) Co-authored-by: Anthonios Partheniou --- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 2 +- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 1e6b7c5ea..100e0639c 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.2.0 +pytest==7.2.1 mock==5.0.1 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 994d579aa..c8c063ba8 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -34,7 +34,7 @@ pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.7 +pytz==2022.7.1 PyYAML==6.0 requests==2.28.2 rsa==4.9 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 56aa0f432..e8f3982c7 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.0 +pytest==7.2.1 mock==5.0.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 463829c8f..9fd09f557 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -11,5 +11,5 @@ matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.2; python_version >= '3.8' pyarrow==10.0.1 -pytz==2022.7 +pytz==2022.7.1 typing-extensions==4.4.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 56aa0f432..e8f3982c7 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.0 +pytest==7.2.1 mock==5.0.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 1ecf3fc45..8a7b97563 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.5 -google-cloud-bigquery==3.4.1 +google-cloud-bigquery==3.4.2 google-cloud-bigquery-storage==2.18.0 google-auth-oauthlib==0.8.0 grpcio==1.51.1 @@ -12,5 +12,5 @@ matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.2; python_version >= '3.8' pyarrow==10.0.1 -pytz==2022.7 +pytz==2022.7.1 typing-extensions==4.4.0 From fa8f6ec86892c78a44a52884073d159ac6690238 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 23 Jan 2023 16:31:18 +0000 Subject: [PATCH 070/536] chore(deps): update all dependencies (#1475) --- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c8c063ba8..3a28bf2b5 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -17,7 +17,7 @@ google-cloud-bigquery==3.4.2 google-cloud-bigquery-storage==2.18.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 -google-resumable-media==2.4.0 +google-resumable-media==2.4.1 googleapis-common-protos==1.58.0 grpcio==1.51.1 idna==3.4 @@ -26,7 +26,7 @@ munch==2.5.0 mypy-extensions==0.4.3 packaging==23.0 pandas===1.3.5; python_version == '3.7' -pandas==1.5.2; python_version >= '3.8' +pandas==1.5.3; python_version >= '3.8' proto-plus==1.22.2 pyarrow==10.0.1 pyasn1==0.4.8 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 9fd09f557..2122f12db 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -9,7 +9,7 @@ ipython==8.8.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.2; python_version >= '3.8' +pandas==1.5.3; python_version >= '3.8' pyarrow==10.0.1 pytz==2022.7.1 typing-extensions==4.4.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8a7b97563..7b0487f06 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -10,7 +10,7 @@ ipython==8.8.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.2; python_version >= '3.8' +pandas==1.5.3; python_version >= '3.8' pyarrow==10.0.1 pytz==2022.7.1 typing-extensions==4.4.0 From a71888a60d1e5e5815ab459fe24368ad5b0d032a Mon Sep 17 00:00:00 2001 From: adhiggs <38925938+adhiggs@users.noreply.github.com> Date: Mon, 23 Jan 2023 16:08:12 -0800 Subject: [PATCH 071/536] docs(samples): table variable fix (#1287) Updated table variable in "# Print row data in tabular format." to point at the correct table_id. Fixes https://togithub.com/googleapis/python-bigquery/issues/1286 --- samples/browse_table_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/browse_table_data.py b/samples/browse_table_data.py index 6a56253bf..74b903aa3 100644 --- a/samples/browse_table_data.py +++ b/samples/browse_table_data.py @@ -47,7 +47,7 @@ def browse_table_data(table_id: str) -> None: print("Downloaded {} rows from table {}".format(len(rows), table_id)) # Print row data in tabular format. - rows_iter = client.list_rows(table, max_results=10) + rows_iter = client.list_rows(table_id, max_results=10) format_string = "{!s:<16} " * len(rows_iter.schema) field_names = [field.name for field in rows_iter.schema] print(format_string.format(*field_names)) # Prints column headers. From e69c81d1ef87a37ccdcca79cc73502acbc2b23ee Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 25 Jan 2023 18:43:49 +0000 Subject: [PATCH 072/536] chore(deps): update dependency google-cloud-bigquery-storage to v2.18.1 (#1476) Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3a28bf2b5..a6a33af5a 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -14,7 +14,7 @@ geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 google-auth==2.16.0 google-cloud-bigquery==3.4.2 -google-cloud-bigquery-storage==2.18.0 +google-cloud-bigquery-storage==2.18.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 2122f12db..ba90dab78 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.5 -google-cloud-bigquery-storage==2.18.0 +google-cloud-bigquery-storage==2.18.1 google-auth-oauthlib==0.8.0 grpcio==1.51.1 ipywidgets==8.0.4 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7b0487f06..b20934886 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.0.5 google-cloud-bigquery==3.4.2 -google-cloud-bigquery-storage==2.18.0 +google-cloud-bigquery-storage==2.18.1 google-auth-oauthlib==0.8.0 grpcio==1.51.1 ipywidgets==8.0.4 From 58eff2c746aaa436b15e54bba8688a6c72d008cf Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 26 Jan 2023 17:08:45 +0000 Subject: [PATCH 073/536] chore(deps): update dependency geojson to v3 (#1478) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index a6a33af5a..1a332cdaf 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -8,7 +8,7 @@ cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.0.5 Fiona==1.8.22 -geojson==2.5.0 +geojson==3.0.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 From 9792ae0f1d28c8c9e28c89b0c57b642bb8f1b5ab Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Sat, 28 Jan 2023 11:39:21 +0000 Subject: [PATCH 074/536] chore(deps): update all dependencies (#1479) --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 1a332cdaf..294cb095d 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -28,7 +28,7 @@ packaging==23.0 pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' proto-plus==1.22.2 -pyarrow==10.0.1 +pyarrow==11.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.21 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index ba90dab78..2446aa5e8 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -5,11 +5,11 @@ grpcio==1.51.1 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.8.0; python_version >= '3.9' +ipython==8.9.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' -pyarrow==10.0.1 +pyarrow==11.0.0 pytz==2022.7.1 typing-extensions==4.4.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index b20934886..a85653d42 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -6,11 +6,11 @@ grpcio==1.51.1 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.8.0; python_version >= '3.9' +ipython==8.9.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.6.3; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' -pyarrow==10.0.1 +pyarrow==11.0.0 pytz==2022.7.1 typing-extensions==4.4.0 From b63899f8a4608607eb5627f0ea5701d673537317 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 30 Jan 2023 10:38:10 +0000 Subject: [PATCH 075/536] chore(deps): update dependency shapely to v2.0.1 (#1480) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 294cb095d..d3fee4c2f 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -38,7 +38,7 @@ pytz==2022.7.1 PyYAML==6.0 requests==2.28.2 rsa==4.9 -Shapely==2.0.0 +Shapely==2.0.1 six==1.16.0 typing-extensions==4.4.0 typing-inspect==0.8.0 From 903dcddf7ee7a408dd205f53efc1c1feea9cd0ff Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 30 Jan 2023 18:34:14 +0000 Subject: [PATCH 076/536] chore(deps): update dependency fiona to v1.9.0 (#1482) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d3fee4c2f..284614d17 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -7,7 +7,7 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.0.5 -Fiona==1.8.22 +Fiona==1.9.0 geojson==3.0.0 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' From f32df1fb74e4aea24cd8a4099040ad2f7436e54d Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Mon, 30 Jan 2023 14:55:16 -0600 Subject: [PATCH 077/536] feat: add __str__ method to DatasetReference (#1477) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add __str__ method to DatasetReference * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot --- google/cloud/bigquery/dataset.py | 3 +++ tests/unit/test_dataset.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index c30204067..0edd29359 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -215,6 +215,9 @@ def __ne__(self, other): def __hash__(self): return hash(self._key()) + def __str__(self): + return f"{self.project}.{self._dataset_id}" + def __repr__(self): return "DatasetReference{}".format(self._key()) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 856674daf..5e26a0c03 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -622,6 +622,10 @@ def test___repr__(self): expected = "DatasetReference('project1', 'dataset1')" self.assertEqual(repr(dataset), expected) + def test___str__(self): + dataset = self._make_one("project1", "dataset1") + self.assertEqual(str(dataset), "project1.dataset1") + class TestDataset(unittest.TestCase): from google.cloud.bigquery.dataset import DatasetReference From 24793947f8fc8aaec38d8c78ebbb3a9420992a21 Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Mon, 30 Jan 2023 15:25:23 -0600 Subject: [PATCH 078/536] chore: delete duplicate code from snippets.py (#1481) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: delete duplicate code from snippets.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- docs/snippets.py | 60 ------------------------------------------------ 1 file changed, 60 deletions(-) diff --git a/docs/snippets.py b/docs/snippets.py index eca2b1353..e1d9ae839 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -164,66 +164,6 @@ def test_create_partitioned_table(client, to_delete): "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" ) ) -def test_manage_table_labels(client, to_delete): - dataset_id = "label_table_dataset_{}".format(_millis()) - table_id = "label_table_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - - labels = {"color": "green"} - table.labels = labels - table = client.create_table(table) - - # TODO(Mattix23): After code sample is updated from cloud.google.com delete this - - # [START bigquery_get_table_labels] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - # table_id = 'my_table' - - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - table_ref = dataset_ref.table(table_id) - table = client.get_table(table_ref) # API Request - - # View table labels - print("Table ID: {}".format(table_id)) - print("Labels:") - if table.labels: - for label, value in table.labels.items(): - print("\t{}: {}".format(label, value)) - else: - print("\tTable has no labels defined.") - # [END bigquery_get_table_labels] - assert table.labels == labels - - # TODO(Mattix23): After code sample is updated from cloud.google.com delete this - - # [START bigquery_delete_label_table] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = client.project - # dataset_ref = bigquery.DatasetReference(project, dataset_id) - # table_ref = dataset_ref.table('my_table') - # table = client.get_table(table_ref) # API request - - # This example table starts with one label - assert table.labels == {"color": "green"} - # To delete a label from a table, set its value to None - table.labels["color"] = None - - table = client.update_table(table, ["labels"]) # API request - - assert table.labels == {} - # [END bigquery_delete_label_table] - - @pytest.mark.skip( reason=( "update_table() is flaky " From bd1da9aa0a40b02b7d5409a0b094d8380e255c91 Mon Sep 17 00:00:00 2001 From: Mario Torres Jr <105736410+Mattix23@users.noreply.github.com> Date: Tue, 31 Jan 2023 12:06:02 -0600 Subject: [PATCH 079/536] feat: add preserveAsciiControlCharacter to LoadJobConfig (#1484) --- google/cloud/bigquery/job/load.py | 13 +++++++++++++ tests/unit/job/test_load_config.py | 14 ++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 5c7f26841..14a7fa30b 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -311,6 +311,19 @@ def null_marker(self): def null_marker(self, value): self._set_sub_prop("nullMarker", value) + @property + def preserve_ascii_control_characters(self): + """Optional[bool]: Preserves the embedded ASCII control characters when sourceFormat is set to CSV. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.preserve_ascii_control_characters + """ + return self._get_sub_prop("preserveAsciiControlCharacters") + + @preserve_ascii_control_characters.setter + def preserve_ascii_control_characters(self, value): + self._set_sub_prop("preserveAsciiControlCharacters", bool(value)) + @property def projection_fields(self) -> Optional[List[str]]: """Optional[List[str]]: If diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 5a0c5a83f..7f77fc085 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -424,6 +424,20 @@ def test_null_marker_setter(self): config.null_marker = null_marker self.assertEqual(config._properties["load"]["nullMarker"], null_marker) + def test_preserve_ascii_control_characters_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.preserve_ascii_control_characters) + + def test_preserve_ascii_control_characters_hit(self): + config = self._get_target_class()() + config._properties["load"]["preserveAsciiControlCharacters"] = True + self.assertTrue(config.preserve_ascii_control_characters) + + def test_preserve_ascii_control_characters_setter(self): + config = self._get_target_class()() + config.preserve_ascii_control_characters = True + self.assertTrue(config._properties["load"]["preserveAsciiControlCharacters"]) + def test_projection_fields_miss(self): config = self._get_target_class()() self.assertIsNone(config.projection_fields) From 204779c57e706c002ababff72f8e45f1e376b960 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 1 Feb 2023 12:41:48 -0600 Subject: [PATCH 080/536] chore(main): release 3.5.0 (#1472) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 15 +++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a5b4d179..454d362f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.5.0](https://github.com/googleapis/python-bigquery/compare/v3.4.2...v3.5.0) (2023-01-31) + + +### Features + +* Add __str__ method to DatasetReference ([#1477](https://github.com/googleapis/python-bigquery/issues/1477)) ([f32df1f](https://github.com/googleapis/python-bigquery/commit/f32df1fb74e4aea24cd8a4099040ad2f7436e54d)) +* Add preserveAsciiControlCharacter to LoadJobConfig ([#1484](https://github.com/googleapis/python-bigquery/issues/1484)) ([bd1da9a](https://github.com/googleapis/python-bigquery/commit/bd1da9aa0a40b02b7d5409a0b094d8380e255c91)) + + +### Documentation + +* Adds snippet for creating table with external data config ([#1420](https://github.com/googleapis/python-bigquery/issues/1420)) ([f0ace2a](https://github.com/googleapis/python-bigquery/commit/f0ace2ac2307ef359511a235f80f5ce9e46264c1)) +* Revise delete label table code sample, add TODO to clean up sni… ([#1466](https://github.com/googleapis/python-bigquery/issues/1466)) ([0dab7d2](https://github.com/googleapis/python-bigquery/commit/0dab7d25ace4b63d2984485e7b0c5bb38f20476f)) +* **samples:** Table variable fix ([#1287](https://github.com/googleapis/python-bigquery/issues/1287)) ([a71888a](https://github.com/googleapis/python-bigquery/commit/a71888a60d1e5e5815ab459fe24368ad5b0d032a)) + ## [3.4.2](https://github.com/googleapis/python-bigquery/compare/v3.4.1...v3.4.2) (2023-01-13) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index d38bb4619..13194aa56 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.4.2" +__version__ = "3.5.0" From beab7c2b27c27d8e824cbc66b290be8158da7abf Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 2 Feb 2023 11:32:44 +0000 Subject: [PATCH 081/536] chore(deps): update dependency google-cloud-bigquery to v3.5.0 (#1485) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 284614d17..2bc282292 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 google-auth==2.16.0 -google-cloud-bigquery==3.4.2 +google-cloud-bigquery==3.5.0 google-cloud-bigquery-storage==2.18.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index a85653d42..86739eec4 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.0.5 -google-cloud-bigquery==3.4.2 +google-cloud-bigquery==3.5.0 google-cloud-bigquery-storage==2.18.1 google-auth-oauthlib==0.8.0 grpcio==1.51.1 From a190aaa09ae73e8b6a83b7b213247f95fde57615 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 8 Feb 2023 10:05:10 -0600 Subject: [PATCH 082/536] fix: annotate optional integer parameters with optional type (#1487) * fix: annotate optional integer parameters with optional type * remove google-cloud-core reference causing type checker issues deps: update minimum google-cloud-core to 1.6.0 --- google/cloud/bigquery/_http.py | 12 --------- google/cloud/bigquery/client.py | 42 +++++++++++++++--------------- google/cloud/bigquery/job/query.py | 6 ++--- setup.py | 2 +- testing/constraints-3.7.txt | 2 +- 5 files changed, 26 insertions(+), 38 deletions(-) diff --git a/google/cloud/bigquery/_http.py b/google/cloud/bigquery/_http.py index 789ef9243..7921900f8 100644 --- a/google/cloud/bigquery/_http.py +++ b/google/cloud/bigquery/_http.py @@ -14,22 +14,10 @@ """Create / interact with Google BigQuery connections.""" -import os -import pkg_resources - from google.cloud import _http # type: ignore # pytype: disable=import-error from google.cloud.bigquery import __version__ -# TODO: Increase the minimum version of google-cloud-core to 1.6.0 -# and remove this logic. See: -# https://github.com/googleapis/python-bigquery/issues/509 -if os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true": # pragma: NO COVER - release = pkg_resources.get_distribution("google-cloud-core").parsed_version - if release < pkg_resources.parse_version("1.6.0"): - raise ImportError("google-cloud-core >= 1.6.0 is required to use mTLS feature") - - class Connection(_http.JSONConnection): """A connection to Google BigQuery via the JSON REST API. diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 1885ab67e..4f6e6610d 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -329,11 +329,11 @@ def get_service_account_email( def list_projects( self, - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - page_size: int = None, + page_size: Optional[int] = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. @@ -395,11 +395,11 @@ def list_datasets( project: str = None, include_all: bool = False, filter: str = None, - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - page_size: int = None, + page_size: Optional[int] = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -1324,11 +1324,11 @@ def update_table( def list_models( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - page_size: int = None, + page_size: Optional[int] = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1401,11 +1401,11 @@ def api_request(*args, **kwargs): def list_routines( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - page_size: int = None, + page_size: Optional[int] = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1478,11 +1478,11 @@ def api_request(*args, **kwargs): def list_tables( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - page_size: int = None, + page_size: Optional[int] = None, ) -> page_iterator.Iterator: """List tables in the dataset. @@ -1838,7 +1838,7 @@ def _get_query_results( job_id: str, retry: retries.Retry, project: str = None, - timeout_ms: int = None, + timeout_ms: Optional[int] = None, location: str = None, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> _QueryResults: @@ -2163,7 +2163,7 @@ def list_jobs( self, project: str = None, parent_job: Optional[Union[QueryJob, str]] = None, - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, all_users: bool = None, state_filter: str = None, @@ -2171,7 +2171,7 @@ def list_jobs( timeout: TimeoutType = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, - page_size: int = None, + page_size: Optional[int] = None, ) -> page_iterator.Iterator: """List jobs for the project associated with this client. @@ -2361,7 +2361,7 @@ def load_table_from_file( file_obj: IO[bytes], destination: Union[Table, TableReference, TableListItem, str], rewind: bool = False, - size: int = None, + size: Optional[int] = None, num_retries: int = _DEFAULT_NUM_RETRIES, job_id: str = None, job_id_prefix: str = None, @@ -3729,10 +3729,10 @@ def list_rows( self, table: Union[Table, TableListItem, TableReference, str], selected_fields: Sequence[SchemaField] = None, - max_results: int = None, + max_results: Optional[int] = None, page_token: str = None, - start_index: int = None, - page_size: int = None, + start_index: Optional[int] = None, + page_size: Optional[int] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> RowIterator: @@ -3840,11 +3840,11 @@ def _list_rows_from_query_results( location: str, project: str, schema: SchemaField, - total_rows: int = None, + total_rows: Optional[int] = None, destination: Union[Table, TableReference, TableListItem, str] = None, - max_results: int = None, - start_index: int = None, - page_size: int = None, + max_results: Optional[int] = None, + start_index: Optional[int] = None, + page_size: Optional[int] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> RowIterator: diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index e32e74129..c63fa0892 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1381,11 +1381,11 @@ def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): def result( # type: ignore # (complaints about the overloaded signature) self, - page_size: int = None, - max_results: int = None, + page_size: Optional[int] = None, + max_results: Optional[int] = None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None, - start_index: int = None, + start_index: Optional[int] = None, job_retry: "retries.Retry" = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: """Start the job and wait for it to complete and get the result. diff --git a/setup.py b/setup.py index 81cb2e5db..e1b018098 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 - "google-cloud-core >= 1.4.1, <3.0.0dev", + "google-cloud-core >= 1.6.0, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 20.0.0", "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 3701c7343..746656b58 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -9,7 +9,7 @@ db-dtypes==0.3.0 geopandas==0.9.0 google-api-core==1.31.5 google-cloud-bigquery-storage==2.0.0 -google-cloud-core==1.4.1 +google-cloud-core==1.6.0 google-resumable-media==0.6.0 grpcio==1.47.0 ipywidgets==7.7.1 From 1b31c2fca96ef0c0a6365780ddbda535f25c6b25 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 8 Feb 2023 16:40:16 +0000 Subject: [PATCH 083/536] build(deps): bump cryptography from 38.0.3 to 39.0.1 in /synthtool/gcp/templates/python_library/.kokoro (#1489) Source-Link: https://togithub.com/googleapis/synthtool/commit/bb171351c3946d3c3c32e60f5f18cee8c464ec51 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:f62c53736eccb0c4934a3ea9316e0d57696bb49c1a7c86c726e9bb8a2f87dadf --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/requirements.txt | 49 ++++++++++++++++++--------------------- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 889f77dfa..894fb6bc9 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:c43f1d918bcf817d337aa29ff833439494a158a0831508fda4ec75dc4c0d0320 + digest: sha256:f62c53736eccb0c4934a3ea9316e0d57696bb49c1a7c86c726e9bb8a2f87dadf diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 05dc4672e..096e4800a 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -113,33 +113,28 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==38.0.3 \ - --hash=sha256:068147f32fa662c81aebab95c74679b401b12b57494872886eb5c1139250ec5d \ - --hash=sha256:06fc3cc7b6f6cca87bd56ec80a580c88f1da5306f505876a71c8cfa7050257dd \ - --hash=sha256:25c1d1f19729fb09d42e06b4bf9895212292cb27bb50229f5aa64d039ab29146 \ - --hash=sha256:402852a0aea73833d982cabb6d0c3bb582c15483d29fb7085ef2c42bfa7e38d7 \ - --hash=sha256:4e269dcd9b102c5a3d72be3c45d8ce20377b8076a43cbed6f660a1afe365e436 \ - --hash=sha256:5419a127426084933076132d317911e3c6eb77568a1ce23c3ac1e12d111e61e0 \ - --hash=sha256:554bec92ee7d1e9d10ded2f7e92a5d70c1f74ba9524947c0ba0c850c7b011828 \ - --hash=sha256:5e89468fbd2fcd733b5899333bc54d0d06c80e04cd23d8c6f3e0542358c6060b \ - --hash=sha256:65535bc550b70bd6271984d9863a37741352b4aad6fb1b3344a54e6950249b55 \ - --hash=sha256:6ab9516b85bebe7aa83f309bacc5f44a61eeb90d0b4ec125d2d003ce41932d36 \ - --hash=sha256:6addc3b6d593cd980989261dc1cce38263c76954d758c3c94de51f1e010c9a50 \ - --hash=sha256:728f2694fa743a996d7784a6194da430f197d5c58e2f4e278612b359f455e4a2 \ - --hash=sha256:785e4056b5a8b28f05a533fab69febf5004458e20dad7e2e13a3120d8ecec75a \ - --hash=sha256:78cf5eefac2b52c10398a42765bfa981ce2372cbc0457e6bf9658f41ec3c41d8 \ - --hash=sha256:7f836217000342d448e1c9a342e9163149e45d5b5eca76a30e84503a5a96cab0 \ - --hash=sha256:8d41a46251bf0634e21fac50ffd643216ccecfaf3701a063257fe0b2be1b6548 \ - --hash=sha256:984fe150f350a3c91e84de405fe49e688aa6092b3525f407a18b9646f6612320 \ - --hash=sha256:9b24bcff7853ed18a63cfb0c2b008936a9554af24af2fb146e16d8e1aed75748 \ - --hash=sha256:b1b35d9d3a65542ed2e9d90115dfd16bbc027b3f07ee3304fc83580f26e43249 \ - --hash=sha256:b1b52c9e5f8aa2b802d48bd693190341fae201ea51c7a167d69fc48b60e8a959 \ - --hash=sha256:bbf203f1a814007ce24bd4d51362991d5cb90ba0c177a9c08825f2cc304d871f \ - --hash=sha256:be243c7e2bfcf6cc4cb350c0d5cdf15ca6383bbcb2a8ef51d3c9411a9d4386f0 \ - --hash=sha256:bfbe6ee19615b07a98b1d2287d6a6073f734735b49ee45b11324d85efc4d5cbd \ - --hash=sha256:c46837ea467ed1efea562bbeb543994c2d1f6e800785bd5a2c98bc096f5cb220 \ - --hash=sha256:dfb4f4dd568de1b6af9f4cda334adf7d72cf5bc052516e1b2608b683375dd95c \ - --hash=sha256:ed7b00096790213e09eb11c97cc6e2b757f15f3d2f85833cd2d3ec3fe37c1722 +cryptography==39.0.1 \ + --hash=sha256:0f8da300b5c8af9f98111ffd512910bc792b4c77392a9523624680f7956a99d4 \ + --hash=sha256:35f7c7d015d474f4011e859e93e789c87d21f6f4880ebdc29896a60403328f1f \ + --hash=sha256:5aa67414fcdfa22cf052e640cb5ddc461924a045cacf325cd164e65312d99502 \ + --hash=sha256:5d2d8b87a490bfcd407ed9d49093793d0f75198a35e6eb1a923ce1ee86c62b41 \ + --hash=sha256:6687ef6d0a6497e2b58e7c5b852b53f62142cfa7cd1555795758934da363a965 \ + --hash=sha256:6f8ba7f0328b79f08bdacc3e4e66fb4d7aab0c3584e0bd41328dce5262e26b2e \ + --hash=sha256:706843b48f9a3f9b9911979761c91541e3d90db1ca905fd63fee540a217698bc \ + --hash=sha256:807ce09d4434881ca3a7594733669bd834f5b2c6d5c7e36f8c00f691887042ad \ + --hash=sha256:83e17b26de248c33f3acffb922748151d71827d6021d98c70e6c1a25ddd78505 \ + --hash=sha256:96f1157a7c08b5b189b16b47bc9db2332269d6680a196341bf30046330d15388 \ + --hash=sha256:aec5a6c9864be7df2240c382740fcf3b96928c46604eaa7f3091f58b878c0bb6 \ + --hash=sha256:b0afd054cd42f3d213bf82c629efb1ee5f22eba35bf0eec88ea9ea7304f511a2 \ + --hash=sha256:ced4e447ae29ca194449a3f1ce132ded8fcab06971ef5f618605aacaa612beac \ + --hash=sha256:d1f6198ee6d9148405e49887803907fe8962a23e6c6f83ea7d98f1c0de375695 \ + --hash=sha256:e124352fd3db36a9d4a21c1aa27fd5d051e621845cb87fb851c08f4f75ce8be6 \ + --hash=sha256:e422abdec8b5fa8462aa016786680720d78bdce7a30c652b7fadf83a4ba35336 \ + --hash=sha256:ef8b72fa70b348724ff1218267e7f7375b8de4e8194d1636ee60510aae104cd0 \ + --hash=sha256:f0c64d1bd842ca2633e74a1a28033d139368ad959872533b1bab8c80e8240a0c \ + --hash=sha256:f24077a3b5298a5a06a8e0536e3ea9ec60e4c7ac486755e5fb6e6ea9b3500106 \ + --hash=sha256:fdd188c8a6ef8769f148f88f859884507b954cc64db6b52f66ef199bb9ad660a \ + --hash=sha256:fe913f20024eb2cb2f323e42a64bdf2911bb9738a15dba7d3cce48151034e3a8 # via # gcp-releasetool # secretstorage From f832e7a0b79f3567a0773ff11630e2f48bed60db Mon Sep 17 00:00:00 2001 From: nayaknishant Date: Wed, 15 Feb 2023 12:16:57 -0800 Subject: [PATCH 084/536] feat: adding preserveAsciiControlCharacter to CSVOptions (#1491) * adding ASCII support for external config * adding tests for preserveAscii... * adding tests for preserveAscii... * changing 'False' to False * linting --- google/cloud/bigquery/external_config.py | 14 ++++++++++++++ tests/unit/test_external_config.py | 14 +++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index bd60e4ef1..a891bc232 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -418,6 +418,20 @@ def encoding(self): def encoding(self, value): self._properties["encoding"] = value + @property + def preserve_ascii_control_characters(self): + """bool: Indicates if the embedded ASCII control characters + (the first 32 characters in the ASCII-table, from '\x00' to '\x1F') are preserved. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.preserve_ascii_control_characters + """ + return self._properties.get("preserveAsciiControlCharacters") + + @preserve_ascii_control_characters.setter + def preserve_ascii_control_characters(self, value): + self._properties["preserveAsciiControlCharacters"] = value + @property def field_delimiter(self): """str: The separator for fields in a CSV file. Defaults to comma (','). diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 72fe2761a..67fd13fa7 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -248,6 +248,7 @@ def test_from_api_repr_csv(self): "allowQuotedNewlines": True, "allowJaggedRows": False, "encoding": "encoding", + "preserveAsciiControlCharacters": False, }, }, ) @@ -263,6 +264,7 @@ def test_from_api_repr_csv(self): self.assertEqual(ec.options.allow_quoted_newlines, True) self.assertEqual(ec.options.allow_jagged_rows, False) self.assertEqual(ec.options.encoding, "encoding") + self.assertEqual(ec.options.preserve_ascii_control_characters, False) got_resource = ec.to_api_repr() @@ -283,6 +285,7 @@ def test_to_api_repr_csv(self): options.quote_character = "quote" options.skip_leading_rows = 123 options.allow_jagged_rows = False + options.preserve_ascii_control_characters = False ec.csv_options = options exp_resource = { @@ -294,6 +297,7 @@ def test_to_api_repr_csv(self): "allowQuotedNewlines": True, "allowJaggedRows": False, "encoding": "encoding", + "preserveAsciiControlCharacters": False, }, } @@ -514,17 +518,23 @@ def test_csv_options_getter_and_setter(self): from google.cloud.bigquery.external_config import CSVOptions options = CSVOptions.from_api_repr( - {"allowJaggedRows": True, "allowQuotedNewlines": False} + { + "allowJaggedRows": True, + "allowQuotedNewlines": False, + "preserveAsciiControlCharacters": False, + } ) ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) self.assertIsNone(ec.csv_options.allow_jagged_rows) self.assertIsNone(ec.csv_options.allow_quoted_newlines) + self.assertIsNone(ec.csv_options.preserve_ascii_control_characters) ec.csv_options = options self.assertTrue(ec.csv_options.allow_jagged_rows) self.assertFalse(ec.csv_options.allow_quoted_newlines) + self.assertFalse(ec.csv_options.preserve_ascii_control_characters) self.assertIs(ec.options._properties, ec._properties[CSVOptions._RESOURCE_NAME]) self.assertIs( ec.csv_options._properties, ec._properties[CSVOptions._RESOURCE_NAME] @@ -848,6 +858,7 @@ def test_to_api_repr(self): options.allow_quoted_newlines = True options.allow_jagged_rows = False options.encoding = "UTF-8" + options.preserve_ascii_control_characters = False resource = options.to_api_repr() @@ -860,6 +871,7 @@ def test_to_api_repr(self): "allowQuotedNewlines": True, "allowJaggedRows": False, "encoding": "UTF-8", + "preserveAsciiControlCharacters": False, }, ) From 7b5f309159bcf82aaa4c07f5a6a8b6b54f4b77ab Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 16 Feb 2023 20:35:27 +0000 Subject: [PATCH 085/536] chore(deps): update all dependencies to v1 (#1486) Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 2bc282292..f22625653 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -23,7 +23,7 @@ grpcio==1.51.1 idna==3.4 libcst==0.4.9 munch==2.5.0 -mypy-extensions==0.4.3 +mypy-extensions==1.0.0 packaging==23.0 pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 2446aa5e8..725975116 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.0.5 google-cloud-bigquery-storage==2.18.1 -google-auth-oauthlib==0.8.0 +google-auth-oauthlib==1.0.0 grpcio==1.51.1 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 86739eec4..50fd19e51 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.0.5 google-cloud-bigquery==3.5.0 google-cloud-bigquery-storage==2.18.1 -google-auth-oauthlib==0.8.0 +google-auth-oauthlib==1.0.0 grpcio==1.51.1 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' From 665d7ba74a1b45de1ef51cc75b6860125afc5fe6 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 22 Feb 2023 11:55:47 -0500 Subject: [PATCH 086/536] Fix: removes scope to avoid unnecessary duplication (#1503) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix: removes scope to avoid unnecessary duplication * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- google/cloud/bigquery/client.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 4f6e6610d..b03266528 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -225,10 +225,7 @@ class Client(ClientWithProject): to acquire default credentials. """ - SCOPE = ( # type: ignore - "https://www.googleapis.com/auth/bigquery", - "https://www.googleapis.com/auth/cloud-platform", - ) + SCOPE = ("https://www.googleapis.com/auth/cloud-platform",) # type: ignore """The scopes required for authenticating as a BigQuery consumer.""" def __init__( From 20d3276cc29e9467eef9476d5fd572099d9a3f6f Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 22 Feb 2023 15:56:49 -0500 Subject: [PATCH 087/536] Fix: loosen ipywidget dependency (#1504) * fix: updates ipywidget dependency * fix: updates ipywidget version number --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e1b018098..2119e0191 100644 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ pyarrow_dependency, "db-dtypes>=0.3.0,<2.0.0dev", ], - "ipywidgets": ["ipywidgets==7.7.1"], + "ipywidgets": ["ipywidgets>=7.7.0,<8.0.1"], "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <2.0dev"], "ipython": ["ipython>=7.0.1,!=8.1.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], From 7f4d753a4e1c72ff443f1b85c2682a91f49ae339 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 22 Feb 2023 16:37:28 -0500 Subject: [PATCH 088/536] chore(main): release 3.6.0 (#1490) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 19 +++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 454d362f9..67c43200b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.6.0](https://github.com/googleapis/python-bigquery/compare/v3.5.0...v3.6.0) (2023-02-22) + + +### Features + +* Adding preserveAsciiControlCharacter to CSVOptions ([#1491](https://github.com/googleapis/python-bigquery/issues/1491)) ([f832e7a](https://github.com/googleapis/python-bigquery/commit/f832e7a0b79f3567a0773ff11630e2f48bed60db)) + + +### Bug Fixes + +* Annotate optional integer parameters with optional type ([#1487](https://github.com/googleapis/python-bigquery/issues/1487)) ([a190aaa](https://github.com/googleapis/python-bigquery/commit/a190aaa09ae73e8b6a83b7b213247f95fde57615)) +* Loosen ipywidget dependency ([#1504](https://github.com/googleapis/python-bigquery/issues/1504)) ([20d3276](https://github.com/googleapis/python-bigquery/commit/20d3276cc29e9467eef9476d5fd572099d9a3f6f)) +* Removes scope to avoid unnecessary duplication ([#1503](https://github.com/googleapis/python-bigquery/issues/1503)) ([665d7ba](https://github.com/googleapis/python-bigquery/commit/665d7ba74a1b45de1ef51cc75b6860125afc5fe6)) + + +### Dependencies + +* Update minimum google-cloud-core to 1.6.0 ([a190aaa](https://github.com/googleapis/python-bigquery/commit/a190aaa09ae73e8b6a83b7b213247f95fde57615)) + ## [3.5.0](https://github.com/googleapis/python-bigquery/compare/v3.4.2...v3.5.0) (2023-01-31) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 13194aa56..102b96095 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.5.0" +__version__ = "3.6.0" From c7417f43563e20a3e6f1a57f46925fb274b28b07 Mon Sep 17 00:00:00 2001 From: Atsushi Yamamoto Date: Mon, 27 Feb 2023 06:38:00 -0800 Subject: [PATCH 089/536] docs: Remove < 3.11 reference from README (#1502) --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 475d055a2..46f35e716 100644 --- a/README.rst +++ b/README.rst @@ -52,7 +52,7 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.7, < 3.11 +Python >= 3.7 Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 3885fc50bda8e14bc7af5f59f8a3ae7c4575b0ea Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 27 Feb 2023 12:16:30 -0500 Subject: [PATCH 090/536] chore(python): upgrade gcp-releasetool in .kokoro [autoapprove] (#1508) Source-Link: https://github.com/googleapis/synthtool/commit/5f2a6089f73abf06238fe4310f6a14d6f6d1eed3 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8555f0e37e6261408f792bfd6635102d2da5ad73f8f09bcb24f25e6afb5fac97 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/requirements.in | 2 +- .kokoro/requirements.txt | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 894fb6bc9..5fc5daa31 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:f62c53736eccb0c4934a3ea9316e0d57696bb49c1a7c86c726e9bb8a2f87dadf + digest: sha256:8555f0e37e6261408f792bfd6635102d2da5ad73f8f09bcb24f25e6afb5fac97 diff --git a/.kokoro/requirements.in b/.kokoro/requirements.in index cbd7e77f4..882178ce6 100644 --- a/.kokoro/requirements.in +++ b/.kokoro/requirements.in @@ -1,5 +1,5 @@ gcp-docuploader -gcp-releasetool +gcp-releasetool>=1.10.5 # required for compatibility with cryptography>=39.x importlib-metadata typing-extensions twine diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 096e4800a..fa99c1290 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -154,9 +154,9 @@ gcp-docuploader==0.6.4 \ --hash=sha256:01486419e24633af78fd0167db74a2763974765ee8078ca6eb6964d0ebd388af \ --hash=sha256:70861190c123d907b3b067da896265ead2eeb9263969d6955c9e0bb091b5ccbf # via -r requirements.in -gcp-releasetool==1.10.0 \ - --hash=sha256:72a38ca91b59c24f7e699e9227c90cbe4dd71b789383cb0164b088abae294c83 \ - --hash=sha256:8c7c99320208383d4bb2b808c6880eb7a81424afe7cdba3c8d84b25f4f0e097d +gcp-releasetool==1.10.5 \ + --hash=sha256:174b7b102d704b254f2a26a3eda2c684fd3543320ec239baf771542a2e58e109 \ + --hash=sha256:e29d29927fe2ca493105a82958c6873bb2b90d503acac56be2c229e74de0eec9 # via -r requirements.in google-api-core==2.10.2 \ --hash=sha256:10c06f7739fe57781f87523375e8e1a3a4674bf6392cd6131a3222182b971320 \ From cd0aaa15960e9ca7a0aaf411c8e4990f95421816 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Mon, 27 Feb 2023 22:32:00 +0000 Subject: [PATCH 091/536] feat: add `connection_properties` and `create_session` to `LoadJobConfig` (#1509) * feat: added `connection_properties` and `create_session` in load job --- .gitignore | 1 + google/cloud/bigquery/job/load.py | 59 ++++++++++++++++++++++++++++++ tests/unit/job/test_load.py | 2 + tests/unit/job/test_load_config.py | 21 +++++++++++ 4 files changed, 83 insertions(+) diff --git a/.gitignore b/.gitignore index 99c3a1444..168b201f6 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ docs.metadata # Virtual environment env/ +venv/ # Test logs coverage.xml diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 14a7fa30b..7481cb378 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -28,6 +28,7 @@ from google.cloud.bigquery.job.base import _AsyncJob from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference +from google.cloud.bigquery.query import ConnectionProperty class LoadJobConfig(_JobConfig): @@ -120,6 +121,25 @@ def clustering_fields(self, value): else: self._del_sub_prop("clustering") + @property + def connection_properties(self) -> List[ConnectionProperty]: + """Connection properties. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.connection_properties + + .. versionadded:: 3.7.0 + """ + resource = self._get_sub_prop("connectionProperties", []) + return [ConnectionProperty.from_api_repr(prop) for prop in resource] + + @connection_properties.setter + def connection_properties(self, value: Iterable[ConnectionProperty]): + self._set_sub_prop( + "connectionProperties", + [prop.to_api_repr() for prop in value], + ) + @property def create_disposition(self): """Optional[google.cloud.bigquery.job.CreateDisposition]: Specifies behavior @@ -134,6 +154,27 @@ def create_disposition(self): def create_disposition(self, value): self._set_sub_prop("createDisposition", value) + @property + def create_session(self) -> Optional[bool]: + """[Preview] If :data:`True`, creates a new session, where + :attr:`~google.cloud.bigquery.job.LoadJob.session_info` will contain a + random server generated session id. + + If :data:`False`, runs load job with an existing ``session_id`` passed in + :attr:`~google.cloud.bigquery.job.LoadJobConfig.connection_properties`, + otherwise runs load job in non-session mode. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_session + + .. versionadded:: 3.7.0 + """ + return self._get_sub_prop("createSession") + + @create_session.setter + def create_session(self, value: Optional[bool]): + self._set_sub_prop("createSession", value) + @property def decimal_target_types(self) -> Optional[FrozenSet[str]]: """Possible SQL data types to which the source decimal values are converted. @@ -629,6 +670,15 @@ def autodetect(self): """ return self._configuration.autodetect + @property + def connection_properties(self) -> List[ConnectionProperty]: + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.connection_properties`. + + .. versionadded:: 3.7.0 + """ + return self._configuration.connection_properties + @property def create_disposition(self): """See @@ -636,6 +686,15 @@ def create_disposition(self): """ return self._configuration.create_disposition + @property + def create_session(self) -> Optional[bool]: + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.create_session`. + + .. versionadded:: 3.7.0 + """ + return self._configuration.create_session + @property def encoding(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 143e1da59..cf3ce1661 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -392,6 +392,8 @@ def test_from_api_repr_bare(self): job = klass.from_api_repr(RESOURCE, client=client) self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + self.assertEqual(len(job.connection_properties), 0) + self.assertIsNone(job.create_session) def test_from_api_with_encryption(self): self._setUpConstants() diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 7f77fc085..4d25fa106 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -122,6 +122,27 @@ def test_create_disposition_setter(self): config.create_disposition = disposition self.assertEqual(config._properties["load"]["createDisposition"], disposition) + def test_connection_properties(self): + from google.cloud.bigquery.query import ConnectionProperty + + config = self._get_target_class()() + self.assertEqual(len(config.connection_properties), 0) + + session_id = ConnectionProperty("session_id", "abcd") + time_zone = ConnectionProperty("time_zone", "America/Chicago") + config.connection_properties = [session_id, time_zone] + self.assertEqual(len(config.connection_properties), 2) + self.assertEqual(config.connection_properties[0].key, "session_id") + self.assertEqual(config.connection_properties[0].value, "abcd") + self.assertEqual(config.connection_properties[1].key, "time_zone") + self.assertEqual(config.connection_properties[1].value, "America/Chicago") + + def test_create_session(self): + config = self._get_target_class()() + self.assertIsNone(config.create_session) + config.create_session = True + self.assertTrue(config.create_session) + def test_decimal_target_types_miss(self): config = self._get_target_class()() self.assertIsNone(config.decimal_target_types) From 792e30004f7b894cbcfe75098feb38a5a46af806 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 1 Mar 2023 13:49:09 +0000 Subject: [PATCH 092/536] chore(deps): update all dependencies (#1501) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Anthonios Partheniou Co-authored-by: Owl Bot --- .gitignore | 1 - samples/geography/requirements.txt | 12 ++++++------ samples/magics/requirements.txt | 8 ++++---- samples/snippets/requirements.txt | 10 +++++----- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index 168b201f6..99c3a1444 100644 --- a/.gitignore +++ b/.gitignore @@ -51,7 +51,6 @@ docs.metadata # Virtual environment env/ -venv/ # Test logs coverage.xml diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index f22625653..a0f64923c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -7,19 +7,19 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.0.5 -Fiona==1.9.0 -geojson==3.0.0 +Fiona==1.9.1 +geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.16.0 -google-cloud-bigquery==3.5.0 +google-auth==2.16.1 +google-cloud-bigquery==3.6.0 google-cloud-bigquery-storage==2.18.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.1 googleapis-common-protos==1.58.0 -grpcio==1.51.1 +grpcio==1.51.3 idna==3.4 libcst==0.4.9 munch==2.5.0 @@ -40,6 +40,6 @@ requests==2.28.2 rsa==4.9 Shapely==2.0.1 six==1.16.0 -typing-extensions==4.4.0 +typing-extensions==4.5.0 typing-inspect==0.8.0 urllib3==1.26.14 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 725975116..0513b2b5b 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,15 +1,15 @@ db-dtypes==1.0.5 google-cloud-bigquery-storage==2.18.1 google-auth-oauthlib==1.0.0 -grpcio==1.51.1 +grpcio==1.51.3 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.9.0; python_version >= '3.9' +ipython==8.10.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.3; python_version >= '3.8' +matplotlib==3.7.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' pyarrow==11.0.0 pytz==2022.7.1 -typing-extensions==4.4.0 +typing-extensions==4.5.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 50fd19e51..de669fd16 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ db-dtypes==1.0.5 -google-cloud-bigquery==3.5.0 +google-cloud-bigquery==3.6.0 google-cloud-bigquery-storage==2.18.1 google-auth-oauthlib==1.0.0 -grpcio==1.51.1 +grpcio==1.51.3 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.9.0; python_version >= '3.9' +ipython==8.10.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.6.3; python_version >= '3.8' +matplotlib==3.7.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' pyarrow==11.0.0 pytz==2022.7.1 -typing-extensions==4.4.0 +typing-extensions==4.5.0 From 75337ee4504cd739b87658286961477f2a2a2057 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 1 Mar 2023 20:42:48 +0000 Subject: [PATCH 093/536] chore(deps): update all dependencies (#1513) --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index a0f64923c..e9fb6538c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -14,7 +14,7 @@ geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 google-auth==2.16.1 google-cloud-bigquery==3.6.0 -google-cloud-bigquery-storage==2.18.1 +google-cloud-bigquery-storage==2.19.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 0513b2b5b..3d55ae95a 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,11 +1,11 @@ db-dtypes==1.0.5 -google-cloud-bigquery-storage==2.18.1 +google-cloud-bigquery-storage==2.19.0 google-auth-oauthlib==1.0.0 grpcio==1.51.3 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.10.0; python_version >= '3.9' +ipython==8.11.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index de669fd16..b4fc299e7 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,12 +1,12 @@ db-dtypes==1.0.5 google-cloud-bigquery==3.6.0 -google-cloud-bigquery-storage==2.18.1 +google-cloud-bigquery-storage==2.19.0 google-auth-oauthlib==1.0.0 grpcio==1.51.3 ipywidgets==8.0.4 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.10.0; python_version >= '3.9' +ipython==8.11.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.0; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' From a23092cad834c6a016f455d46fefa13bb6cdbf0f Mon Sep 17 00:00:00 2001 From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Date: Thu, 2 Mar 2023 07:48:11 -0800 Subject: [PATCH 094/536] feat: add default_query_job_config property and property setter to BQ client (#1511) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [feature request](https://togithub.com/googleapis/python-bigquery/issues/1512) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes - [feature request](https://togithub.com/googleapis/python-bigquery/issues/1512)🦕 - [internal bug](https://b.corp.google.com/issues/271044948) --- google/cloud/bigquery/client.py | 11 +++++++++++ tests/unit/test_client.py | 13 +++++++++++++ 2 files changed, 24 insertions(+) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index b03266528..af8eaf5a7 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -266,6 +266,17 @@ def location(self): """Default location for jobs / datasets / tables.""" return self._location + @property + def default_query_job_config(self): + """Default ``QueryJobConfig``. + Will be merged into job configs passed into the ``query`` method. + """ + return self._default_query_job_config + + @default_query_job_config.setter + def default_query_job_config(self, value: QueryJobConfig): + self._default_query_job_config = copy.deepcopy(value) + def close(self): """Close the underlying transport objects, releasing system resources. diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 22f7286db..f38874843 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -413,6 +413,19 @@ def test__get_query_results_hit(self): self.assertEqual(query_results.total_rows, 10) self.assertTrue(query_results.complete) + def test_default_query_job_config(self): + from google.cloud.bigquery import QueryJobConfig + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + self.assertIsNone(client.default_query_job_config) + + job_config = QueryJobConfig() + job_config.dry_run = True + client.default_query_job_config = job_config + self.assertIsInstance(client.default_query_job_config, QueryJobConfig) + def test_get_service_account_email(self): path = "/projects/%s/serviceAccount" % (self.PROJECT,) creds = _make_credentials() From ad0ec31acd517392bfcbdbdbd5a5283afc423753 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Sat, 4 Mar 2023 11:31:55 +0000 Subject: [PATCH 095/536] chore(deps): update all dependencies (#1514) --- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 2 +- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 100e0639c..e0ec46254 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.2.1 +pytest==7.2.2 mock==5.0.1 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e9fb6538c..cfda81374 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.16.1 +google-auth==2.16.2 google-cloud-bigquery==3.6.0 google-cloud-bigquery-storage==2.19.0 google-cloud-core==2.3.2 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index e8f3982c7..3ed7558d5 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.1 +pytest==7.2.2 mock==5.0.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 3d55ae95a..55b828f1b 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -7,7 +7,7 @@ ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.11.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.0; python_version >= '3.8' +matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' pyarrow==11.0.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index e8f3982c7..3ed7558d5 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.1 +pytest==7.2.2 mock==5.0.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index b4fc299e7..6c6b17ea8 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -8,7 +8,7 @@ ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.11.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.0; python_version >= '3.8' +matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' pyarrow==11.0.0 From cbbfeb57215e24f51d1658070558a22e6580c4ca Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 6 Mar 2023 23:06:30 +0000 Subject: [PATCH 096/536] chore(deps): update dependency charset-normalizer to v3.1.0 (#1518) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index cfda81374..75964dbe1 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,7 +1,7 @@ attrs==22.2.0 certifi==2022.12.7 cffi==1.15.1 -charset-normalizer==3.0.1 +charset-normalizer==3.1.0 click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 From 2d46d3e02f76b9d94580ae7183ec8aeecb401a5f Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 13 Mar 2023 12:46:05 -0500 Subject: [PATCH 097/536] chore(main): release 3.7.0 (#1507) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 13 +++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67c43200b..5eda8912d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.7.0](https://github.com/googleapis/python-bigquery/compare/v3.6.0...v3.7.0) (2023-03-06) + + +### Features + +* Add `connection_properties` and `create_session` to `LoadJobConfig` ([#1509](https://github.com/googleapis/python-bigquery/issues/1509)) ([cd0aaa1](https://github.com/googleapis/python-bigquery/commit/cd0aaa15960e9ca7a0aaf411c8e4990f95421816)) +* Add default_query_job_config property and property setter to BQ client ([#1511](https://github.com/googleapis/python-bigquery/issues/1511)) ([a23092c](https://github.com/googleapis/python-bigquery/commit/a23092cad834c6a016f455d46fefa13bb6cdbf0f)) + + +### Documentation + +* Remove < 3.11 reference from README ([#1502](https://github.com/googleapis/python-bigquery/issues/1502)) ([c7417f4](https://github.com/googleapis/python-bigquery/commit/c7417f43563e20a3e6f1a57f46925fb274b28b07)) + ## [3.6.0](https://github.com/googleapis/python-bigquery/compare/v3.5.0...v3.6.0) (2023-02-22) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 102b96095..dc87b3c5b 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.6.0" +__version__ = "3.7.0" From 8270a10df8f40750a7ac541a1781a71d7e79ce67 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 14 Mar 2023 14:13:23 -0500 Subject: [PATCH 098/536] feat: expose configuration property on CopyJob, ExtractJob, LoadJob, QueryJob (#1521) * feat: expose configuration property on CopyJob, ExtractJob, LoadJob, QueryJob Note for google-cloud-bigquery developers: This also refactors these classes so that `_set_properties` does not modify the `_properties` dictionary in-place. Doing so was also mutating the request object, making it difficult to debug what request was _actually_ sent. Before this change, many tests hallucinated that the request was always equal to the response. * E google.api_core.exceptions.BadRequest: 400 Clone operation with write disposition WRITE_TRUNCATE is not supported. Please try again with WRITE_EMPTY. --- google/cloud/bigquery/client.py | 9 +- google/cloud/bigquery/job/base.py | 375 ++++++++++++++------------- google/cloud/bigquery/job/copy_.py | 20 +- google/cloud/bigquery/job/extract.py | 23 +- google/cloud/bigquery/job/load.py | 64 ++--- google/cloud/bigquery/job/query.py | 65 ++--- tests/system/test_client.py | 2 +- tests/unit/job/test_base.py | 38 ++- tests/unit/job/test_load.py | 5 +- tests/unit/test_client.py | 21 +- 10 files changed, 333 insertions(+), 289 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index af8eaf5a7..a53819cde 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1976,15 +1976,8 @@ def create_job( ) destination = _get_sub_prop(job_config, ["copy", "destinationTable"]) destination = TableReference.from_api_repr(destination) - sources = [] - source_configs = _get_sub_prop(job_config, ["copy", "sourceTables"]) - if source_configs is None: - source_configs = [_get_sub_prop(job_config, ["copy", "sourceTable"])] - for source_config in source_configs: - table_ref = TableReference.from_api_repr(source_config) - sources.append(table_ref) return self.copy_table( - sources, + [], # Source table(s) already in job_config resource. destination, job_config=typing.cast(CopyJobConfig, copy_job_config), retry=retry, diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 86701e295..55e80b2eb 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -19,7 +19,7 @@ import http import threading import typing -from typing import Dict, Optional, Sequence +from typing import ClassVar, Dict, Optional, Sequence from google.api_core import exceptions import google.api_core.future.polling @@ -150,6 +150,182 @@ def _from_api_repr(cls, resource): return job_ref +class _JobConfig(object): + """Abstract base class for job configuration objects. + + Args: + job_type (str): The key to use for the job configuration. + """ + + def __init__(self, job_type, **kwargs): + self._job_type = job_type + self._properties = {job_type: {}} + for prop, val in kwargs.items(): + setattr(self, prop, val) + + def __setattr__(self, name, value): + """Override to be able to raise error if an unknown property is being set""" + if not name.startswith("_") and not hasattr(type(self), name): + raise AttributeError( + "Property {} is unknown for {}.".format(name, type(self)) + ) + super(_JobConfig, self).__setattr__(name, value) + + @property + def labels(self): + """Dict[str, str]: Labels for the job. + + This method always returns a dict. Once a job has been created on the + server, its labels cannot be modified anymore. + + Raises: + ValueError: If ``value`` type is invalid. + """ + return self._properties.setdefault("labels", {}) + + @labels.setter + def labels(self, value): + if not isinstance(value, dict): + raise ValueError("Pass a dict") + self._properties["labels"] = value + + def _get_sub_prop(self, key, default=None): + """Get a value in the ``self._properties[self._job_type]`` dictionary. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to access + those properties:: + + self._get_sub_prop('destinationTable') + + This is equivalent to using the ``_helpers._get_sub_prop`` function:: + + _helpers._get_sub_prop( + self._properties, ['query', 'destinationTable']) + + Args: + key (str): + Key for the value to get in the + ``self._properties[self._job_type]`` dictionary. + default (Optional[object]): + Default value to return if the key is not found. + Defaults to :data:`None`. + + Returns: + object: The value if present or the default. + """ + return _helpers._get_sub_prop( + self._properties, [self._job_type, key], default=default + ) + + def _set_sub_prop(self, key, value): + """Set a value in the ``self._properties[self._job_type]`` dictionary. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to set + those properties:: + + self._set_sub_prop('useLegacySql', False) + + This is equivalent to using the ``_helper._set_sub_prop`` function:: + + _helper._set_sub_prop( + self._properties, ['query', 'useLegacySql'], False) + + Args: + key (str): + Key to set in the ``self._properties[self._job_type]`` + dictionary. + value (object): Value to set. + """ + _helpers._set_sub_prop(self._properties, [self._job_type, key], value) + + def _del_sub_prop(self, key): + """Remove ``key`` from the ``self._properties[self._job_type]`` dict. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear + those properties:: + + self._del_sub_prop('useLegacySql') + + This is equivalent to using the ``_helper._del_sub_prop`` function:: + + _helper._del_sub_prop( + self._properties, ['query', 'useLegacySql']) + + Args: + key (str): + Key to remove in the ``self._properties[self._job_type]`` + dictionary. + """ + _helpers._del_sub_prop(self._properties, [self._job_type, key]) + + def to_api_repr(self) -> dict: + """Build an API representation of the job config. + + Returns: + Dict: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + def _fill_from_default(self, default_job_config): + """Merge this job config with a default job config. + + The keys in this object take precedence over the keys in the default + config. The merge is done at the top-level as well as for keys one + level below the job type. + + Args: + default_job_config (google.cloud.bigquery.job._JobConfig): + The default job config that will be used to fill in self. + + Returns: + google.cloud.bigquery.job._JobConfig: A new (merged) job config. + """ + if self._job_type != default_job_config._job_type: + raise TypeError( + "attempted to merge two incompatible job types: " + + repr(self._job_type) + + ", " + + repr(default_job_config._job_type) + ) + + # cls is one of the job config subclasses that provides the job_type argument to + # this base class on instantiation, thus missing-parameter warning is a false + # positive here. + new_job_config = self.__class__() # pytype: disable=missing-parameter + + default_job_properties = copy.deepcopy(default_job_config._properties) + for key in self._properties: + if key != self._job_type: + default_job_properties[key] = self._properties[key] + + default_job_properties[self._job_type].update(self._properties[self._job_type]) + new_job_config._properties = default_job_properties + + return new_job_config + + @classmethod + def from_api_repr(cls, resource: dict) -> "_JobConfig": + """Factory: construct a job configuration given its API representation + + Args: + resource (Dict): + A job configuration in the same representation as is returned + from the API. + + Returns: + google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. + """ + # cls is one of the job config subclasses that provides the job_type argument to + # this base class on instantiation, thus missing-parameter warning is a false + # positive here. + job_config = cls() # type: ignore # pytype: disable=missing-parameter + job_config._properties = resource + return job_config + + class _AsyncJob(google.api_core.future.polling.PollingFuture): """Base class for asynchronous jobs. @@ -161,6 +337,9 @@ class _AsyncJob(google.api_core.future.polling.PollingFuture): Client which holds credentials and project configuration. """ + _JOB_TYPE = "unknown" + _CONFIG_CLASS: ClassVar + def __init__(self, job_id, client): super(_AsyncJob, self).__init__() @@ -176,6 +355,13 @@ def __init__(self, job_id, client): self._result_set = False self._completion_lock = threading.Lock() + @property + def configuration(self) -> _JobConfig: + """Job-type specific configurtion.""" + configuration = self._CONFIG_CLASS() + configuration._properties = self._properties.setdefault("configuration", {}) + return configuration + @property def job_id(self): """str: ID of the job.""" @@ -426,8 +612,7 @@ def _set_properties(self, api_response): api_response (Dict): response returned from an API call. """ cleaned = api_response.copy() - - statistics = cleaned.get("statistics", {}) + statistics = cleaned.setdefault("statistics", {}) if "creationTime" in statistics: statistics["creationTime"] = float(statistics["creationTime"]) if "startTime" in statistics: @@ -435,13 +620,7 @@ def _set_properties(self, api_response): if "endTime" in statistics: statistics["endTime"] = float(statistics["endTime"]) - # Save configuration to keep reference same in self._configuration. - cleaned_config = cleaned.pop("configuration", {}) - configuration = self._properties.pop("configuration", {}) - self._properties.clear() - self._properties.update(cleaned) - self._properties["configuration"] = configuration - self._properties["configuration"].update(cleaned_config) + self._properties = cleaned # For Future interface self._set_future_result() @@ -751,182 +930,6 @@ def __repr__(self): return result -class _JobConfig(object): - """Abstract base class for job configuration objects. - - Args: - job_type (str): The key to use for the job configuration. - """ - - def __init__(self, job_type, **kwargs): - self._job_type = job_type - self._properties = {job_type: {}} - for prop, val in kwargs.items(): - setattr(self, prop, val) - - def __setattr__(self, name, value): - """Override to be able to raise error if an unknown property is being set""" - if not name.startswith("_") and not hasattr(type(self), name): - raise AttributeError( - "Property {} is unknown for {}.".format(name, type(self)) - ) - super(_JobConfig, self).__setattr__(name, value) - - @property - def labels(self): - """Dict[str, str]: Labels for the job. - - This method always returns a dict. Once a job has been created on the - server, its labels cannot be modified anymore. - - Raises: - ValueError: If ``value`` type is invalid. - """ - return self._properties.setdefault("labels", {}) - - @labels.setter - def labels(self, value): - if not isinstance(value, dict): - raise ValueError("Pass a dict") - self._properties["labels"] = value - - def _get_sub_prop(self, key, default=None): - """Get a value in the ``self._properties[self._job_type]`` dictionary. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to access - those properties:: - - self._get_sub_prop('destinationTable') - - This is equivalent to using the ``_helpers._get_sub_prop`` function:: - - _helpers._get_sub_prop( - self._properties, ['query', 'destinationTable']) - - Args: - key (str): - Key for the value to get in the - ``self._properties[self._job_type]`` dictionary. - default (Optional[object]): - Default value to return if the key is not found. - Defaults to :data:`None`. - - Returns: - object: The value if present or the default. - """ - return _helpers._get_sub_prop( - self._properties, [self._job_type, key], default=default - ) - - def _set_sub_prop(self, key, value): - """Set a value in the ``self._properties[self._job_type]`` dictionary. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to set - those properties:: - - self._set_sub_prop('useLegacySql', False) - - This is equivalent to using the ``_helper._set_sub_prop`` function:: - - _helper._set_sub_prop( - self._properties, ['query', 'useLegacySql'], False) - - Args: - key (str): - Key to set in the ``self._properties[self._job_type]`` - dictionary. - value (object): Value to set. - """ - _helpers._set_sub_prop(self._properties, [self._job_type, key], value) - - def _del_sub_prop(self, key): - """Remove ``key`` from the ``self._properties[self._job_type]`` dict. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear - those properties:: - - self._del_sub_prop('useLegacySql') - - This is equivalent to using the ``_helper._del_sub_prop`` function:: - - _helper._del_sub_prop( - self._properties, ['query', 'useLegacySql']) - - Args: - key (str): - Key to remove in the ``self._properties[self._job_type]`` - dictionary. - """ - _helpers._del_sub_prop(self._properties, [self._job_type, key]) - - def to_api_repr(self) -> dict: - """Build an API representation of the job config. - - Returns: - Dict: A dictionary in the format used by the BigQuery API. - """ - return copy.deepcopy(self._properties) - - def _fill_from_default(self, default_job_config): - """Merge this job config with a default job config. - - The keys in this object take precedence over the keys in the default - config. The merge is done at the top-level as well as for keys one - level below the job type. - - Args: - default_job_config (google.cloud.bigquery.job._JobConfig): - The default job config that will be used to fill in self. - - Returns: - google.cloud.bigquery.job._JobConfig: A new (merged) job config. - """ - if self._job_type != default_job_config._job_type: - raise TypeError( - "attempted to merge two incompatible job types: " - + repr(self._job_type) - + ", " - + repr(default_job_config._job_type) - ) - - # cls is one of the job config subclasses that provides the job_type argument to - # this base class on instantiation, thus missing-parameter warning is a false - # positive here. - new_job_config = self.__class__() # pytype: disable=missing-parameter - - default_job_properties = copy.deepcopy(default_job_config._properties) - for key in self._properties: - if key != self._job_type: - default_job_properties[key] = self._properties[key] - - default_job_properties[self._job_type].update(self._properties[self._job_type]) - new_job_config._properties = default_job_properties - - return new_job_config - - @classmethod - def from_api_repr(cls, resource: dict) -> "_JobConfig": - """Factory: construct a job configuration given its API representation - - Args: - resource (Dict): - A job configuration in the same representation as is returned - from the API. - - Returns: - google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. - """ - # cls is one of the job config subclasses that provides the job_type argument to - # this base class on instantiation, thus missing-parameter warning is a false - # positive here. - job_config = cls() # type: ignore # pytype: disable=missing-parameter - job_config._properties = resource - return job_config - - class ScriptStackFrame(object): """Stack frame showing the line/column/procedure name where the current evaluation happened. diff --git a/google/cloud/bigquery/job/copy_.py b/google/cloud/bigquery/job/copy_.py index 9d7548ec5..5c52aeed6 100644 --- a/google/cloud/bigquery/job/copy_.py +++ b/google/cloud/bigquery/job/copy_.py @@ -14,6 +14,7 @@ """Classes for copy jobs.""" +import typing from typing import Optional from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -160,15 +161,13 @@ class CopyJob(_AsyncJob): """ _JOB_TYPE = "copy" + _CONFIG_CLASS = CopyJobConfig def __init__(self, job_id, sources, destination, client, job_config=None): super(CopyJob, self).__init__(job_id, client) - if not job_config: - job_config = CopyJobConfig() - - self._configuration = job_config - self._properties["configuration"] = job_config._properties + if job_config is not None: + self._properties["configuration"] = job_config._properties if destination: _helpers._set_sub_prop( @@ -185,6 +184,11 @@ def __init__(self, job_id, sources, destination, client, job_config=None): source_resources, ) + @property + def configuration(self) -> CopyJobConfig: + """The configuration for this copy job.""" + return typing.cast(CopyJobConfig, super().configuration) + @property def destination(self): """google.cloud.bigquery.table.TableReference: Table into which data @@ -223,14 +227,14 @@ def create_disposition(self): """See :attr:`google.cloud.bigquery.job.CopyJobConfig.create_disposition`. """ - return self._configuration.create_disposition + return self.configuration.create_disposition @property def write_disposition(self): """See :attr:`google.cloud.bigquery.job.CopyJobConfig.write_disposition`. """ - return self._configuration.write_disposition + return self.configuration.write_disposition @property def destination_encryption_configuration(self): @@ -243,7 +247,7 @@ def destination_encryption_configuration(self): See :attr:`google.cloud.bigquery.job.CopyJobConfig.destination_encryption_configuration`. """ - return self._configuration.destination_encryption_configuration + return self.configuration.destination_encryption_configuration def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" diff --git a/google/cloud/bigquery/job/extract.py b/google/cloud/bigquery/job/extract.py index 52aa036c9..64ec39b76 100644 --- a/google/cloud/bigquery/job/extract.py +++ b/google/cloud/bigquery/job/extract.py @@ -14,6 +14,8 @@ """Classes for extract (export) jobs.""" +import typing + from google.cloud.bigquery import _helpers from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.table import Table @@ -125,15 +127,13 @@ class ExtractJob(_AsyncJob): """ _JOB_TYPE = "extract" + _CONFIG_CLASS = ExtractJobConfig def __init__(self, job_id, source, destination_uris, client, job_config=None): super(ExtractJob, self).__init__(job_id, client) - if job_config is None: - job_config = ExtractJobConfig() - - self._properties["configuration"] = job_config._properties - self._configuration = job_config + if job_config is not None: + self._properties["configuration"] = job_config._properties if source: source_ref = {"projectId": source.project, "datasetId": source.dataset_id} @@ -156,6 +156,11 @@ def __init__(self, job_id, source, destination_uris, client, job_config=None): destination_uris, ) + @property + def configuration(self) -> ExtractJobConfig: + """The configuration for this extract job.""" + return typing.cast(ExtractJobConfig, super().configuration) + @property def source(self): """Union[ \ @@ -189,28 +194,28 @@ def compression(self): """See :attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`. """ - return self._configuration.compression + return self.configuration.compression @property def destination_format(self): """See :attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`. """ - return self._configuration.destination_format + return self.configuration.destination_format @property def field_delimiter(self): """See :attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`. """ - return self._configuration.field_delimiter + return self.configuration.field_delimiter @property def print_header(self): """See :attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`. """ - return self._configuration.print_header + return self.configuration.print_header @property def destination_uri_file_counts(self): diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 7481cb378..6b6c8bfd9 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -14,6 +14,7 @@ """Classes for load jobs.""" +import typing from typing import FrozenSet, List, Iterable, Optional from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -605,15 +606,13 @@ class LoadJob(_AsyncJob): """ _JOB_TYPE = "load" + _CONFIG_CLASS = LoadJobConfig def __init__(self, job_id, source_uris, destination, client, job_config=None): super(LoadJob, self).__init__(job_id, client) - if not job_config: - job_config = LoadJobConfig() - - self._configuration = job_config - self._properties["configuration"] = job_config._properties + if job_config is not None: + self._properties["configuration"] = job_config._properties if source_uris is not None: _helpers._set_sub_prop( @@ -627,6 +626,11 @@ def __init__(self, job_id, source_uris, destination, client, job_config=None): destination.to_api_repr(), ) + @property + def configuration(self) -> LoadJobConfig: + """The configuration for this load job.""" + return typing.cast(LoadJobConfig, super().configuration) + @property def destination(self): """google.cloud.bigquery.table.TableReference: table where loaded rows are written @@ -654,21 +658,21 @@ def allow_jagged_rows(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`. """ - return self._configuration.allow_jagged_rows + return self.configuration.allow_jagged_rows @property def allow_quoted_newlines(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`. """ - return self._configuration.allow_quoted_newlines + return self.configuration.allow_quoted_newlines @property def autodetect(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.autodetect`. """ - return self._configuration.autodetect + return self.configuration.autodetect @property def connection_properties(self) -> List[ConnectionProperty]: @@ -677,14 +681,14 @@ def connection_properties(self) -> List[ConnectionProperty]: .. versionadded:: 3.7.0 """ - return self._configuration.connection_properties + return self.configuration.connection_properties @property def create_disposition(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.create_disposition`. """ - return self._configuration.create_disposition + return self.configuration.create_disposition @property def create_session(self) -> Optional[bool]: @@ -693,84 +697,84 @@ def create_session(self) -> Optional[bool]: .. versionadded:: 3.7.0 """ - return self._configuration.create_session + return self.configuration.create_session @property def encoding(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.encoding`. """ - return self._configuration.encoding + return self.configuration.encoding @property def field_delimiter(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.field_delimiter`. """ - return self._configuration.field_delimiter + return self.configuration.field_delimiter @property def ignore_unknown_values(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`. """ - return self._configuration.ignore_unknown_values + return self.configuration.ignore_unknown_values @property def max_bad_records(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.max_bad_records`. """ - return self._configuration.max_bad_records + return self.configuration.max_bad_records @property def null_marker(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.null_marker`. """ - return self._configuration.null_marker + return self.configuration.null_marker @property def quote_character(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.quote_character`. """ - return self._configuration.quote_character + return self.configuration.quote_character @property def reference_file_schema_uri(self): """See: attr:`google.cloud.bigquery.job.LoadJobConfig.reference_file_schema_uri`. """ - return self._configuration.reference_file_schema_uri + return self.configuration.reference_file_schema_uri @property def skip_leading_rows(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`. """ - return self._configuration.skip_leading_rows + return self.configuration.skip_leading_rows @property def source_format(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format`. """ - return self._configuration.source_format + return self.configuration.source_format @property def write_disposition(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.write_disposition`. """ - return self._configuration.write_disposition + return self.configuration.write_disposition @property def schema(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.schema`. """ - return self._configuration.schema + return self.configuration.schema @property def destination_encryption_configuration(self): @@ -783,7 +787,7 @@ def destination_encryption_configuration(self): See :attr:`google.cloud.bigquery.job.LoadJobConfig.destination_encryption_configuration`. """ - return self._configuration.destination_encryption_configuration + return self.configuration.destination_encryption_configuration @property def destination_table_description(self): @@ -792,7 +796,7 @@ def destination_table_description(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description """ - return self._configuration.destination_table_description + return self.configuration.destination_table_description @property def destination_table_friendly_name(self): @@ -801,42 +805,42 @@ def destination_table_friendly_name(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name """ - return self._configuration.destination_table_friendly_name + return self.configuration.destination_table_friendly_name @property def range_partitioning(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. """ - return self._configuration.range_partitioning + return self.configuration.range_partitioning @property def time_partitioning(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.time_partitioning`. """ - return self._configuration.time_partitioning + return self.configuration.time_partitioning @property def use_avro_logical_types(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.use_avro_logical_types`. """ - return self._configuration.use_avro_logical_types + return self.configuration.use_avro_logical_types @property def clustering_fields(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.clustering_fields`. """ - return self._configuration.clustering_fields + return self.configuration.clustering_fields @property def schema_update_options(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.schema_update_options`. """ - return self._configuration.schema_update_options + return self.configuration.schema_update_options @property def input_file_bytes(self): diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index c63fa0892..e6d6d682d 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -745,17 +745,15 @@ class QueryJob(_AsyncJob): _JOB_TYPE = "query" _UDF_KEY = "userDefinedFunctionResources" + _CONFIG_CLASS = QueryJobConfig def __init__(self, job_id, query, client, job_config=None): super(QueryJob, self).__init__(job_id, client) - if job_config is None: - job_config = QueryJobConfig() - if job_config.use_legacy_sql is None: - job_config.use_legacy_sql = False - - self._properties["configuration"] = job_config._properties - self._configuration = job_config + if job_config is not None: + self._properties["configuration"] = job_config._properties + if self.configuration.use_legacy_sql is None: + self.configuration.use_legacy_sql = False if query: _helpers._set_sub_prop( @@ -771,7 +769,12 @@ def allow_large_results(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.allow_large_results`. """ - return self._configuration.allow_large_results + return self.configuration.allow_large_results + + @property + def configuration(self) -> QueryJobConfig: + """The configuration for this query job.""" + return typing.cast(QueryJobConfig, super().configuration) @property def connection_properties(self) -> List[ConnectionProperty]: @@ -780,14 +783,14 @@ def connection_properties(self) -> List[ConnectionProperty]: .. versionadded:: 2.29.0 """ - return self._configuration.connection_properties + return self.configuration.connection_properties @property def create_disposition(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.create_disposition`. """ - return self._configuration.create_disposition + return self.configuration.create_disposition @property def create_session(self) -> Optional[bool]: @@ -796,21 +799,21 @@ def create_session(self) -> Optional[bool]: .. versionadded:: 2.29.0 """ - return self._configuration.create_session + return self.configuration.create_session @property def default_dataset(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.default_dataset`. """ - return self._configuration.default_dataset + return self.configuration.default_dataset @property def destination(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.destination`. """ - return self._configuration.destination + return self.configuration.destination @property def destination_encryption_configuration(self): @@ -823,28 +826,28 @@ def destination_encryption_configuration(self): See :attr:`google.cloud.bigquery.job.QueryJobConfig.destination_encryption_configuration`. """ - return self._configuration.destination_encryption_configuration + return self.configuration.destination_encryption_configuration @property def dry_run(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.dry_run`. """ - return self._configuration.dry_run + return self.configuration.dry_run @property def flatten_results(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.flatten_results`. """ - return self._configuration.flatten_results + return self.configuration.flatten_results @property def priority(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.priority`. """ - return self._configuration.priority + return self.configuration.priority @property def query(self): @@ -862,90 +865,90 @@ def query_parameters(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.query_parameters`. """ - return self._configuration.query_parameters + return self.configuration.query_parameters @property def udf_resources(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.udf_resources`. """ - return self._configuration.udf_resources + return self.configuration.udf_resources @property def use_legacy_sql(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. """ - return self._configuration.use_legacy_sql + return self.configuration.use_legacy_sql @property def use_query_cache(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.use_query_cache`. """ - return self._configuration.use_query_cache + return self.configuration.use_query_cache @property def write_disposition(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.write_disposition`. """ - return self._configuration.write_disposition + return self.configuration.write_disposition @property def maximum_billing_tier(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`. """ - return self._configuration.maximum_billing_tier + return self.configuration.maximum_billing_tier @property def maximum_bytes_billed(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`. """ - return self._configuration.maximum_bytes_billed + return self.configuration.maximum_bytes_billed @property def range_partitioning(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.range_partitioning`. """ - return self._configuration.range_partitioning + return self.configuration.range_partitioning @property def table_definitions(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.table_definitions`. """ - return self._configuration.table_definitions + return self.configuration.table_definitions @property def time_partitioning(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.time_partitioning`. """ - return self._configuration.time_partitioning + return self.configuration.time_partitioning @property def clustering_fields(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.clustering_fields`. """ - return self._configuration.clustering_fields + return self.configuration.clustering_fields @property def schema_update_options(self): """See :attr:`google.cloud.bigquery.job.QueryJobConfig.schema_update_options`. """ - return self._configuration.schema_update_options + return self.configuration.schema_update_options def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" # Use to_api_repr to allow for some configuration properties to be set # automatically. - configuration = self._configuration.to_api_repr() + configuration = self.configuration.to_api_repr() return { "jobReference": self._properties["jobReference"], "configuration": configuration, @@ -1257,7 +1260,7 @@ def _format_for_exception(message: str, query: str): """ template = "{message}\n\n{header}\n\n{ruler}\n{body}\n{ruler}" - lines = query.splitlines() + lines = query.splitlines() if query is not None else [""] max_line_len = max(len(line) for line in lines) header = "-----Query Job SQL Follows-----" diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 14a9b04d4..a69bb92c5 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2455,7 +2455,7 @@ def test_table_clones(dataset_id): # Now create a clone before modifying the original table data. copy_config = CopyJobConfig() copy_config.operation_type = OperationType.CLONE - copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + copy_config.write_disposition = bigquery.WriteDisposition.WRITE_EMPTY copy_job = client.copy_table( sources=table_path_source, diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index ed0dc731b..3ff96e874 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -432,11 +432,19 @@ def _set_properties_job(self): def test__set_properties_no_stats(self): config = {"test": True} resource = {"configuration": config} + expected = resource.copy() + expected["statistics"] = {} job = self._set_properties_job() + original_resource = job._properties job._set_properties(resource) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) + + # Make sure we don't mutate the object used in the request, as that + # makes debugging more difficult and leads to false positives in unit + # tests. + self.assertIsNot(job._properties, original_resource) def test__set_properties_w_creation_time(self): now, millis = self._datetime_and_millis() @@ -546,6 +554,8 @@ def test__begin_defaults(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} job = self._set_properties_job() builder = job.to_api_repr = mock.Mock() builder.return_value = resource @@ -564,7 +574,7 @@ def test__begin_defaults(self): data=resource, timeout=None, ) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) def test__begin_explicit(self): from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -578,6 +588,8 @@ def test__begin_explicit(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} job = self._set_properties_job() builder = job.to_api_repr = mock.Mock() builder.return_value = resource @@ -598,7 +610,7 @@ def test__begin_explicit(self): data=resource, timeout=7.5, ) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) def test_exists_defaults_miss(self): from google.cloud.exceptions import NotFound @@ -685,6 +697,8 @@ def test_reload_defaults(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} job = self._set_properties_job() job._properties["jobReference"]["location"] = self.LOCATION call_api = job._client._call_api = mock.Mock() @@ -703,7 +717,7 @@ def test_reload_defaults(self): query_params={"location": self.LOCATION}, timeout=None, ) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) def test_reload_explicit(self): from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -717,6 +731,8 @@ def test_reload_explicit(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} job = self._set_properties_job() client = _make_client(project=other_project) call_api = client._call_api = mock.Mock() @@ -736,7 +752,7 @@ def test_reload_explicit(self): query_params={}, timeout=4.2, ) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) def test_cancel_defaults(self): resource = { @@ -747,6 +763,8 @@ def test_cancel_defaults(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} response = {"job": resource} job = self._set_properties_job() job._properties["jobReference"]["location"] = self.LOCATION @@ -764,7 +782,7 @@ def test_cancel_defaults(self): query_params={"location": self.LOCATION}, timeout=None, ) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) def test_cancel_explicit(self): other_project = "other-project-234" @@ -776,6 +794,8 @@ def test_cancel_explicit(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} response = {"job": resource} job = self._set_properties_job() client = _make_client(project=other_project) @@ -797,7 +817,7 @@ def test_cancel_explicit(self): query_params={}, timeout=7.5, ) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) def test_cancel_w_custom_retry(self): from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -811,6 +831,8 @@ def test_cancel_w_custom_retry(self): }, "configuration": {"test": True}, } + expected = resource.copy() + expected["statistics"] = {} response = {"job": resource} job = self._set_properties_job() @@ -830,7 +852,7 @@ def test_cancel_w_custom_retry(self): final_attributes.assert_called() self.assertTrue(result) - self.assertEqual(job._properties, resource) + self.assertEqual(job._properties, expected) self.assertEqual( fake_api_request.call_args_list, [ diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index cf3ce1661..c6bbaa2fb 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -451,6 +451,7 @@ def test_begin_w_bound_client(self): conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job.configuration.reference_file_schema_uri = self.REFERENCE_FILE_SCHEMA_URI path = "/projects/{}/jobs".format(self.PROJECT) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" @@ -498,6 +499,7 @@ def test_begin_w_autodetect(self): job = self._make_one( self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config ) + job.configuration.reference_file_schema_uri = self.REFERENCE_FILE_SCHEMA_URI with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -554,19 +556,18 @@ def test_begin_w_alternate_client(self): "sourceFormat": "CSV", "useAvroLogicalTypes": True, "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + "referenceFileSchemaUri": "gs://path/to/reference", "schema": { "fields": [ { "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "description": None, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, }, ] }, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f38874843..f52eb825a 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2743,17 +2743,21 @@ def _create_job_helper(self, job_config): http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - RESOURCE = { + resource = { + "jobReference": {"projectId": self.PROJECT, "jobId": "random-id"}, + "configuration": job_config, + } + expected = { "jobReference": {"projectId": self.PROJECT, "jobId": mock.ANY}, "configuration": job_config, } - conn = client._connection = make_connection(RESOURCE) + conn = client._connection = make_connection(resource) client.create_job(job_config=job_config) conn.api_request.assert_called_once_with( method="POST", path="/projects/%s/jobs" % self.PROJECT, - data=RESOURCE, + data=expected, timeout=DEFAULT_TIMEOUT, ) @@ -3156,7 +3160,7 @@ def test_load_table_from_uri(self): self.assertEqual(job_config.to_api_repr(), original_config_copy.to_api_repr()) self.assertIsInstance(job, LoadJob) - self.assertIsInstance(job._configuration, LoadJobConfig) + self.assertIsInstance(job.configuration, LoadJobConfig) self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.source_uris), [SOURCE_URI]) @@ -3662,7 +3666,7 @@ def test_copy_table_w_source_strings(self): creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - client._connection = make_connection({}) + conn = client._connection = make_connection({}) sources = [ "dataset_wo_proj.some_table", "other_project.other_dataset.other_table", @@ -3674,6 +3678,11 @@ def test_copy_table_w_source_strings(self): job = client.copy_table(sources, destination) + # Replace job with the request instead of response so we can verify those properties. + _, kwargs = conn.api_request.call_args + request = kwargs["data"] + job._properties = request + expected_sources = [ DatasetReference(client.project, "dataset_wo_proj").table("some_table"), DatasetReference("other_project", "other_dataset").table("other_table"), @@ -3750,7 +3759,7 @@ def test_copy_table_w_valid_job_config(self): data=RESOURCE, timeout=DEFAULT_TIMEOUT, ) - self.assertIsInstance(job._configuration, CopyJobConfig) + self.assertIsInstance(job.configuration, CopyJobConfig) # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() From aa0fa025f03626061e1dfff74ae4196a27f30676 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 16 Mar 2023 08:28:02 -0400 Subject: [PATCH 099/536] chore(deps): Update nox in .kokoro/requirements.in [autoapprove] (#1527) Source-Link: https://github.com/googleapis/synthtool/commit/92006bb3cdc84677aa93c7f5235424ec2b157146 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/requirements.in | 2 +- .kokoro/requirements.txt | 14 +++++--------- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 5fc5daa31..b8edda51c 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:8555f0e37e6261408f792bfd6635102d2da5ad73f8f09bcb24f25e6afb5fac97 + digest: sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6 diff --git a/.kokoro/requirements.in b/.kokoro/requirements.in index 882178ce6..ec867d9fd 100644 --- a/.kokoro/requirements.in +++ b/.kokoro/requirements.in @@ -5,6 +5,6 @@ typing-extensions twine wheel setuptools -nox +nox>=2022.11.21 # required to remove dependency on py charset-normalizer<3 click<8.1.0 diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index fa99c1290..66a2172a7 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -1,6 +1,6 @@ # -# This file is autogenerated by pip-compile with python 3.10 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: # # pip-compile --allow-unsafe --generate-hashes requirements.in # @@ -335,9 +335,9 @@ more-itertools==9.0.0 \ --hash=sha256:250e83d7e81d0c87ca6bd942e6aeab8cc9daa6096d12c5308f3f92fa5e5c1f41 \ --hash=sha256:5a6257e40878ef0520b1803990e3e22303a41b5714006c32a3fd8304b26ea1ab # via jaraco-classes -nox==2022.8.7 \ - --hash=sha256:1b894940551dc5c389f9271d197ca5d655d40bdc6ccf93ed6880e4042760a34b \ - --hash=sha256:96cca88779e08282a699d672258ec01eb7c792d35bbbf538c723172bce23212c +nox==2022.11.21 \ + --hash=sha256:0e41a990e290e274cb205a976c4c97ee3c5234441a8132c8c3fd9ea3c22149eb \ + --hash=sha256:e21c31de0711d1274ca585a2c5fde36b1aa962005ba8e9322bf5eeed16dcd684 # via -r requirements.in packaging==21.3 \ --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ @@ -380,10 +380,6 @@ protobuf==3.20.3 \ # gcp-docuploader # gcp-releasetool # google-api-core -py==1.11.0 \ - --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ - --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 - # via nox pyasn1==0.4.8 \ --hash=sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d \ --hash=sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba From a2520cabf7ec6bcb923c21e338188f1c10dc4d5d Mon Sep 17 00:00:00 2001 From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Date: Fri, 17 Mar 2023 13:03:16 -0700 Subject: [PATCH 100/536] feat: add default LoadJobConfig to Client (#1526) --- google/cloud/bigquery/client.py | 121 ++++--- google/cloud/bigquery/job/base.py | 6 +- tests/system/test_client.py | 8 +- tests/unit/job/test_base.py | 29 +- tests/unit/test_client.py | 513 ++++++++++++++++++++++++++++++ 5 files changed, 621 insertions(+), 56 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index a53819cde..d8fbfb69e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -210,6 +210,9 @@ class Client(ClientWithProject): default_query_job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): Default ``QueryJobConfig``. Will be merged into job configs passed into the ``query`` method. + default_load_job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): + Default ``LoadJobConfig``. + Will be merged into job configs passed into the ``load_table_*`` methods. client_info (Optional[google.api_core.client_info.ClientInfo]): The client info used to send a user-agent string along with API requests. If ``None``, then default info will be used. Generally, @@ -235,6 +238,7 @@ def __init__( _http=None, location=None, default_query_job_config=None, + default_load_job_config=None, client_info=None, client_options=None, ) -> None: @@ -260,6 +264,7 @@ def __init__( self._connection = Connection(self, **kw_args) self._location = location self._default_query_job_config = copy.deepcopy(default_query_job_config) + self._default_load_job_config = copy.deepcopy(default_load_job_config) @property def location(self): @@ -277,6 +282,17 @@ def default_query_job_config(self): def default_query_job_config(self, value: QueryJobConfig): self._default_query_job_config = copy.deepcopy(value) + @property + def default_load_job_config(self): + """Default ``LoadJobConfig``. + Will be merged into job configs passed into the ``load_table_*`` methods. + """ + return self._default_load_job_config + + @default_load_job_config.setter + def default_load_job_config(self, value: LoadJobConfig): + self._default_load_job_config = copy.deepcopy(value) + def close(self): """Close the underlying transport objects, releasing system resources. @@ -2330,8 +2346,8 @@ def load_table_from_uri( Raises: TypeError: - If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` - class. + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.LoadJobConfig` class. """ job_id = _make_job_id(job_id, job_id_prefix) @@ -2348,11 +2364,14 @@ def load_table_from_uri( destination = _table_arg_to_table_ref(destination, default_project=self.project) - if job_config: - job_config = copy.deepcopy(job_config) - _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) + if job_config is not None: + _verify_job_config_type(job_config, LoadJobConfig) + else: + job_config = job.LoadJobConfig() - load_job = job.LoadJob(job_ref, source_uris, destination, self, job_config) + new_job_config = job_config._fill_from_default(self._default_load_job_config) + + load_job = job.LoadJob(job_ref, source_uris, destination, self, new_job_config) load_job._begin(retry=retry, timeout=timeout) return load_job @@ -2424,8 +2443,8 @@ def load_table_from_file( mode. TypeError: - If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` - class. + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.LoadJobConfig` class. """ job_id = _make_job_id(job_id, job_id_prefix) @@ -2437,10 +2456,15 @@ def load_table_from_file( destination = _table_arg_to_table_ref(destination, default_project=self.project) job_ref = job._JobReference(job_id, project=project, location=location) - if job_config: - job_config = copy.deepcopy(job_config) - _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) - load_job = job.LoadJob(job_ref, None, destination, self, job_config) + + if job_config is not None: + _verify_job_config_type(job_config, LoadJobConfig) + else: + job_config = job.LoadJobConfig() + + new_job_config = job_config._fill_from_default(self._default_load_job_config) + + load_job = job.LoadJob(job_ref, None, destination, self, new_job_config) job_resource = load_job.to_api_repr() if rewind: @@ -2564,43 +2588,40 @@ def load_table_from_dataframe( If a usable parquet engine cannot be found. This method requires :mod:`pyarrow` to be installed. TypeError: - If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` - class. + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.LoadJobConfig` class. """ job_id = _make_job_id(job_id, job_id_prefix) - if job_config: - _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) - # Make a copy so that the job config isn't modified in-place. - job_config_properties = copy.deepcopy(job_config._properties) - job_config = job.LoadJobConfig() - job_config._properties = job_config_properties - + if job_config is not None: + _verify_job_config_type(job_config, LoadJobConfig) else: job_config = job.LoadJobConfig() + new_job_config = job_config._fill_from_default(self._default_load_job_config) + supported_formats = {job.SourceFormat.CSV, job.SourceFormat.PARQUET} - if job_config.source_format is None: + if new_job_config.source_format is None: # default value - job_config.source_format = job.SourceFormat.PARQUET + new_job_config.source_format = job.SourceFormat.PARQUET if ( - job_config.source_format == job.SourceFormat.PARQUET - and job_config.parquet_options is None + new_job_config.source_format == job.SourceFormat.PARQUET + and new_job_config.parquet_options is None ): parquet_options = ParquetOptions() # default value parquet_options.enable_list_inference = True - job_config.parquet_options = parquet_options + new_job_config.parquet_options = parquet_options - if job_config.source_format not in supported_formats: + if new_job_config.source_format not in supported_formats: raise ValueError( "Got unexpected source_format: '{}'. Currently, only PARQUET and CSV are supported".format( - job_config.source_format + new_job_config.source_format ) ) - if pyarrow is None and job_config.source_format == job.SourceFormat.PARQUET: + if pyarrow is None and new_job_config.source_format == job.SourceFormat.PARQUET: # pyarrow is now the only supported parquet engine. raise ValueError("This method requires pyarrow to be installed") @@ -2611,8 +2632,8 @@ def load_table_from_dataframe( # schema, and check if dataframe schema is compatible with it - except # for WRITE_TRUNCATE jobs, the existing schema does not matter then. if ( - not job_config.schema - and job_config.write_disposition != job.WriteDisposition.WRITE_TRUNCATE + not new_job_config.schema + and new_job_config.write_disposition != job.WriteDisposition.WRITE_TRUNCATE ): try: table = self.get_table(destination) @@ -2623,7 +2644,7 @@ def load_table_from_dataframe( name for name, _ in _pandas_helpers.list_columns_and_indexes(dataframe) ) - job_config.schema = [ + new_job_config.schema = [ # Field description and policy tags are not needed to # serialize a data frame. SchemaField( @@ -2637,11 +2658,11 @@ def load_table_from_dataframe( if field.name in columns_and_indexes ] - job_config.schema = _pandas_helpers.dataframe_to_bq_schema( - dataframe, job_config.schema + new_job_config.schema = _pandas_helpers.dataframe_to_bq_schema( + dataframe, new_job_config.schema ) - if not job_config.schema: + if not new_job_config.schema: # the schema could not be fully detected warnings.warn( "Schema could not be detected for all columns. Loading from a " @@ -2652,13 +2673,13 @@ def load_table_from_dataframe( ) tmpfd, tmppath = tempfile.mkstemp( - suffix="_job_{}.{}".format(job_id[:8], job_config.source_format.lower()) + suffix="_job_{}.{}".format(job_id[:8], new_job_config.source_format.lower()) ) os.close(tmpfd) try: - if job_config.source_format == job.SourceFormat.PARQUET: + if new_job_config.source_format == job.SourceFormat.PARQUET: if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: msg = ( "Loading dataframe data in PARQUET format with pyarrow " @@ -2669,13 +2690,13 @@ def load_table_from_dataframe( ) warnings.warn(msg, category=RuntimeWarning) - if job_config.schema: + if new_job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() _pandas_helpers.dataframe_to_parquet( dataframe, - job_config.schema, + new_job_config.schema, tmppath, parquet_compression=parquet_compression, parquet_use_compliant_nested_type=True, @@ -2715,7 +2736,7 @@ def load_table_from_dataframe( job_id_prefix=job_id_prefix, location=location, project=project, - job_config=job_config, + job_config=new_job_config, timeout=timeout, ) @@ -2791,22 +2812,22 @@ def load_table_from_json( Raises: TypeError: - If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` - class. + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.LoadJobConfig` class. """ job_id = _make_job_id(job_id, job_id_prefix) - if job_config: - _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig) - # Make a copy so that the job config isn't modified in-place. - job_config = copy.deepcopy(job_config) + if job_config is not None: + _verify_job_config_type(job_config, LoadJobConfig) else: job_config = job.LoadJobConfig() - job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON + new_job_config = job_config._fill_from_default(self._default_load_job_config) + + new_job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON - if job_config.schema is None: - job_config.autodetect = True + if new_job_config.schema is None: + new_job_config.autodetect = True if project is None: project = self.project @@ -2828,7 +2849,7 @@ def load_table_from_json( job_id_prefix=job_id_prefix, location=location, project=project, - job_config=job_config, + job_config=new_job_config, timeout=timeout, ) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 55e80b2eb..4073e0137 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -269,7 +269,7 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) - def _fill_from_default(self, default_job_config): + def _fill_from_default(self, default_job_config=None): """Merge this job config with a default job config. The keys in this object take precedence over the keys in the default @@ -283,6 +283,10 @@ def _fill_from_default(self, default_job_config): Returns: google.cloud.bigquery.job._JobConfig: A new (merged) job config. """ + if not default_job_config: + new_job_config = copy.deepcopy(self) + return new_job_config + if self._job_type != default_job_config._job_type: raise TypeError( "attempted to merge two incompatible job types: " diff --git a/tests/system/test_client.py b/tests/system/test_client.py index a69bb92c5..1437328a8 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2319,7 +2319,7 @@ def _table_exists(t): return False -def test_dbapi_create_view(dataset_id): +def test_dbapi_create_view(dataset_id: str): query = f""" CREATE VIEW {dataset_id}.dbapi_create_view @@ -2332,7 +2332,7 @@ def test_dbapi_create_view(dataset_id): assert Config.CURSOR.rowcount == 0, "expected 0 rows" -def test_parameterized_types_round_trip(dataset_id): +def test_parameterized_types_round_trip(dataset_id: str): client = Config.CLIENT table_id = f"{dataset_id}.test_parameterized_types_round_trip" fields = ( @@ -2358,7 +2358,7 @@ def test_parameterized_types_round_trip(dataset_id): assert tuple(s._key()[:2] for s in table2.schema) == fields -def test_table_snapshots(dataset_id): +def test_table_snapshots(dataset_id: str): from google.cloud.bigquery import CopyJobConfig from google.cloud.bigquery import OperationType @@ -2429,7 +2429,7 @@ def test_table_snapshots(dataset_id): assert rows == [(1, "one"), (2, "two")] -def test_table_clones(dataset_id): +def test_table_clones(dataset_id: str): from google.cloud.bigquery import CopyJobConfig from google.cloud.bigquery import OperationType diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 3ff96e874..a9760aa9b 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -1104,7 +1104,7 @@ def test_ctor_with_unknown_property_raises_error(self): config = self._make_one() config.wrong_name = None - def test_fill_from_default(self): + def test_fill_query_job_config_from_default(self): from google.cloud.bigquery import QueryJobConfig job_config = QueryJobConfig() @@ -1120,6 +1120,22 @@ def test_fill_from_default(self): self.assertTrue(final_job_config.use_query_cache) self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + def test_fill_load_job_from_default(self): + from google.cloud.bigquery import LoadJobConfig + + job_config = LoadJobConfig() + job_config.create_session = True + job_config.encoding = "UTF-8" + + default_job_config = LoadJobConfig() + default_job_config.ignore_unknown_values = True + default_job_config.encoding = "ISO-8859-1" + + final_job_config = job_config._fill_from_default(default_job_config) + self.assertTrue(final_job_config.create_session) + self.assertTrue(final_job_config.ignore_unknown_values) + self.assertEqual(final_job_config.encoding, "UTF-8") + def test_fill_from_default_conflict(self): from google.cloud.bigquery import QueryJobConfig @@ -1132,6 +1148,17 @@ def test_fill_from_default_conflict(self): with self.assertRaises(TypeError): basic_job_config._fill_from_default(conflicting_job_config) + def test_fill_from_empty_default_conflict(self): + from google.cloud.bigquery import QueryJobConfig + + job_config = QueryJobConfig() + job_config.dry_run = True + job_config.maximum_bytes_billed = 1000 + + final_job_config = job_config._fill_from_default(default_job_config=None) + self.assertTrue(final_job_config.dry_run) + self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") def test__get_sub_prop_wo_default(self, _get_sub_prop): job_config = self._make_one() diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f52eb825a..c155e2bc6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -239,6 +239,31 @@ def test_ctor_w_query_job_config(self): self.assertIsInstance(client._default_query_job_config, QueryJobConfig) self.assertTrue(client._default_query_job_config.dry_run) + def test_ctor_w_load_job_config(self): + from google.cloud.bigquery._http import Connection + from google.cloud.bigquery import LoadJobConfig + + creds = _make_credentials() + http = object() + location = "us-central" + job_config = LoadJobConfig() + job_config.create_session = True + + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + location=location, + default_load_job_config=job_config, + ) + self.assertIsInstance(client._connection, Connection) + self.assertIs(client._connection.credentials, creds) + self.assertIs(client._connection.http, http) + self.assertEqual(client.location, location) + + self.assertIsInstance(client._default_load_job_config, LoadJobConfig) + self.assertTrue(client._default_load_job_config.create_session) + def test__call_api_applying_custom_retry_on_timeout(self): from concurrent.futures import TimeoutError from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -426,6 +451,19 @@ def test_default_query_job_config(self): client.default_query_job_config = job_config self.assertIsInstance(client.default_query_job_config, QueryJobConfig) + def test_default_load_job_config(self): + from google.cloud.bigquery import LoadJobConfig + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + self.assertIsNone(client.default_load_job_config) + + job_config = LoadJobConfig() + job_config.create_session = True + client.default_load_job_config = job_config + self.assertIsInstance(client.default_load_job_config, LoadJobConfig) + def test_get_service_account_email(self): path = "/projects/%s/serviceAccount" % (self.PROJECT,) creds = _make_credentials() @@ -3282,6 +3320,146 @@ def test_load_table_from_uri_w_invalid_job_config(self): self.assertIn("Expected an instance of LoadJobConfig", exc.exception.args[0]) + def test_load_table_from_uri_w_explicit_job_config(self): + from google.cloud.bigquery.job import LoadJobConfig + + JOB = "job_name" + DESTINATION = "destination_table" + SOURCE_URI = "http://example.com/source.csv" + RESOURCE = { + "jobReference": {"jobId": JOB, "projectId": self.PROJECT}, + "configuration": { + "load": { + "sourceUris": [SOURCE_URI], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": DESTINATION, + }, + "createSession": True, + "encoding": "UTF-8", + } + }, + } + + creds = _make_credentials() + http = object() + + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RESOURCE) + destination = DatasetReference(self.PROJECT, self.DS_ID).table(DESTINATION) + + job_config = LoadJobConfig() + job_config.create_session = True + job_config.encoding = "UTF-8" + client.load_table_from_uri( + SOURCE_URI, destination, job_id=JOB, job_config=job_config + ) + + # Check that load_table_from_uri actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=RESOURCE, + timeout=DEFAULT_TIMEOUT, + ) + + def test_load_table_from_uri_w_explicit_job_config_override(self): + from google.cloud.bigquery.job import LoadJobConfig + + JOB = "job_name" + DESTINATION = "destination_table" + SOURCE_URI = "http://example.com/source.csv" + RESOURCE = { + "jobReference": {"jobId": JOB, "projectId": self.PROJECT}, + "configuration": { + "load": { + "sourceUris": [SOURCE_URI], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": DESTINATION, + }, + "createSession": False, + "encoding": "ISO-8859-1", + } + }, + } + + creds = _make_credentials() + http = object() + default_job_config = LoadJobConfig() + default_job_config.create_session = True + default_job_config.encoding = "ISO-8859-1" + + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_load_job_config=default_job_config, + ) + conn = client._connection = make_connection(RESOURCE) + destination = DatasetReference(self.PROJECT, self.DS_ID).table(DESTINATION) + + job_config = LoadJobConfig() + job_config.create_session = False + client.load_table_from_uri( + SOURCE_URI, destination, job_id=JOB, job_config=job_config + ) + + # Check that load_table_from_uri actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=RESOURCE, + timeout=DEFAULT_TIMEOUT, + ) + + def test_load_table_from_uri_w_default_load_config(self): + from google.cloud.bigquery.job import LoadJobConfig + + JOB = "job_name" + DESTINATION = "destination_table" + SOURCE_URI = "http://example.com/source.csv" + RESOURCE = { + "jobReference": {"jobId": JOB, "projectId": self.PROJECT}, + "configuration": { + "load": { + "sourceUris": [SOURCE_URI], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": DESTINATION, + }, + "encoding": "ISO-8859-1", + } + }, + } + + creds = _make_credentials() + http = object() + default_job_config = LoadJobConfig() + default_job_config.encoding = "ISO-8859-1" + + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_load_job_config=default_job_config, + ) + conn = client._connection = make_connection(RESOURCE) + destination = DatasetReference(self.PROJECT, self.DS_ID).table(DESTINATION) + + client.load_table_from_uri(SOURCE_URI, destination, job_id=JOB) + + # Check that load_table_from_uri actually starts the job. + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=RESOURCE, + timeout=DEFAULT_TIMEOUT, + ) + @staticmethod def _mock_requests_response(status_code, headers, content=b""): return mock.Mock( @@ -6940,6 +7118,118 @@ def test_load_table_from_file_w_invalid_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg + def test_load_table_from_file_w_explicit_job_config(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + file_obj = self._make_file_obj() + + job_config = self._make_config() + job_config.create_session = True + job_config.encoding = "UTF-8" + do_upload_patch = self._make_do_upload_patch( + client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION + ) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, + self.TABLE_REF, + job_id="job_id", + project=self.PROJECT, + location=self.LOCATION, + job_config=job_config, + ) + + expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION) + expected_resource["jobReference"]["location"] = self.LOCATION + expected_resource["jobReference"]["projectId"] = self.PROJECT + expected_resource["configuration"]["load"]["createSession"] = True + expected_resource["configuration"]["load"]["encoding"] = "UTF-8" + do_upload.assert_called_once_with( + file_obj, + expected_resource, + _DEFAULT_NUM_RETRIES, + DEFAULT_TIMEOUT, + project=self.PROJECT, + ) + + def test_load_table_from_file_w_explicit_job_config_override(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery.job import LoadJobConfig + + client = self._make_client() + file_obj = self._make_file_obj() + + default_job_config = LoadJobConfig() + default_job_config.create_session = True + default_job_config.encoding = "ISO-8859-1" + client.default_load_job_config = default_job_config + + job_config = self._make_config() + job_config.create_session = False + do_upload_patch = self._make_do_upload_patch( + client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION + ) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, + self.TABLE_REF, + job_id="job_id", + project=self.PROJECT, + location=self.LOCATION, + job_config=job_config, + ) + + expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION) + expected_resource["jobReference"]["location"] = self.LOCATION + expected_resource["jobReference"]["projectId"] = self.PROJECT + expected_resource["configuration"]["load"]["createSession"] = False + expected_resource["configuration"]["load"]["encoding"] = "ISO-8859-1" + do_upload.assert_called_once_with( + file_obj, + expected_resource, + _DEFAULT_NUM_RETRIES, + DEFAULT_TIMEOUT, + project=self.PROJECT, + ) + + def test_load_table_from_file_w_default_load_config(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery.job import LoadJobConfig + + client = self._make_client() + file_obj = self._make_file_obj() + + default_job_config = LoadJobConfig() + default_job_config.encoding = "ISO-8859-1" + client.default_load_job_config = default_job_config + + job_config = self._make_config() + do_upload_patch = self._make_do_upload_patch( + client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION + ) + with do_upload_patch as do_upload: + client.load_table_from_file( + file_obj, + self.TABLE_REF, + job_id="job_id", + project=self.PROJECT, + location=self.LOCATION, + job_config=job_config, + ) + + expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION) + expected_resource["jobReference"]["location"] = self.LOCATION + expected_resource["jobReference"]["projectId"] = self.PROJECT + expected_resource["configuration"]["load"]["encoding"] = "ISO-8859-1" + do_upload.assert_called_once_with( + file_obj, + expected_resource, + _DEFAULT_NUM_RETRIES, + DEFAULT_TIMEOUT, + project=self.PROJECT, + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe(self): @@ -7304,6 +7594,117 @@ def test_load_table_from_dataframe_w_list_inference_none(self): # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_explicit_job_config_override(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + client.default_load_job_config = job.LoadJobConfig( + encoding="ISO-8859-1", + write_disposition=job.WriteDisposition.WRITE_TRUNCATE, + source_format=job.SourceFormat.PARQUET, + ) + + job_config = job.LoadJobConfig( + write_disposition=job.WriteDisposition.WRITE_APPEND, + source_format=job.SourceFormat.PARQUET, + ) + original_config_copy = copy.deepcopy(job_config) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] + ), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.write_disposition == job.WriteDisposition.WRITE_APPEND + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.encoding == "ISO-8859-1" + + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_default_load_config(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + client.default_load_job_config = job.LoadJobConfig( + write_disposition=job.WriteDisposition.WRITE_TRUNCATE, + source_format=job.SourceFormat.PARQUET, + ) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] + ), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.write_disposition == job.WriteDisposition.WRITE_TRUNCATE + assert sent_config.source_format == job.SourceFormat.PARQUET + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_false(self): @@ -8377,6 +8778,118 @@ def test_load_table_from_json_w_invalid_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg + def test_load_table_from_json_w_explicit_job_config_override(self): + from google.cloud.bigquery import job + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + + json_rows = [ + {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False}, + {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, + ] + + schema = [ + SchemaField("name", "STRING"), + SchemaField("age", "INTEGER"), + SchemaField("adult", "BOOLEAN"), + ] + client.default_load_job_config = job.LoadJobConfig( + schema=schema, encoding="ISO-8859-1" + ) + + override_schema = schema + override_schema[0] = SchemaField("username", "STRING") + job_config = job.LoadJobConfig(schema=override_schema) + original_config_copy = copy.deepcopy(job_config) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json( + json_rows, + self.TABLE_REF, + job_config=job_config, + project="project-x", + location="EU", + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location="EU", + project="project-x", + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON + assert sent_config.schema == override_schema + assert sent_config.encoding == "ISO-8859-1" + assert not sent_config.autodetect + + # the original config object should not have been modified + assert job_config.to_api_repr() == original_config_copy.to_api_repr() + + def test_load_table_from_json_w_default_job_config(self): + from google.cloud.bigquery import job + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + + json_rows = [ + {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False}, + {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, + ] + + schema = [ + SchemaField("name", "STRING"), + SchemaField("age", "INTEGER"), + SchemaField("adult", "BOOLEAN"), + ] + client.default_load_job_config = job.LoadJobConfig(schema=schema) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json( + json_rows, + self.TABLE_REF, + job_config=None, + project="project-x", + location="EU", + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location="EU", + project="project-x", + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON + assert sent_config.schema == schema + def test_load_table_from_json_unicode_emoji_data_case(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES From 5e4465d0975f54e8da885006686d9431ff9c5653 Mon Sep 17 00:00:00 2001 From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Date: Thu, 23 Mar 2023 11:17:18 -0700 Subject: [PATCH 101/536] feat: add bool, int, float, string dtype to to_dataframe (#1529) --- google/cloud/bigquery/_pandas_helpers.py | 25 +++-- google/cloud/bigquery/enums.py | 14 +++ google/cloud/bigquery/job/query.py | 55 ++++++++++- google/cloud/bigquery/table.py | 99 +++++++++++++++++++- tests/unit/test_table.py | 113 +++++++++++++++++++++++ 5 files changed, 294 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 3d7e7d793..dfd966c64 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -21,6 +21,7 @@ import logging import queue import warnings +from typing import Any, Union from packaging import version @@ -283,7 +284,13 @@ def bq_to_arrow_schema(bq_schema): return pyarrow.schema(arrow_fields) -def default_types_mapper(date_as_object: bool = False): +def default_types_mapper( + date_as_object: bool = False, + bool_dtype: Union[Any, None] = None, + int_dtype: Union[Any, None] = None, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, +): """Create a mapping from pyarrow types to pandas types. This overrides the pandas defaults to use null-safe extension types where @@ -299,8 +306,17 @@ def default_types_mapper(date_as_object: bool = False): """ def types_mapper(arrow_data_type): - if pyarrow.types.is_boolean(arrow_data_type): - return pandas.BooleanDtype() + if bool_dtype is not None and pyarrow.types.is_boolean(arrow_data_type): + return bool_dtype + + elif int_dtype is not None and pyarrow.types.is_integer(arrow_data_type): + return int_dtype + + elif float_dtype is not None and pyarrow.types.is_floating(arrow_data_type): + return float_dtype + + elif string_dtype is not None and pyarrow.types.is_string(arrow_data_type): + return string_dtype elif ( # If date_as_object is True, we know some DATE columns are @@ -310,9 +326,6 @@ def types_mapper(arrow_data_type): ): return db_dtypes.DateDtype() - elif pyarrow.types.is_integer(arrow_data_type): - return pandas.Int64Dtype() - elif pyarrow.types.is_time(arrow_data_type): return db_dtypes.TimeDtype() diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 45d43a2a7..e4e3d22fc 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -77,6 +77,20 @@ class CreateDisposition(object): returned in the job result.""" +class DefaultPandasDTypes(enum.Enum): + """Default Pandas DataFrem DTypes to convert BigQuery data. These + Sentinel values are used instead of None to maintain backward compatibility, + and allow Pandas package is not available. For more information: + https://stackoverflow.com/a/60605919/101923 + """ + + BOOL_DTYPE = object() + """Specifies default bool dtype""" + + INT_DTYPE = object() + """Specifies default integer dtype""" + + class DestinationFormat(object): """The exported file format. The default value is :attr:`CSV`. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index e6d6d682d..e4807cc63 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -28,7 +28,7 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration -from google.cloud.bigquery.enums import KeyResultStatementKind +from google.cloud.bigquery.enums import KeyResultStatementKind, DefaultPandasDTypes from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery import _helpers from google.cloud.bigquery.query import ( @@ -53,6 +53,11 @@ from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference +try: + import pandas # type: ignore +except ImportError: # pragma: NO COVER + pandas = None + if typing.TYPE_CHECKING: # pragma: NO COVER # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. @@ -1620,6 +1625,10 @@ def to_dataframe( create_bqstorage_client: bool = True, max_results: Optional[int] = None, geography_as_object: bool = False, + bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, + int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1672,6 +1681,46 @@ def to_dataframe( .. versionadded:: 2.24.0 + bool_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``) + to convert BigQuery Boolean type, instead of relying on the default + ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type + + .. versionadded:: 3.7.1 + + int_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) + to convert BigQuery Integer types, instead of relying on the default + ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("int64")``. A list of BigQuery + Integer types can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + + .. versionadded:: 3.7.1 + + float_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) + to convert BigQuery Float type, instead of relying on the default + ``numpy.dtype("float64")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("float64")``. BigQuery Float + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + + .. versionadded:: 3.7.1 + + string_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to + convert BigQuery String type, instead of relying on the default + ``numpy.dtype("object")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("object")``. BigQuery String + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type + + .. versionadded:: 3.7.1 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data @@ -1694,6 +1743,10 @@ def to_dataframe( progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, geography_as_object=geography_as_object, + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, ) # If changing the signature of this method, make sure to apply the same diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index a2110a9fb..93b0da67f 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -34,6 +34,11 @@ except ImportError: # pragma: NO COVER pyarrow = None +try: + import db_dtypes # type: ignore +except ImportError: # pragma: NO COVER + db_dtypes = None + try: import geopandas # type: ignore except ImportError: @@ -55,6 +60,7 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery.enums import DefaultPandasDTypes from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource @@ -88,6 +94,11 @@ _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' +_NO_SUPPORTED_DTYPE = ( + "The dtype cannot to be converted to a pandas ExtensionArray " + "because the necessary `__from_arrow__` attribute is missing." +) + def _reference_getter(table): """A :class:`~google.cloud.bigquery.table.TableReference` pointing to @@ -1920,6 +1931,10 @@ def to_dataframe( progress_bar_type: str = None, create_bqstorage_client: bool = True, geography_as_object: bool = False, + bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, + int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -1958,6 +1973,7 @@ def to_dataframe( progress bar as a graphical dialog box. .. versionadded:: 1.11.0 + create_bqstorage_client (Optional[bool]): If ``True`` (default), create a BigQuery Storage API client using the default API settings. The BigQuery Storage API @@ -1975,6 +1991,46 @@ def to_dataframe( .. versionadded:: 2.24.0 + bool_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``) + to convert BigQuery Boolean type, instead of relying on the default + ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type + + .. versionadded:: 3.7.1 + + int_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) + to convert BigQuery Integer types, instead of relying on the default + ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("int64")``. A list of BigQuery + Integer types can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + + .. versionadded:: 3.7.1 + + float_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) + to convert BigQuery Float type, instead of relying on the default + ``numpy.dtype("float64")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("float64")``. BigQuery Float + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + + .. versionadded:: 3.7.1 + + string_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to + convert BigQuery String type, instead of relying on the default + ``numpy.dtype("object")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("object")``. BigQuery String + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type + + .. versionadded:: 3.7.1 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -1987,7 +2043,9 @@ def to_dataframe( the :mod:`google.cloud.bigquery_storage_v1` module is required but cannot be imported. Also if `geography_as_object` is `True`, but the - :mod:`shapely` library cannot be imported. + :mod:`shapely` library cannot be imported. Also if + `bool_dtype`, `int_dtype` or other dtype parameters + is not supported dtype. """ _pandas_helpers.verify_pandas_imports() @@ -1995,6 +2053,24 @@ def to_dataframe( if geography_as_object and shapely is None: raise ValueError(_NO_SHAPELY_ERROR) + if bool_dtype is DefaultPandasDTypes.BOOL_DTYPE: + bool_dtype = pandas.BooleanDtype() + + if int_dtype is DefaultPandasDTypes.INT_DTYPE: + int_dtype = pandas.Int64Dtype() + + if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"): + raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE) + + if int_dtype is not None and not hasattr(int_dtype, "__from_arrow__"): + raise ValueError("int_dtype", _NO_SUPPORTED_DTYPE) + + if float_dtype is not None and not hasattr(float_dtype, "__from_arrow__"): + raise ValueError("float_dtype", _NO_SUPPORTED_DTYPE) + + if string_dtype is not None and not hasattr(string_dtype, "__from_arrow__"): + raise ValueError("string_dtype", _NO_SUPPORTED_DTYPE) + if dtypes is None: dtypes = {} @@ -2019,15 +2095,15 @@ def to_dataframe( for col in record_batch # Type can be date32 or date64 (plus units). # See: https://arrow.apache.org/docs/python/api/datatypes.html - if str(col.type).startswith("date") + if pyarrow.types.is_date(col.type) ) timestamp_as_object = not all( self.__can_cast_timestamp_ns(col) for col in record_batch - # Type can be timestamp (plus units and time zone). + # Type can be datetime and timestamp (plus units and time zone). # See: https://arrow.apache.org/docs/python/api/datatypes.html - if str(col.type).startswith("timestamp") + if pyarrow.types.is_timestamp(col.type) ) if len(record_batch) > 0: @@ -2036,7 +2112,11 @@ def to_dataframe( timestamp_as_object=timestamp_as_object, integer_object_nulls=True, types_mapper=_pandas_helpers.default_types_mapper( - date_as_object=date_as_object + date_as_object=date_as_object, + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, ), ) else: @@ -2233,6 +2313,10 @@ def to_dataframe( progress_bar_type=None, create_bqstorage_client=True, geography_as_object=False, + bool_dtype=None, + int_dtype=None, + float_dtype=None, + string_dtype=None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2241,6 +2325,11 @@ def to_dataframe( dtypes (Any): Ignored. Added for compatibility with RowIterator. progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. + geography_as_object (bool): Ignored. Added for compatibility with RowIterator. + bool_dtype (Any): Ignored. Added for compatibility with RowIterator. + int_dtype (Any): Ignored. Added for compatibility with RowIterator. + float_dtype (Any): Ignored. Added for compatibility with RowIterator. + string_dtype (Any): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index a79b98881..22c7c048d 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -55,6 +55,11 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import db_dtypes # type: ignore +except ImportError: # pragma: NO COVER + db_dtypes = None + try: import geopandas except (ImportError, AttributeError): # pragma: NO COVER @@ -3456,6 +3461,114 @@ def test_to_dataframe_w_various_types_nullable(self): self.assertIsInstance(row.complete, bool) self.assertIsInstance(row.date, datetime.date) + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_w_dtypes_mapper(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING"), + SchemaField("complete", "BOOL"), + SchemaField("age", "INTEGER"), + SchemaField("seconds", "INT64"), + SchemaField("miles", "FLOAT64"), + ] + row_data = [ + ["Phred Phlyntstone", "true", "32", "23000", "1.77"], + ["Bharney Rhubble", "false", "33", "454000", "6.66"], + ["Wylma Phlyntstone", "true", "29", "341000", "2.0"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + df = row_iterator.to_dataframe( + create_bqstorage_client=False, + bool_dtype=pandas.BooleanDtype(), + int_dtype=pandas.Int32Dtype(), + float_dtype=pandas.StringDtype(), + string_dtype=pandas.StringDtype(), + ) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(df.complete.dtype.name, "boolean") + self.assertEqual(df.age.dtype.name, "Int32") + self.assertEqual(df.seconds.dtype.name, "Int32") + self.assertEqual(df.miles.dtype.name, "string") + self.assertEqual(df.name.dtype.name, "string") + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_w_none_dtypes_mapper(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING"), + SchemaField("complete", "BOOL"), + SchemaField("age", "INTEGER"), + SchemaField("seconds", "INT64"), + SchemaField("miles", "FLOAT64"), + ] + row_data = [ + ["Phred Phlyntstone", "true", "32", "23000", "1.77"], + ["Bharney Rhubble", "false", "33", "454000", "6.66"], + ["Wylma Phlyntstone", "true", "29", "341000", "2.0"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + df = row_iterator.to_dataframe( + create_bqstorage_client=False, + bool_dtype=None, + int_dtype=None, + float_dtype=None, + string_dtype=None, + ) + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(df.complete.dtype.name, "bool") + self.assertEqual(df.age.dtype.name, "int64") + self.assertEqual(df.seconds.dtype.name, "int64") + self.assertEqual(df.miles.dtype.name, "float64") + self.assertEqual(df.name.dtype.name, "object") + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_w_unsupported_dtypes_mapper(self): + import numpy + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING"), + ] + row_data = [ + ["Phred Phlyntstone"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + bool_dtype=numpy.dtype("bool"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + int_dtype=numpy.dtype("int64"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + float_dtype=numpy.dtype("float64"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + string_dtype=numpy.dtype("object"), + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_column_dtypes(self): from google.cloud.bigquery.schema import SchemaField From 50e502674807b9771d7e26c0e784539bed8f9da6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 23 Mar 2023 20:17:02 -0500 Subject: [PATCH 102/536] fix: loosen ipywidgets restrictions further to address ipython compatibility issues (#1531) * fix: loosen ipywidgets restrictions further to address ipython compatibility issues * include ipywidgets in prerelease deps * show all package versions * add ipykernel dependency * ipykernel in noxfile * oops --- noxfile.py | 6 +++++- setup.py | 10 ++++++++-- testing/constraints-3.7.txt | 3 ++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/noxfile.py b/noxfile.py index f6283abf9..8464e4980 100644 --- a/noxfile.py +++ b/noxfile.py @@ -303,6 +303,10 @@ def prerelease_deps(session): session.install( "--pre", "--upgrade", + "IPython", + "ipykernel", + "ipywidgets", + "tqdm", "git+https://github.com/pypa/packaging.git", ) @@ -321,7 +325,6 @@ def prerelease_deps(session): "google-cloud-datacatalog", "google-cloud-storage", "google-cloud-testutils", - "IPython", "mock", "psutil", "pytest", @@ -356,6 +359,7 @@ def prerelease_deps(session): session.run("python", "-c", "import grpc; print(grpc.__version__)") session.run("python", "-c", "import pandas; print(pandas.__version__)") session.run("python", "-c", "import pyarrow; print(pyarrow.__version__)") + session.run("python", "-m", "pip", "freeze") # Run all tests, except a few samples tests which require extra dependencies. session.run("py.test", "tests/unit") diff --git a/setup.py b/setup.py index 2119e0191..51cb6dc75 100644 --- a/setup.py +++ b/setup.py @@ -67,9 +67,15 @@ pyarrow_dependency, "db-dtypes>=0.3.0,<2.0.0dev", ], - "ipywidgets": ["ipywidgets>=7.7.0,<8.0.1"], + "ipywidgets": [ + "ipywidgets>=7.7.0", + "ipykernel>=6.0.0", + ], "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <2.0dev"], - "ipython": ["ipython>=7.0.1,!=8.1.0"], + "ipython": [ + "ipython>=7.23.1,!=8.1.0", + "ipykernel>=6.0.0", + ], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 1.1.0", diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 746656b58..c94d80abf 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -13,7 +13,8 @@ google-cloud-core==1.6.0 google-resumable-media==0.6.0 grpcio==1.47.0 ipywidgets==7.7.1 -ipython==7.0.1 +ipython==7.23.1 +ipykernel==6.0.0 opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 From 3c925802046f0dd344f9ed350869fc78ceea5cd8 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 27 Mar 2023 09:03:04 -0500 Subject: [PATCH 103/536] chore(main): release 3.8.0 (#1525) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 14 ++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eda8912d..4c3fc839a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.8.0](https://github.com/googleapis/python-bigquery/compare/v3.7.0...v3.8.0) (2023-03-24) + + +### Features + +* Add bool, int, float, string dtype to to_dataframe ([#1529](https://github.com/googleapis/python-bigquery/issues/1529)) ([5e4465d](https://github.com/googleapis/python-bigquery/commit/5e4465d0975f54e8da885006686d9431ff9c5653)) +* Add default LoadJobConfig to Client ([#1526](https://github.com/googleapis/python-bigquery/issues/1526)) ([a2520ca](https://github.com/googleapis/python-bigquery/commit/a2520cabf7ec6bcb923c21e338188f1c10dc4d5d)) +* Expose configuration property on CopyJob, ExtractJob, LoadJob, QueryJob ([#1521](https://github.com/googleapis/python-bigquery/issues/1521)) ([8270a10](https://github.com/googleapis/python-bigquery/commit/8270a10df8f40750a7ac541a1781a71d7e79ce67)) + + +### Bug Fixes + +* Loosen ipywidgets restrictions further to address ipython compatibility issues ([#1531](https://github.com/googleapis/python-bigquery/issues/1531)) ([50e5026](https://github.com/googleapis/python-bigquery/commit/50e502674807b9771d7e26c0e784539bed8f9da6)) + ## [3.7.0](https://github.com/googleapis/python-bigquery/compare/v3.6.0...v3.7.0) (2023-03-06) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index dc87b3c5b..8f4ba4810 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.7.0" +__version__ = "3.8.0" From a69348a558f48cfc61d03d3e8bb7f9aee48bea86 Mon Sep 17 00:00:00 2001 From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Date: Tue, 28 Mar 2023 07:54:39 -0700 Subject: [PATCH 104/536] fix: keyerror when the load_table_from_dataframe accesses a unmapped dtype dataframe index (#1535) --- google/cloud/bigquery/_pandas_helpers.py | 4 +- tests/unit/test__pandas_helpers.py | 106 +++++++++++++++++------ 2 files changed, 82 insertions(+), 28 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index dfd966c64..601aa13df 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -481,7 +481,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) if bq_type is None: - sample_data = _first_valid(dataframe[column]) + sample_data = _first_valid(dataframe.reset_index()[column]) if ( isinstance(sample_data, _BaseGeometry) and sample_data is not None # Paranoia @@ -544,7 +544,7 @@ def augment_schema(dataframe, current_bq_schema): augmented_schema.append(field) continue - arrow_table = pyarrow.array(dataframe[field.name]) + arrow_table = pyarrow.array(dataframe.reset_index()[field.name]) if pyarrow.types.is_list(arrow_table.type): # `pyarrow.ListType` diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 885cd318c..07bf03f66 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -930,32 +930,6 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test): assert columns_and_indexes == expected -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_dict_sequence(module_under_test): - df_data = collections.OrderedDict( - [ - ("str_column", ["hello", "world"]), - ("int_column", [42, 8]), - ("bool_column", [True, False]), - ] - ) - dataframe = pandas.DataFrame(df_data) - - dict_schema = [ - {"name": "str_column", "type": "STRING", "mode": "NULLABLE"}, - {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"}, - ] - - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) - - expected_schema = ( - schema.SchemaField("str_column", "STRING", "NULLABLE"), - schema.SchemaField("int_column", "INTEGER", "NULLABLE"), - schema.SchemaField("bool_column", "BOOL", "REQUIRED"), - ) - assert returned_schema == expected_schema - - @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_arrow_with_multiindex(module_under_test): bq_schema = ( @@ -1190,6 +1164,86 @@ def test_dataframe_to_parquet_compression_method(module_under_test): assert call_args.kwargs.get("compression") == "ZSTD" +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_named_index(module_under_test): + df_data = collections.OrderedDict( + [ + ("str_column", ["hello", "world"]), + ("int_column", [42, 8]), + ("bool_column", [True, False]), + ] + ) + index = pandas.Index(["a", "b"], name="str_index") + dataframe = pandas.DataFrame(df_data, index=index) + + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + + expected_schema = ( + schema.SchemaField("str_index", "STRING", "NULLABLE"), + schema.SchemaField("str_column", "STRING", "NULLABLE"), + schema.SchemaField("int_column", "INTEGER", "NULLABLE"), + schema.SchemaField("bool_column", "BOOLEAN", "NULLABLE"), + ) + assert returned_schema == expected_schema + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_multiindex(module_under_test): + df_data = collections.OrderedDict( + [ + ("str_column", ["hello", "world"]), + ("int_column", [42, 8]), + ("bool_column", [True, False]), + ] + ) + index = pandas.MultiIndex.from_tuples( + [ + ("a", 0, datetime.datetime(1999, 12, 31, 23, 59, 59, 999999)), + ("a", 0, datetime.datetime(2000, 1, 1, 0, 0, 0)), + ], + names=["str_index", "int_index", "dt_index"], + ) + dataframe = pandas.DataFrame(df_data, index=index) + + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + + expected_schema = ( + schema.SchemaField("str_index", "STRING", "NULLABLE"), + schema.SchemaField("int_index", "INTEGER", "NULLABLE"), + schema.SchemaField("dt_index", "DATETIME", "NULLABLE"), + schema.SchemaField("str_column", "STRING", "NULLABLE"), + schema.SchemaField("int_column", "INTEGER", "NULLABLE"), + schema.SchemaField("bool_column", "BOOLEAN", "NULLABLE"), + ) + assert returned_schema == expected_schema + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): + df_data = collections.OrderedDict( + [ + ("str_column", ["hello", "world"]), + ("int_column", [42, 8]), + ("bool_column", [True, False]), + ] + ) + dataframe = pandas.DataFrame(df_data) + + dict_schema = [ + {"name": "str_column", "type": "STRING", "mode": "NULLABLE"}, + {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"}, + ] + + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) + + expected_schema = ( + schema.SchemaField("str_column", "STRING", "NULLABLE"), + schema.SchemaField("int_column", "INTEGER", "NULLABLE"), + schema.SchemaField("bool_column", "BOOL", "REQUIRED"), + ) + assert returned_schema == expected_schema + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): dataframe = pandas.DataFrame( From 339eb0e86040a7c30d140800f34810ffc6a7c76b Mon Sep 17 00:00:00 2001 From: r1b Date: Tue, 28 Mar 2023 11:37:04 -0400 Subject: [PATCH 105/536] feat: expose query job on dbapi cursor (#1520) Co-authored-by: Tim Swast --- google/cloud/bigquery/dbapi/cursor.py | 10 ++++++++++ tests/unit/test_dbapi_cursor.py | 23 +++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 03f3b72ca..0dc8f56ab 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -79,6 +79,16 @@ def __init__(self, connection): self._query_job = None self._closed = False + @property + def query_job(self): + """google.cloud.bigquery.job.query.QueryJob: The query job created by + the last ``execute*()`` call. + + .. note:: + If the last ``execute*()`` call was ``executemany()``, this is the + last job created by ``executemany()``.""" + return self._query_job + def close(self): """Mark the cursor as closed, preventing its further use.""" self._closed = True diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index b550bbce0..fc6ea3882 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -662,6 +662,29 @@ def test_is_iterable(self): "Iterating again over the same results should produce no rows.", ) + def test_query_job_wo_execute(self): + from google.cloud.bigquery import dbapi + + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + self.assertIsNone(cursor.query_job) + + def test_query_job_w_execute(self): + from google.cloud.bigquery import dbapi, QueryJob + + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + cursor.execute("SELECT 1;") + self.assertIsInstance(cursor.query_job, QueryJob) + + def test_query_job_w_executemany(self): + from google.cloud.bigquery import dbapi, QueryJob + + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + cursor.executemany("SELECT %s;", (("1",), ("2",))) + self.assertIsInstance(cursor.query_job, QueryJob) + def test__format_operation_w_dict(self): from google.cloud.bigquery.dbapi import cursor From 5d0ebf462e49cb7aea474c9de3a8c2ea4fa11c58 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 28 Mar 2023 09:14:52 -0700 Subject: [PATCH 106/536] chore(main): release 3.9.0 (#1537) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 12 ++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c3fc839a..5bbde01f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.9.0](https://github.com/googleapis/python-bigquery/compare/v3.8.0...v3.9.0) (2023-03-28) + + +### Features + +* Expose query job on dbapi cursor ([#1520](https://github.com/googleapis/python-bigquery/issues/1520)) ([339eb0e](https://github.com/googleapis/python-bigquery/commit/339eb0e86040a7c30d140800f34810ffc6a7c76b)) + + +### Bug Fixes + +* Keyerror when the load_table_from_dataframe accesses a unmapped dtype dataframe index ([#1535](https://github.com/googleapis/python-bigquery/issues/1535)) ([a69348a](https://github.com/googleapis/python-bigquery/commit/a69348a558f48cfc61d03d3e8bb7f9aee48bea86)) + ## [3.8.0](https://github.com/googleapis/python-bigquery/compare/v3.7.0...v3.8.0) (2023-03-24) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 8f4ba4810..0bc275357 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.8.0" +__version__ = "3.9.0" From 67698f79c4e5934370c0dd023b1c9957d1518769 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 29 Mar 2023 21:05:16 -0500 Subject: [PATCH 107/536] chore: update tests to be compatible with pandas 2.0 (#1538) * chore: update tests to be compatible with pandas 2.0 * use StringDtype without storage argument * avoid Float64Dtype on older pandas --- tests/unit/test_table.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 22c7c048d..9bdd7b596 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -3486,7 +3486,11 @@ def test_to_dataframe_w_dtypes_mapper(self): create_bqstorage_client=False, bool_dtype=pandas.BooleanDtype(), int_dtype=pandas.Int32Dtype(), - float_dtype=pandas.StringDtype(), + float_dtype=( + pandas.Float64Dtype() + if hasattr(pandas, "Float64Dtype") + else pandas.StringDtype() + ), string_dtype=pandas.StringDtype(), ) @@ -3494,7 +3498,10 @@ def test_to_dataframe_w_dtypes_mapper(self): self.assertEqual(df.complete.dtype.name, "boolean") self.assertEqual(df.age.dtype.name, "Int32") self.assertEqual(df.seconds.dtype.name, "Int32") - self.assertEqual(df.miles.dtype.name, "string") + self.assertEqual( + df.miles.dtype.name, + "Float64" if hasattr(pandas, "Float64Dtype") else "string", + ) self.assertEqual(df.name.dtype.name, "string") @unittest.skipIf(pandas is None, "Requires `pandas`") From be0255ed69ee6c912f9208a0edaf86dd3f26af08 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 30 Mar 2023 16:43:02 +0100 Subject: [PATCH 108/536] chore(deps): update all dependencies (#1522) --- samples/geography/requirements.txt | 18 +++++++++--------- samples/magics/requirements.txt | 12 ++++++------ samples/snippets/requirements.txt | 14 +++++++------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 75964dbe1..6cb0b6384 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -6,20 +6,20 @@ click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.0.5 -Fiona==1.9.1 +db-dtypes==1.1.0 +Fiona==1.9.2 geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.16.2 -google-cloud-bigquery==3.6.0 -google-cloud-bigquery-storage==2.19.0 +google-auth==2.17.0 +google-cloud-bigquery==3.9.0 +google-cloud-bigquery-storage==2.19.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.1 -googleapis-common-protos==1.58.0 -grpcio==1.51.3 +googleapis-common-protos==1.59.0 +grpcio==1.53.0 idna==3.4 libcst==0.4.9 munch==2.5.0 @@ -34,7 +34,7 @@ pyasn1-modules==0.2.8 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 -pytz==2022.7.1 +pytz==2023.3 PyYAML==6.0 requests==2.28.2 rsa==4.9 @@ -42,4 +42,4 @@ Shapely==2.0.1 six==1.16.0 typing-extensions==4.5.0 typing-inspect==0.8.0 -urllib3==1.26.14 +urllib3==1.26.15 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 55b828f1b..35be2c5da 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,15 +1,15 @@ -db-dtypes==1.0.5 -google-cloud-bigquery-storage==2.19.0 +db-dtypes==1.1.0 +google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 -grpcio==1.51.3 -ipywidgets==8.0.4 +grpcio==1.53.0 +ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.11.0; python_version >= '3.9' +ipython==8.12.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' pyarrow==11.0.0 -pytz==2022.7.1 +pytz==2023.3 typing-extensions==4.5.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 6c6b17ea8..c8a15abaf 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ -db-dtypes==1.0.5 -google-cloud-bigquery==3.6.0 -google-cloud-bigquery-storage==2.19.0 +db-dtypes==1.1.0 +google-cloud-bigquery==3.9.0 +google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 -grpcio==1.51.3 -ipywidgets==8.0.4 +grpcio==1.53.0 +ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.11.0; python_version >= '3.9' +ipython==8.12.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==1.5.3; python_version >= '3.8' pyarrow==11.0.0 -pytz==2022.7.1 +pytz==2023.3 typing-extensions==4.5.0 From 0bf95460866089c8e955c97ae02f2fa443e1ef62 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 4 Apr 2023 10:20:10 -0400 Subject: [PATCH 109/536] chore: updates minimum version of bqstorage (#1542) * chore: updates minimum version of bqstorage * removes unneeded test * updates linting, removes unneeded comment --- setup.py | 2 +- testing/constraints-3.7.txt | 2 +- tests/unit/job/test_query_pandas.py | 6 ------ 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index 51cb6dc75..08106f694 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ # Keep the no-op bqstorage extra for backward compatibility. # See: https://github.com/googleapis/python-bigquery/issues/757 "bqstorage": [ - "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", + "google-cloud-bigquery-storage >= 2.6.0, <3.0.0dev", # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index c94d80abf..2ea482e8b 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -8,7 +8,7 @@ db-dtypes==0.3.0 geopandas==0.9.0 google-api-core==1.31.5 -google-cloud-bigquery-storage==2.0.0 +google-cloud-bigquery-storage==2.6.0 google-cloud-core==1.6.0 google-resumable-media==0.6.0 grpcio==1.47.0 diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index a2444efdd..01b60ceb3 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -59,12 +59,6 @@ @pytest.fixture def table_read_options_kwarg(): - # Create a BigQuery Storage table read options object with pyarrow compression - # enabled if a recent-enough version of google-cloud-bigquery-storage dependency is - # installed to support the compression. - if not hasattr(bigquery_storage, "ArrowSerializationOptions"): - return {} - read_options = bigquery_storage.ReadSession.TableReadOptions( arrow_serialization_options=bigquery_storage.ArrowSerializationOptions( buffer_compression=bigquery_storage.ArrowSerializationOptions.CompressionCodec.LZ4_FRAME From 5838fd3463bc4e2c7bafa9f1eaf89a6f4693ad42 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 6 Apr 2023 17:08:56 +0100 Subject: [PATCH 110/536] chore(deps): update all dependencies (#1540) --- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 6cb0b6384..7c6d98192 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -6,13 +6,13 @@ click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.1.0 +db-dtypes==1.1.1 Fiona==1.9.2 geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.17.0 +google-auth==2.17.2 google-cloud-bigquery==3.9.0 google-cloud-bigquery-storage==2.19.1 google-cloud-core==2.3.2 @@ -26,7 +26,7 @@ munch==2.5.0 mypy-extensions==1.0.0 packaging==23.0 pandas===1.3.5; python_version == '3.7' -pandas==1.5.3; python_version >= '3.8' +pandas==2.0.0; python_version >= '3.8' proto-plus==1.22.2 pyarrow==11.0.0 pyasn1==0.4.8 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 35be2c5da..2d7e464a8 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -db-dtypes==1.1.0 +db-dtypes==1.1.1 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 grpcio==1.53.0 @@ -9,7 +9,7 @@ ipython==8.12.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.3; python_version >= '3.8' +pandas==2.0.0; python_version >= '3.8' pyarrow==11.0.0 pytz==2023.3 typing-extensions==4.5.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index c8a15abaf..8f14d0dc1 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -db-dtypes==1.1.0 +db-dtypes==1.1.1 google-cloud-bigquery==3.9.0 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 @@ -10,7 +10,7 @@ ipython==8.12.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==1.5.3; python_version >= '3.8' +pandas==2.0.0; python_version >= '3.8' pyarrow==11.0.0 pytz==2023.3 typing-extensions==4.5.0 From 9d8409bbe78c25f63a427e60fa4f9913fdd3bdb5 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 18 Apr 2023 11:09:42 -0400 Subject: [PATCH 111/536] =?UTF-8?q?bug:=20fixes=20discrepancy=20btwn=20pyt?= =?UTF-8?q?hon-api-core=20&=20bigquery=20re=20object=20defa=E2=80=A6=20(#1?= =?UTF-8?q?541)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * bug: fixes discrepancy btwn python-api-core & bigquery re object default timeout * Fix: loosen ipywidget dependency (#1504) * fix: updates ipywidget dependency * fix: updates ipywidget version number * chore(main): release 3.6.0 (#1490) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * docs: Remove < 3.11 reference from README (#1502) * chore(python): upgrade gcp-releasetool in .kokoro [autoapprove] (#1508) Source-Link: https://github.com/googleapis/synthtool/commit/5f2a6089f73abf06238fe4310f6a14d6f6d1eed3 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8555f0e37e6261408f792bfd6635102d2da5ad73f8f09bcb24f25e6afb5fac97 Co-authored-by: Owl Bot * feat: add `connection_properties` and `create_session` to `LoadJobConfig` (#1509) * feat: added `connection_properties` and `create_session` in load job * chore(deps): update all dependencies (#1501) * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Anthonios Partheniou Co-authored-by: Owl Bot * chore(deps): update all dependencies (#1513) * feat: add default_query_job_config property and property setter to BQ client (#1511) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [feature request](https://togithub.com/googleapis/python-bigquery/issues/1512) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes - [feature request](https://togithub.com/googleapis/python-bigquery/issues/1512)🦕 - [internal bug](https://b.corp.google.com/issues/271044948) * chore(deps): update all dependencies (#1514) * chore(deps): update dependency charset-normalizer to v3.1.0 (#1518) * chore(main): release 3.7.0 (#1507) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * feat: expose configuration property on CopyJob, ExtractJob, LoadJob, QueryJob (#1521) * feat: expose configuration property on CopyJob, ExtractJob, LoadJob, QueryJob Note for google-cloud-bigquery developers: This also refactors these classes so that `_set_properties` does not modify the `_properties` dictionary in-place. Doing so was also mutating the request object, making it difficult to debug what request was _actually_ sent. Before this change, many tests hallucinated that the request was always equal to the response. * E google.api_core.exceptions.BadRequest: 400 Clone operation with write disposition WRITE_TRUNCATE is not supported. Please try again with WRITE_EMPTY. * chore(deps): Update nox in .kokoro/requirements.in [autoapprove] (#1527) Source-Link: https://github.com/googleapis/synthtool/commit/92006bb3cdc84677aa93c7f5235424ec2b157146 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6 Co-authored-by: Owl Bot * feat: add default LoadJobConfig to Client (#1526) * feat: add bool, int, float, string dtype to to_dataframe (#1529) * fix: loosen ipywidgets restrictions further to address ipython compatibility issues (#1531) * fix: loosen ipywidgets restrictions further to address ipython compatibility issues * include ipywidgets in prerelease deps * show all package versions * add ipykernel dependency * ipykernel in noxfile * oops * chore(main): release 3.8.0 (#1525) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * fix: keyerror when the load_table_from_dataframe accesses a unmapped dtype dataframe index (#1535) * feat: expose query job on dbapi cursor (#1520) Co-authored-by: Tim Swast * chore(main): release 3.9.0 (#1537) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * chore: update tests to be compatible with pandas 2.0 (#1538) * chore: update tests to be compatible with pandas 2.0 * use StringDtype without storage argument * avoid Float64Dtype on older pandas * chore(deps): update all dependencies (#1522) * chore: updates minimum version of bqstorage (#1542) * chore: updates minimum version of bqstorage * removes unneeded test * updates linting, removes unneeded comment * updates conditional checks, comments, adds test * Removes test, adds pragma no cover * Removes test * fix linting error --------- Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Atsushi Yamamoto Co-authored-by: gcf-owl-bot[bot] <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: Owl Bot Co-authored-by: Shobhit Singh Co-authored-by: Mend Renovate Co-authored-by: Anthonios Partheniou Co-authored-by: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Co-authored-by: Tim Swast Co-authored-by: r1b --- google/cloud/bigquery/job/query.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index e4807cc63..62668c601 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -764,7 +764,6 @@ def __init__(self, job_id, query, client, job_config=None): _helpers._set_sub_prop( self._properties, ["configuration", "query", "query"], query ) - self._query_results = None self._done_timeout = None self._transport_timeout = None @@ -1332,6 +1331,15 @@ def _reload_query_results( # the timeout from the futures API is respected. See: # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 timeout_ms = None + + # Python_API_core, as part of a major rewrite of the deadline, timeout, + # retry process sets the timeout value as a Python object(). + # Our system does not natively handle that and instead expects + # either none or a numeric value. If passed a Python object, convert to + # None. + if type(self._done_timeout) == object: # pragma: NO COVER + self._done_timeout = None + if self._done_timeout is not None: # Subtract a buffer for context switching, network latency, etc. api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS From 6458bbd5f035ba1ab9e422176cb717b76b65bb4a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 18 Apr 2023 18:59:44 +0200 Subject: [PATCH 112/536] chore(deps): update all dependencies (#1549) Co-authored-by: Anthonios Partheniou --- samples/geography/requirements-test.txt | 4 ++-- samples/geography/requirements.txt | 10 +++++----- samples/magics/requirements-test.txt | 4 ++-- samples/magics/requirements.txt | 2 +- samples/snippets/requirements-test.txt | 4 ++-- samples/snippets/requirements.txt | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index e0ec46254..3c3afdcb1 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.2.2 -mock==5.0.1 +pytest==7.3.1 +mock==5.0.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 7c6d98192..49dd1c156 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ -attrs==22.2.0 +attrs==23.1.0 certifi==2022.12.7 cffi==1.15.1 charset-normalizer==3.1.0 @@ -7,24 +7,24 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.1.1 -Fiona==1.9.2 +Fiona==1.9.3 geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.17.2 +google-auth==2.17.3 google-cloud-bigquery==3.9.0 google-cloud-bigquery-storage==2.19.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.4.1 googleapis-common-protos==1.59.0 -grpcio==1.53.0 +grpcio==1.54.0 idna==3.4 libcst==0.4.9 munch==2.5.0 mypy-extensions==1.0.0 -packaging==23.0 +packaging==23.1 pandas===1.3.5; python_version == '3.7' pandas==2.0.0; python_version >= '3.8' proto-plus==1.22.2 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 3ed7558d5..9fa68a930 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.2 -mock==5.0.1 +pytest==7.3.1 +mock==5.0.2 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 2d7e464a8..956b03dda 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.1.1 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 -grpcio==1.53.0 +grpcio==1.54.0 ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 3ed7558d5..9fa68a930 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.2 -mock==5.0.1 +pytest==7.3.1 +mock==5.0.2 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8f14d0dc1..034d9d00d 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -2,7 +2,7 @@ db-dtypes==1.1.1 google-cloud-bigquery==3.9.0 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 -grpcio==1.53.0 +grpcio==1.54.0 ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' From 64e913d73832f6363466cbea5ace2337c86fa58b Mon Sep 17 00:00:00 2001 From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Date: Tue, 18 Apr 2023 15:14:47 -0700 Subject: [PATCH 113/536] feat: add date, datetime, time, timestamp dtype to to_dataframe (#1547) --- google/cloud/bigquery/_pandas_helpers.py | 27 ++- google/cloud/bigquery/enums.py | 6 + google/cloud/bigquery/job/query.py | 64 ++++++- google/cloud/bigquery/table.py | 128 +++++++++++--- tests/system/test_pandas.py | 98 +++++++++++ tests/unit/test_table.py | 209 ++++++++++++++++++++++- 6 files changed, 494 insertions(+), 38 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 601aa13df..a14dbec9b 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -290,6 +290,10 @@ def default_types_mapper( int_dtype: Union[Any, None] = None, float_dtype: Union[Any, None] = None, string_dtype: Union[Any, None] = None, + date_dtype: Union[Any, None] = None, + datetime_dtype: Union[Any, None] = None, + time_dtype: Union[Any, None] = None, + timestamp_dtype: Union[Any, None] = None, ): """Create a mapping from pyarrow types to pandas types. @@ -321,13 +325,28 @@ def types_mapper(arrow_data_type): elif ( # If date_as_object is True, we know some DATE columns are # out-of-bounds of what is supported by pandas. - not date_as_object + date_dtype is not None + and not date_as_object and pyarrow.types.is_date(arrow_data_type) ): - return db_dtypes.DateDtype() + return date_dtype - elif pyarrow.types.is_time(arrow_data_type): - return db_dtypes.TimeDtype() + elif ( + datetime_dtype is not None + and pyarrow.types.is_timestamp(arrow_data_type) + and arrow_data_type.tz is None + ): + return datetime_dtype + + elif ( + timestamp_dtype is not None + and pyarrow.types.is_timestamp(arrow_data_type) + and arrow_data_type.tz is not None + ): + return timestamp_dtype + + elif time_dtype is not None and pyarrow.types.is_time(arrow_data_type): + return time_dtype return types_mapper diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index e4e3d22fc..553853630 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -90,6 +90,12 @@ class DefaultPandasDTypes(enum.Enum): INT_DTYPE = object() """Specifies default integer dtype""" + DATE_DTYPE = object() + """Specifies default date dtype""" + + TIME_DTYPE = object() + """Specifies default time dtype""" + class DestinationFormat(object): """The exported file format. The default value is :attr:`CSV`. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 62668c601..315d8201c 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -58,6 +58,11 @@ except ImportError: # pragma: NO COVER pandas = None +try: + import db_dtypes # type: ignore +except ImportError: # pragma: NO COVER + db_dtypes = None + if typing.TYPE_CHECKING: # pragma: NO COVER # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. @@ -1637,6 +1642,10 @@ def to_dataframe( int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, float_dtype: Union[Any, None] = None, string_dtype: Union[Any, None] = None, + date_dtype: Union[Any, None] = DefaultPandasDTypes.DATE_DTYPE, + datetime_dtype: Union[Any, None] = None, + time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE, + timestamp_dtype: Union[Any, None] = None, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1697,7 +1706,7 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 int_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) @@ -1707,7 +1716,7 @@ def to_dataframe( Integer types can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 float_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) @@ -1717,7 +1726,7 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 string_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to @@ -1727,7 +1736,50 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 + + date_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.date32())``) to convert BigQuery Date + type, instead of relying on the default ``db_dtypes.DateDtype()``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery + Date type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type + + .. versionadded:: 3.10.0 + + datetime_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.timestamp("us"))``) to convert BigQuery Datetime + type, instead of relying on the default ``numpy.dtype("datetime64[ns]``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery + Datetime type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type + + .. versionadded:: 3.10.0 + + time_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.time64("us"))``) to convert BigQuery Time + type, instead of relying on the default ``db_dtypes.TimeDtype()``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("object")``. BigQuery Time type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type + + .. versionadded:: 3.10.0 + + timestamp_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))``) to convert BigQuery Timestamp + type, instead of relying on the default ``numpy.dtype("datetime64[ns, UTC]")``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns, UTC]")`` or ``object`` if out of bound. BigQuery + Datetime type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type + + .. versionadded:: 3.10.0 Returns: pandas.DataFrame: @@ -1755,6 +1807,10 @@ def to_dataframe( int_dtype=int_dtype, float_dtype=float_dtype, string_dtype=string_dtype, + date_dtype=date_dtype, + datetime_dtype=datetime_dtype, + time_dtype=time_dtype, + timestamp_dtype=timestamp_dtype, ) # If changing the signature of this method, make sure to apply the same diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 93b0da67f..a34e5dc25 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1935,6 +1935,10 @@ def to_dataframe( int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, float_dtype: Union[Any, None] = None, string_dtype: Union[Any, None] = None, + date_dtype: Union[Any, None] = DefaultPandasDTypes.DATE_DTYPE, + datetime_dtype: Union[Any, None] = None, + time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE, + timestamp_dtype: Union[Any, None] = None, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -1999,7 +2003,7 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 int_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) @@ -2009,7 +2013,7 @@ def to_dataframe( Integer types can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 float_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) @@ -2019,7 +2023,7 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 string_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to @@ -2029,7 +2033,50 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 + + date_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.date32())``) to convert BigQuery Date + type, instead of relying on the default ``db_dtypes.DateDtype()``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery + Date type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type + + .. versionadded:: 3.10.0 + + datetime_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.timestamp("us"))``) to convert BigQuery Datetime + type, instead of relying on the default ``numpy.dtype("datetime64[ns]``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery + Datetime type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type + + .. versionadded:: 3.10.0 + + time_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.time64("us"))``) to convert BigQuery Time + type, instead of relying on the default ``db_dtypes.TimeDtype()``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("object")``. BigQuery Time type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type + + .. versionadded:: 3.10.0 + + timestamp_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))``) to convert BigQuery Timestamp + type, instead of relying on the default ``numpy.dtype("datetime64[ns, UTC]")``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns, UTC]")`` or ``object`` if out of bound. BigQuery + Datetime type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type + + .. versionadded:: 3.10.0 Returns: pandas.DataFrame: @@ -2059,6 +2106,9 @@ def to_dataframe( if int_dtype is DefaultPandasDTypes.INT_DTYPE: int_dtype = pandas.Int64Dtype() + if time_dtype is DefaultPandasDTypes.TIME_DTYPE: + time_dtype = db_dtypes.TimeDtype() + if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"): raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE) @@ -2071,6 +2121,24 @@ def to_dataframe( if string_dtype is not None and not hasattr(string_dtype, "__from_arrow__"): raise ValueError("string_dtype", _NO_SUPPORTED_DTYPE) + if ( + date_dtype is not None + and date_dtype is not DefaultPandasDTypes.DATE_DTYPE + and not hasattr(date_dtype, "__from_arrow__") + ): + raise ValueError("date_dtype", _NO_SUPPORTED_DTYPE) + + if datetime_dtype is not None and not hasattr(datetime_dtype, "__from_arrow__"): + raise ValueError("datetime_dtype", _NO_SUPPORTED_DTYPE) + + if time_dtype is not None and not hasattr(time_dtype, "__from_arrow__"): + raise ValueError("time_dtype", _NO_SUPPORTED_DTYPE) + + if timestamp_dtype is not None and not hasattr( + timestamp_dtype, "__from_arrow__" + ): + raise ValueError("timestamp_dtype", _NO_SUPPORTED_DTYPE) + if dtypes is None: dtypes = {} @@ -2086,25 +2154,29 @@ def to_dataframe( create_bqstorage_client=create_bqstorage_client, ) - # When converting date or timestamp values to nanosecond precision, the result - # can be out of pyarrow bounds. To avoid the error when converting to - # Pandas, we set the date_as_object or timestamp_as_object parameter to True, - # if necessary. - date_as_object = not all( - self.__can_cast_timestamp_ns(col) - for col in record_batch - # Type can be date32 or date64 (plus units). - # See: https://arrow.apache.org/docs/python/api/datatypes.html - if pyarrow.types.is_date(col.type) - ) + # Default date dtype is `db_dtypes.DateDtype()` that could cause out of bounds error, + # when pyarrow converts date values to nanosecond precision. To avoid the error, we + # set the date_as_object parameter to True, if necessary. + date_as_object = False + if date_dtype is DefaultPandasDTypes.DATE_DTYPE: + date_dtype = db_dtypes.DateDtype() + date_as_object = not all( + self.__can_cast_timestamp_ns(col) + for col in record_batch + # Type can be date32 or date64 (plus units). + # See: https://arrow.apache.org/docs/python/api/datatypes.html + if pyarrow.types.is_date(col.type) + ) - timestamp_as_object = not all( - self.__can_cast_timestamp_ns(col) - for col in record_batch - # Type can be datetime and timestamp (plus units and time zone). - # See: https://arrow.apache.org/docs/python/api/datatypes.html - if pyarrow.types.is_timestamp(col.type) - ) + timestamp_as_object = False + if datetime_dtype is None and timestamp_dtype is None: + timestamp_as_object = not all( + self.__can_cast_timestamp_ns(col) + for col in record_batch + # Type can be datetime and timestamp (plus units and time zone). + # See: https://arrow.apache.org/docs/python/api/datatypes.html + if pyarrow.types.is_timestamp(col.type) + ) if len(record_batch) > 0: df = record_batch.to_pandas( @@ -2117,6 +2189,10 @@ def to_dataframe( int_dtype=int_dtype, float_dtype=float_dtype, string_dtype=string_dtype, + date_dtype=date_dtype, + datetime_dtype=datetime_dtype, + time_dtype=time_dtype, + timestamp_dtype=timestamp_dtype, ), ) else: @@ -2317,6 +2393,10 @@ def to_dataframe( int_dtype=None, float_dtype=None, string_dtype=None, + date_dtype=None, + datetime_dtype=None, + time_dtype=None, + timestamp_dtype=None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2330,6 +2410,10 @@ def to_dataframe( int_dtype (Any): Ignored. Added for compatibility with RowIterator. float_dtype (Any): Ignored. Added for compatibility with RowIterator. string_dtype (Any): Ignored. Added for compatibility with RowIterator. + date_dtype (Any): Ignored. Added for compatibility with RowIterator. + datetime_dtype (Any): Ignored. Added for compatibility with RowIterator. + time_dtype (Any): Ignored. Added for compatibility with RowIterator. + timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 91305b450..ea8cc6d63 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -34,6 +34,7 @@ pandas = pytest.importorskip("pandas", minversion="0.23.0") +pyarrow = pytest.importorskip("pyarrow") numpy = pytest.importorskip("numpy") bigquery_storage = pytest.importorskip( @@ -1109,6 +1110,103 @@ def test_list_rows_nullable_scalars_extreme_dtypes( assert df.dtypes["string_col"].name == "object" +@pytest.mark.parametrize( + ("max_results",), + ( + (None,), + (10,), + ), # Use BQ Storage API. # Use REST API. +) +def test_list_rows_nullable_scalars_extreme_dtypes_w_custom_dtype( + bigquery_client, scalars_extreme_table, max_results +): + # TODO(GH#836): Avoid INTERVAL columns until they are supported by the + # BigQuery Storage API and pyarrow. + schema = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + ] + + df = bigquery_client.list_rows( + scalars_extreme_table, + max_results=max_results, + selected_fields=schema, + ).to_dataframe( + bool_dtype=pandas.BooleanDtype(), + int_dtype=pandas.Int64Dtype(), + float_dtype=( + pandas.Float64Dtype() + if hasattr(pandas, "Float64Dtype") + else pandas.StringDtype() + ), + string_dtype=pandas.StringDtype(), + date_dtype=( + pandas.ArrowDtype(pyarrow.date32()) + if hasattr(pandas, "ArrowDtype") + else None + ), + datetime_dtype=( + pandas.ArrowDtype(pyarrow.timestamp("us")) + if hasattr(pandas, "ArrowDtype") + else None + ), + time_dtype=( + pandas.ArrowDtype(pyarrow.time64("us")) + if hasattr(pandas, "ArrowDtype") + else None + ), + timestamp_dtype=( + pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC")) + if hasattr(pandas, "ArrowDtype") + else None + ), + ) + + # These pandas dtypes are handled by the custom dtypes. + assert df.dtypes["bool_col"].name == "boolean" + assert df.dtypes["float64_col"].name == "Float64" + assert df.dtypes["int64_col"].name == "Int64" + assert df.dtypes["string_col"].name == "string" + + assert ( + df.dtypes["date_col"].name == "date32[day][pyarrow]" + if hasattr(pandas, "ArrowDtype") + else "datetime64[ns]" + ) + assert ( + df.dtypes["datetime_col"].name == "timestamp[us][pyarrow]" + if hasattr(pandas, "ArrowDtype") + else "object" + ) + assert ( + df.dtypes["timestamp_col"].name == "timestamp[us, tz=UTC][pyarrow]" + if hasattr(pandas, "ArrowDtype") + else "object" + ) + assert ( + df.dtypes["time_col"].name == "time64[us][pyarrow]" + if hasattr(pandas, "ArrowDtype") + else "object" + ) + + # decimal.Decimal is used to avoid loss of precision. + assert df.dtypes["numeric_col"].name == "object" + assert df.dtypes["bignumeric_col"].name == "object" + + # pandas uses Python bytes objects. + assert df.dtypes["bytes_col"].name == "object" + + def test_upload_time_and_datetime_56(bigquery_client, dataset_id): df = pandas.DataFrame( dict( diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 9bdd7b596..53db635fa 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -46,6 +46,7 @@ PYARROW_VERSION = pkg_resources.parse_version("0.0.1") if pyarrow: + import pyarrow import pyarrow.types PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) @@ -3471,11 +3472,45 @@ def test_to_dataframe_w_dtypes_mapper(self): SchemaField("age", "INTEGER"), SchemaField("seconds", "INT64"), SchemaField("miles", "FLOAT64"), + SchemaField("date", "DATE"), + SchemaField("datetime", "DATETIME"), + SchemaField("time", "TIME"), + SchemaField("timestamp", "TIMESTAMP"), ] row_data = [ - ["Phred Phlyntstone", "true", "32", "23000", "1.77"], - ["Bharney Rhubble", "false", "33", "454000", "6.66"], - ["Wylma Phlyntstone", "true", "29", "341000", "2.0"], + [ + "Phred Phlyntstone", + "true", + "32", + "23000", + "1.77", + "1999-12-01", + "1999-12-31T00:00:00.000000", + "00:00:00.000000", + "1433836800000000", + ], + [ + "Bharney Rhubble", + "false", + "33", + "454000", + "6.66", + "4567-06-14", + "4567-12-31T00:00:00.000000", + "12:00:00.232413", + "81953424000000000", + ], + [ + "Wylma Phlyntstone", + "true", + "29", + "341000", + "2.0", + "9999-12-31", + "9999-12-31T23:59:59.999999", + "23:59:59.999999", + "253402261199999999", + ], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -3492,18 +3527,136 @@ def test_to_dataframe_w_dtypes_mapper(self): else pandas.StringDtype() ), string_dtype=pandas.StringDtype(), + date_dtype=( + pandas.ArrowDtype(pyarrow.date32()) + if hasattr(pandas, "ArrowDtype") + else None + ), + datetime_dtype=( + pandas.ArrowDtype(pyarrow.timestamp("us")) + if hasattr(pandas, "ArrowDtype") + else None + ), + time_dtype=( + pandas.ArrowDtype(pyarrow.time64("us")) + if hasattr(pandas, "ArrowDtype") + else None + ), + timestamp_dtype=( + pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC")) + if hasattr(pandas, "ArrowDtype") + else None + ), ) self.assertIsInstance(df, pandas.DataFrame) + + self.assertEqual(list(df.complete), [True, False, True]) self.assertEqual(df.complete.dtype.name, "boolean") + + self.assertEqual(list(df.age), [32, 33, 29]) self.assertEqual(df.age.dtype.name, "Int32") + + self.assertEqual(list(df.seconds), [23000, 454000, 341000]) self.assertEqual(df.seconds.dtype.name, "Int32") + self.assertEqual( - df.miles.dtype.name, - "Float64" if hasattr(pandas, "Float64Dtype") else "string", + list(df.name), ["Phred Phlyntstone", "Bharney Rhubble", "Wylma Phlyntstone"] ) self.assertEqual(df.name.dtype.name, "string") + if hasattr(pandas, "Float64Dtype"): + self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) + self.assertEqual(df.miles.dtype.name, "Float64") + else: + self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"]) + self.assertEqual(df.miles.dtype.name, "string") + + if hasattr(pandas, "ArrowDtype"): + self.assertEqual( + list(df.date), + [ + datetime.date(1999, 12, 1), + datetime.date(4567, 6, 14), + datetime.date(9999, 12, 31), + ], + ) + self.assertEqual(df.date.dtype.name, "date32[day][pyarrow]") + + self.assertEqual( + list(df.datetime), + [ + datetime.datetime(1999, 12, 31, 0, 0), + datetime.datetime(4567, 12, 31, 0, 0), + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ) + self.assertEqual(df.datetime.dtype.name, "timestamp[us][pyarrow]") + + self.assertEqual( + list(df.time), + [ + datetime.time(0, 0), + datetime.time(12, 0, 0, 232413), + datetime.time(23, 59, 59, 999999), + ], + ) + self.assertEqual(df.time.dtype.name, "time64[us][pyarrow]") + + self.assertEqual( + list(df.timestamp), + [ + datetime.datetime(2015, 6, 9, 8, 0, tzinfo=datetime.timezone.utc), + datetime.datetime(4567, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + datetime.datetime( + 9999, 12, 31, 12, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), + ], + ) + self.assertEqual(df.timestamp.dtype.name, "timestamp[us, tz=UTC][pyarrow]") + else: + self.assertEqual( + list(df.date), + [ + pandas.Timestamp("1999-12-01 00:00:00"), + pandas.Timestamp("2229-03-27 01:41:45.161793536"), + pandas.Timestamp("1816-03-29 05:56:08.066277376"), + ], + ) + self.assertEqual(df.date.dtype.name, "datetime64[ns]") + + self.assertEqual( + list(df.datetime), + [ + datetime.datetime(1999, 12, 31, 0, 0), + datetime.datetime(4567, 12, 31, 0, 0), + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ) + self.assertEqual(df.datetime.dtype.name, "object") + + self.assertEqual( + list(df.time), + [ + datetime.time(0, 0), + datetime.time(12, 0, 0, 232413), + datetime.time(23, 59, 59, 999999), + ], + ) + self.assertEqual(df.time.dtype.name, "object") + + self.assertEqual( + list(df.timestamp), + [ + datetime.datetime(2015, 6, 9, 8, 0, tzinfo=datetime.timezone.utc), + datetime.datetime(4567, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + datetime.datetime( + 9999, 12, 31, 12, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), + ], + ) + self.assertEqual(df.timestamp.dtype.name, "object") + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_none_dtypes_mapper(self): from google.cloud.bigquery.schema import SchemaField @@ -3514,11 +3667,23 @@ def test_to_dataframe_w_none_dtypes_mapper(self): SchemaField("age", "INTEGER"), SchemaField("seconds", "INT64"), SchemaField("miles", "FLOAT64"), + SchemaField("date", "DATE"), + SchemaField("datetime", "DATETIME"), + SchemaField("time", "TIME"), + SchemaField("timestamp", "TIMESTAMP"), ] row_data = [ - ["Phred Phlyntstone", "true", "32", "23000", "1.77"], - ["Bharney Rhubble", "false", "33", "454000", "6.66"], - ["Wylma Phlyntstone", "true", "29", "341000", "2.0"], + [ + "Phred Phlyntstone", + "true", + "32", + "23000", + "1.77", + "1999-12-01", + "1999-12-31T00:00:00.000000", + "23:59:59.999999", + "1433836800000000", + ], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -3531,6 +3696,10 @@ def test_to_dataframe_w_none_dtypes_mapper(self): int_dtype=None, float_dtype=None, string_dtype=None, + date_dtype=None, + datetime_dtype=None, + time_dtype=None, + timestamp_dtype=None, ) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(df.complete.dtype.name, "bool") @@ -3538,6 +3707,10 @@ def test_to_dataframe_w_none_dtypes_mapper(self): self.assertEqual(df.seconds.dtype.name, "int64") self.assertEqual(df.miles.dtype.name, "float64") self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.date.dtype.name, "datetime64[ns]") + self.assertEqual(df.datetime.dtype.name, "datetime64[ns]") + self.assertEqual(df.time.dtype.name, "object") + self.assertEqual(df.timestamp.dtype.name, "datetime64[ns, UTC]") @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_unsupported_dtypes_mapper(self): @@ -3575,6 +3748,26 @@ def test_to_dataframe_w_unsupported_dtypes_mapper(self): create_bqstorage_client=False, string_dtype=numpy.dtype("object"), ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + date_dtype=numpy.dtype("object"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + datetime_dtype=numpy.dtype("datetime64[us]"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + time_dtype=numpy.dtype("datetime64[us]"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + timestamp_dtype=numpy.dtype("datetime64[us]"), + ) @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_column_dtypes(self): From 70e2a8a5044a72b6323c15c713607e80b561b7d4 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 19 Apr 2023 09:35:21 -0400 Subject: [PATCH 114/536] chore(main): release 3.10.0 (#1555) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bbde01f4..034f4f324 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.10.0](https://github.com/googleapis/python-bigquery/compare/v3.9.0...v3.10.0) (2023-04-18) + + +### Features + +* Add date, datetime, time, timestamp dtype to to_dataframe ([#1547](https://github.com/googleapis/python-bigquery/issues/1547)) ([64e913d](https://github.com/googleapis/python-bigquery/commit/64e913d73832f6363466cbea5ace2337c86fa58b)) + ## [3.9.0](https://github.com/googleapis/python-bigquery/compare/v3.8.0...v3.9.0) (2023-03-28) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0bc275357..b674396b2 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.9.0" +__version__ = "3.10.0" From b73b30267674bde03688638471ff489fb5ac20b5 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 19 Apr 2023 18:08:58 +0200 Subject: [PATCH 115/536] chore(deps): update dependency google-cloud-bigquery to v3.10.0 (#1556) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 49dd1c156..8afe5ef4a 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geopandas===0.10.2; python_version == '3.7' geopandas==0.12.2; python_version >= '3.8' google-api-core==2.11.0 google-auth==2.17.3 -google-cloud-bigquery==3.9.0 +google-cloud-bigquery==3.10.0 google-cloud-bigquery-storage==2.19.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 034d9d00d..aa1015481 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.1.1 -google-cloud-bigquery==3.9.0 +google-cloud-bigquery==3.10.0 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 grpcio==1.54.0 From 075aa66a7488636606d1ac0fe21690a37904746a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 20 Apr 2023 18:31:32 +0200 Subject: [PATCH 116/536] chore(deps): update all dependencies (#1557) --- samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 8afe5ef4a..df992f2bf 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -29,8 +29,8 @@ pandas===1.3.5; python_version == '3.7' pandas==2.0.0; python_version >= '3.8' proto-plus==1.22.2 pyarrow==11.0.0 -pyasn1==0.4.8 -pyasn1-modules==0.2.8 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 pycparser==2.21 pyparsing==3.0.9 python-dateutil==2.8.2 From fa6e13d5006caadb36899b4e2a24ca82b7f11b17 Mon Sep 17 00:00:00 2001 From: abdelmegahed <131036743+abdelmegahed@users.noreply.github.com> Date: Wed, 17 May 2023 15:57:28 -0400 Subject: [PATCH 117/536] fix: handle case when expirationMs is None (#1553) * hotfix: handle case when expirationMs is None * Add test for unsetting table exp * Update tests/unit/test_table.py * Update exp_resource for the unsetting_exp test --------- Co-authored-by: Tim Swast --- google/cloud/bigquery/table.py | 6 +++++- tests/unit/test_table.py | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index a34e5dc25..bf4a90317 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -687,7 +687,11 @@ def partition_expiration(self, value): if self.time_partitioning is None: self._properties[api_field] = {"type": TimePartitioningType.DAY} - self._properties[api_field]["expirationMs"] = str(value) + + if value is None: + self._properties[api_field]["expirationMs"] = None + else: + self._properties[api_field]["expirationMs"] = str(value) @property def clustering_fields(self): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 53db635fa..a221bc89e 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1190,6 +1190,25 @@ def test_to_api_repr_w_custom_field(self): } self.assertEqual(resource, exp_resource) + def test_to_api_repr_w_unsetting_expiration(self): + from google.cloud.bigquery.table import TimePartitioningType + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table.partition_expiration = None + resource = table.to_api_repr() + + exp_resource = { + "tableReference": table_ref.to_api_repr(), + "labels": {}, + "timePartitioning": { + "expirationMs": None, + "type": TimePartitioningType.DAY, + }, + } + self.assertEqual(resource, exp_resource) + def test__build_resource_w_custom_field(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) From 9ea2e21c35783782993d1ad2d3b910bbe9981ce2 Mon Sep 17 00:00:00 2001 From: Gal Zahavi <38544478+galz10@users.noreply.github.com> Date: Thu, 18 May 2023 10:52:48 -0700 Subject: [PATCH 118/536] fix: filter None values from OpenTelemetry attributes (#1567) * fix: filter None values from OpenTelemetry attributes * moved filtering out before return value --- google/cloud/bigquery/opentelemetry_tracing.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index 3d0a66ba8..0e1187c6b 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -97,6 +97,11 @@ def _get_final_span_attributes(attributes=None, client=None, job_ref=None): final_attributes.update(job_attributes) if attributes: final_attributes.update(attributes) + + filtered = {k: v for k, v in final_attributes.items() if v is not None} + final_attributes.clear() + final_attributes.update(filtered) + return final_attributes From 84ad11d00d99d279e4e6e0fa4ca60e59575b1dad Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 19 May 2023 15:03:04 -0700 Subject: [PATCH 119/536] feat: add remote function options to routines (#1558) * feat: add remote function options This PR adds support for defining routines as remote UDFs. * basic integration test * augment tests * rename prop * augment tests * more testing * cover shenanigans --------- Co-authored-by: Tim Swast --- google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/routine/__init__.py | 2 + google/cloud/bigquery/routine/routine.py | 153 ++++++++++++++++++ .../routine/test_remote_function_options.py | 128 +++++++++++++++ tests/unit/routine/test_routine.py | 57 +++++++ 5 files changed, 342 insertions(+) create mode 100644 tests/unit/routine/test_remote_function_options.py diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index ebd5b3109..40e3a1578 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -93,6 +93,7 @@ from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType +from google.cloud.bigquery.routine import RemoteFunctionOptions from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.standard_sql import StandardSqlDataType @@ -154,6 +155,7 @@ "Routine", "RoutineArgument", "RoutineReference", + "RemoteFunctionOptions", # Shared helpers "SchemaField", "PolicyTagList", diff --git a/google/cloud/bigquery/routine/__init__.py b/google/cloud/bigquery/routine/__init__.py index 7353073c8..e576b0d49 100644 --- a/google/cloud/bigquery/routine/__init__.py +++ b/google/cloud/bigquery/routine/__init__.py @@ -20,6 +20,7 @@ from google.cloud.bigquery.routine.routine import RoutineArgument from google.cloud.bigquery.routine.routine import RoutineReference from google.cloud.bigquery.routine.routine import RoutineType +from google.cloud.bigquery.routine.routine import RemoteFunctionOptions __all__ = ( @@ -28,4 +29,5 @@ "RoutineArgument", "RoutineReference", "RoutineType", + "RemoteFunctionOptions", ) diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index 3c0919003..36ed03728 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -67,6 +67,7 @@ class Routine(object): "type_": "routineType", "description": "description", "determinism_level": "determinismLevel", + "remote_function_options": "remoteFunctionOptions", } def __init__(self, routine_ref, **kwargs) -> None: @@ -297,6 +298,37 @@ def determinism_level(self): def determinism_level(self, value): self._properties[self._PROPERTY_TO_API_FIELD["determinism_level"]] = value + @property + def remote_function_options(self): + """Optional[google.cloud.bigquery.routine.RemoteFunctionOptions]: Configures remote function + options for a routine. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.routine.RemoteFunctionOptions` or + :data:`None`. + """ + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["remote_function_options"] + ) + if prop is not None: + return RemoteFunctionOptions.from_api_repr(prop) + + @remote_function_options.setter + def remote_function_options(self, value): + api_repr = value + if isinstance(value, RemoteFunctionOptions): + api_repr = value.to_api_repr() + elif value is not None: + raise ValueError( + "value must be google.cloud.bigquery.routine.RemoteFunctionOptions " + "or None" + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["remote_function_options"] + ] = api_repr + @classmethod def from_api_repr(cls, resource: dict) -> "Routine": """Factory: construct a routine given its API representation. @@ -563,3 +595,124 @@ def __str__(self): This is a fully-qualified ID, including the project ID and dataset ID. """ return "{}.{}.{}".format(self.project, self.dataset_id, self.routine_id) + + +class RemoteFunctionOptions(object): + """Configuration options for controlling remote BigQuery functions.""" + + _PROPERTY_TO_API_FIELD = { + "endpoint": "endpoint", + "connection": "connection", + "max_batching_rows": "maxBatchingRows", + "user_defined_context": "userDefinedContext", + } + + def __init__( + self, + endpoint=None, + connection=None, + max_batching_rows=None, + user_defined_context=None, + _properties=None, + ) -> None: + if _properties is None: + _properties = {} + self._properties = _properties + + if endpoint is not None: + self.endpoint = endpoint + if connection is not None: + self.connection = connection + if max_batching_rows is not None: + self.max_batching_rows = max_batching_rows + if user_defined_context is not None: + self.user_defined_context = user_defined_context + + @property + def connection(self): + """string: Fully qualified name of the user-provided connection object which holds the authentication information to send requests to the remote service. + + Format is "projects/{projectId}/locations/{locationId}/connections/{connectionId}" + """ + return _helpers._str_or_none(self._properties.get("connection")) + + @connection.setter + def connection(self, value): + self._properties["connection"] = _helpers._str_or_none(value) + + @property + def endpoint(self): + """string: Endpoint of the user-provided remote service + + Example: "https://us-east1-my_gcf_project.cloudfunctions.net/remote_add" + """ + return _helpers._str_or_none(self._properties.get("endpoint")) + + @endpoint.setter + def endpoint(self, value): + self._properties["endpoint"] = _helpers._str_or_none(value) + + @property + def max_batching_rows(self): + """int64: Max number of rows in each batch sent to the remote service. + + If absent or if 0, BigQuery dynamically decides the number of rows in a batch. + """ + return _helpers._int_or_none(self._properties.get("maxBatchingRows")) + + @max_batching_rows.setter + def max_batching_rows(self, value): + self._properties["maxBatchingRows"] = _helpers._str_or_none(value) + + @property + def user_defined_context(self): + """Dict[str, str]: User-defined context as a set of key/value pairs, + which will be sent as function invocation context together with + batched arguments in the requests to the remote service. The total + number of bytes of keys and values must be less than 8KB. + """ + return self._properties.get("userDefinedContext") + + @user_defined_context.setter + def user_defined_context(self, value): + if not isinstance(value, dict): + raise ValueError("value must be dictionary") + self._properties["userDefinedContext"] = value + + @classmethod + def from_api_repr(cls, resource: dict) -> "RemoteFunctionOptions": + """Factory: construct remote function options given its API representation. + + Args: + resource (Dict[str, object]): Resource, as returned from the API. + + Returns: + google.cloud.bigquery.routine.RemoteFunctionOptions: + Python object, as parsed from ``resource``. + """ + ref = cls() + ref._properties = resource + return ref + + def to_api_repr(self) -> dict: + """Construct the API resource representation of this RemoteFunctionOptions. + + Returns: + Dict[str, object]: Remote function options represented as an API resource. + """ + return self._properties + + def __eq__(self, other): + if not isinstance(other, RemoteFunctionOptions): + return NotImplemented + return self._properties == other._properties + + def __ne__(self, other): + return not self == other + + def __repr__(self): + all_properties = [ + "{}={}".format(property_name, repr(getattr(self, property_name))) + for property_name in sorted(self._PROPERTY_TO_API_FIELD) + ] + return "RemoteFunctionOptions({})".format(", ".join(all_properties)) diff --git a/tests/unit/routine/test_remote_function_options.py b/tests/unit/routine/test_remote_function_options.py new file mode 100644 index 000000000..b476dca1e --- /dev/null +++ b/tests/unit/routine/test_remote_function_options.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +ENDPOINT = "https://some.endpoint" +CONNECTION = "connection_string" +MAX_BATCHING_ROWS = 50 +USER_DEFINED_CONTEXT = { + "foo": "bar", +} + + +@pytest.fixture +def target_class(): + from google.cloud.bigquery.routine import RemoteFunctionOptions + + return RemoteFunctionOptions + + +def test_ctor(target_class): + + options = target_class( + endpoint=ENDPOINT, + connection=CONNECTION, + max_batching_rows=MAX_BATCHING_ROWS, + user_defined_context=USER_DEFINED_CONTEXT, + ) + assert options.endpoint == ENDPOINT + assert options.connection == CONNECTION + assert options.max_batching_rows == MAX_BATCHING_ROWS + assert options.user_defined_context == USER_DEFINED_CONTEXT + + +def test_empty_ctor(target_class): + options = target_class() + assert options._properties == {} + options = target_class(_properties=None) + assert options._properties == {} + options = target_class(_properties={}) + assert options._properties == {} + + +def test_ctor_bad_context(target_class): + with pytest.raises(ValueError, match="value must be dictionary"): + target_class(user_defined_context=[1, 2, 3, 4]) + + +def test_from_api_repr(target_class): + resource = { + "endpoint": ENDPOINT, + "connection": CONNECTION, + "maxBatchingRows": MAX_BATCHING_ROWS, + "userDefinedContext": USER_DEFINED_CONTEXT, + "someRandomField": "someValue", + } + options = target_class.from_api_repr(resource) + assert options.endpoint == ENDPOINT + assert options.connection == CONNECTION + assert options.max_batching_rows == MAX_BATCHING_ROWS + assert options.user_defined_context == USER_DEFINED_CONTEXT + assert options._properties["someRandomField"] == "someValue" + + +def test_from_api_repr_w_minimal_resource(target_class): + resource = {} + options = target_class.from_api_repr(resource) + assert options.endpoint is None + assert options.connection is None + assert options.max_batching_rows is None + assert options.user_defined_context is None + + +def test_from_api_repr_w_unknown_fields(target_class): + resource = {"thisFieldIsNotInTheProto": "just ignore me"} + options = target_class.from_api_repr(resource) + assert options._properties is resource + + +def test_eq(target_class): + options = target_class( + endpoint=ENDPOINT, + connection=CONNECTION, + max_batching_rows=MAX_BATCHING_ROWS, + user_defined_context=USER_DEFINED_CONTEXT, + ) + other_options = target_class( + endpoint=ENDPOINT, + connection=CONNECTION, + max_batching_rows=MAX_BATCHING_ROWS, + user_defined_context=USER_DEFINED_CONTEXT, + ) + assert options == other_options + assert not (options != other_options) + + empty_options = target_class() + assert not (options == empty_options) + assert options != empty_options + + notanarg = object() + assert not (options == notanarg) + assert options != notanarg + + +def test_repr(target_class): + options = target_class( + endpoint=ENDPOINT, + connection=CONNECTION, + max_batching_rows=MAX_BATCHING_ROWS, + user_defined_context=USER_DEFINED_CONTEXT, + ) + actual_repr = repr(options) + assert actual_repr == ( + "RemoteFunctionOptions(connection='connection_string', endpoint='https://some.endpoint', max_batching_rows=50, user_defined_context={'foo': 'bar'})" + ) diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py index 80a3def73..87767200c 100644 --- a/tests/unit/routine/test_routine.py +++ b/tests/unit/routine/test_routine.py @@ -75,6 +75,13 @@ def test_ctor_w_properties(target_class): description = "A routine description." determinism_level = bigquery.DeterminismLevel.NOT_DETERMINISTIC + options = bigquery.RemoteFunctionOptions( + endpoint="https://some.endpoint", + connection="connection_string", + max_batching_rows=99, + user_defined_context={"foo": "bar"}, + ) + actual_routine = target_class( routine_id, arguments=arguments, @@ -84,6 +91,7 @@ def test_ctor_w_properties(target_class): type_=type_, description=description, determinism_level=determinism_level, + remote_function_options=options, ) ref = RoutineReference.from_string(routine_id) @@ -97,6 +105,18 @@ def test_ctor_w_properties(target_class): assert ( actual_routine.determinism_level == bigquery.DeterminismLevel.NOT_DETERMINISTIC ) + assert actual_routine.remote_function_options == options + + +def test_ctor_invalid_remote_function_options(target_class): + with pytest.raises( + ValueError, + match=".*must be google.cloud.bigquery.routine.RemoteFunctionOptions.*", + ): + target_class( + "my-proj.my_dset.my_routine", + remote_function_options=object(), + ) def test_from_api_repr(target_class): @@ -126,6 +146,14 @@ def test_from_api_repr(target_class): "someNewField": "someValue", "description": "A routine description.", "determinismLevel": bigquery.DeterminismLevel.DETERMINISTIC, + "remoteFunctionOptions": { + "endpoint": "https://some.endpoint", + "connection": "connection_string", + "maxBatchingRows": 50, + "userDefinedContext": { + "foo": "bar", + }, + }, } actual_routine = target_class.from_api_repr(resource) @@ -160,6 +188,10 @@ def test_from_api_repr(target_class): assert actual_routine._properties["someNewField"] == "someValue" assert actual_routine.description == "A routine description." assert actual_routine.determinism_level == "DETERMINISTIC" + assert actual_routine.remote_function_options.endpoint == "https://some.endpoint" + assert actual_routine.remote_function_options.connection == "connection_string" + assert actual_routine.remote_function_options.max_batching_rows == 50 + assert actual_routine.remote_function_options.user_defined_context == {"foo": "bar"} def test_from_api_repr_tvf_function(target_class): @@ -261,6 +293,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert actual_routine.type_ is None assert actual_routine.description is None assert actual_routine.determinism_level is None + assert actual_routine.remote_function_options is None def test_from_api_repr_w_unknown_fields(target_class): @@ -421,6 +454,24 @@ def test_from_api_repr_w_unknown_fields(target_class): ["someNewField"], {"someNewField": "someValue"}, ), + ( + { + "routineType": "SCALAR_FUNCTION", + "remoteFunctionOptions": { + "endpoint": "https://some_endpoint", + "connection": "connection_string", + "max_batching_rows": 101, + }, + }, + ["remote_function_options"], + { + "remoteFunctionOptions": { + "endpoint": "https://some_endpoint", + "connection": "connection_string", + "max_batching_rows": 101, + }, + }, + ), ], ) def test_build_resource(object_under_test, resource, filter_fields, expected): @@ -497,6 +548,12 @@ def test_set_description_w_none(object_under_test): assert object_under_test._properties["description"] is None +def test_set_remote_function_options_w_none(object_under_test): + object_under_test.remote_function_options = None + assert object_under_test.remote_function_options is None + assert object_under_test._properties["remoteFunctionOptions"] is None + + def test_repr(target_class): model = target_class("my-proj.my_dset.my_routine") actual_routine = repr(model) From 280656c47f0aca924c881bdde811702f8693a469 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 25 May 2023 12:44:22 -0400 Subject: [PATCH 120/536] build(deps): bump requests from 2.28.1 to 2.31.0 in /synthtool/gcp/templates/python_library/.kokoro (#1574) Source-Link: https://github.com/googleapis/synthtool/commit/30bd01b4ab78bf1b2a425816e15b3e7e090993dd Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:9bc5fa3b62b091f60614c08a7fb4fd1d3e1678e326f34dd66ce1eefb5dc3267b Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 3 ++- .kokoro/requirements.txt | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index b8edda51c..32b3c4865 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6 + digest: sha256:9bc5fa3b62b091f60614c08a7fb4fd1d3e1678e326f34dd66ce1eefb5dc3267b +# created: 2023-05-25T14:56:16.294623272Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 66a2172a7..3b8d7ee81 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -419,9 +419,9 @@ readme-renderer==37.3 \ --hash=sha256:cd653186dfc73055656f090f227f5cb22a046d7f71a841dfa305f55c9a513273 \ --hash=sha256:f67a16caedfa71eef48a31b39708637a6f4664c4394801a7b0d6432d13907343 # via twine -requests==2.28.1 \ - --hash=sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983 \ - --hash=sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349 +requests==2.31.0 \ + --hash=sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f \ + --hash=sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1 # via # gcp-releasetool # google-api-core From aa09482dec2b67839759856874e5ddffef0d700a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 26 May 2023 00:12:04 +0200 Subject: [PATCH 121/536] chore(deps): update dependency requests to v2.31.0 [security] (#1573) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index df992f2bf..d5c384e07 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -36,7 +36,7 @@ pyparsing==3.0.9 python-dateutil==2.8.2 pytz==2023.3 PyYAML==6.0 -requests==2.28.2 +requests==2.31.0 rsa==4.9 Shapely==2.0.1 six==1.16.0 From 8d93ba9d4f1fe7e995774020cb6d5268bb0cecfb Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 1 Jun 2023 13:24:58 +0200 Subject: [PATCH 122/536] chore(deps): update all dependencies (#1560) * chore(deps): update all dependencies * revert urllib3 --------- Co-authored-by: meredithslota Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 24 ++++++++++++------------ samples/magics/requirements.txt | 10 +++++----- samples/snippets/requirements.txt | 10 +++++----- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d5c384e07..b6695909c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==23.1.0 -certifi==2022.12.7 +certifi==2023.5.7 cffi==1.15.1 charset-normalizer==3.1.0 click==8.1.3 @@ -7,28 +7,28 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.1.1 -Fiona==1.9.3 +Fiona==1.9.4.post1 geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' -geopandas==0.12.2; python_version >= '3.8' +geopandas==0.13.0; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.17.3 +google-auth==2.18.1 google-cloud-bigquery==3.10.0 google-cloud-bigquery-storage==2.19.1 google-cloud-core==2.3.2 google-crc32c==1.5.0 -google-resumable-media==2.4.1 +google-resumable-media==2.5.0 googleapis-common-protos==1.59.0 -grpcio==1.54.0 +grpcio==1.54.2 idna==3.4 -libcst==0.4.9 -munch==2.5.0 +libcst==1.0.0 +munch==3.0.0 mypy-extensions==1.0.0 packaging==23.1 pandas===1.3.5; python_version == '3.7' -pandas==2.0.0; python_version >= '3.8' +pandas==2.0.1; python_version >= '3.8' proto-plus==1.22.2 -pyarrow==11.0.0 +pyarrow==12.0.0 pyasn1==0.5.0 pyasn1-modules==0.3.0 pycparser==2.21 @@ -40,6 +40,6 @@ requests==2.31.0 rsa==4.9 Shapely==2.0.1 six==1.16.0 -typing-extensions==4.5.0 -typing-inspect==0.8.0 +typing-extensions==4.6.2 +typing-inspect==0.9.0 urllib3==1.26.15 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 956b03dda..b50144baa 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,15 +1,15 @@ db-dtypes==1.1.1 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 -grpcio==1.54.0 +grpcio==1.54.2 ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.12.0; python_version >= '3.9' +ipython==8.13.2; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.0; python_version >= '3.8' -pyarrow==11.0.0 +pandas==2.0.1; python_version >= '3.8' +pyarrow==12.0.0 pytz==2023.3 -typing-extensions==4.5.0 +typing-extensions==4.6.2 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index aa1015481..726bd2046 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -2,15 +2,15 @@ db-dtypes==1.1.1 google-cloud-bigquery==3.10.0 google-cloud-bigquery-storage==2.19.1 google-auth-oauthlib==1.0.0 -grpcio==1.54.0 +grpcio==1.54.2 ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.12.0; python_version >= '3.9' +ipython==8.13.2; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.0; python_version >= '3.8' -pyarrow==11.0.0 +pandas==2.0.1; python_version >= '3.8' +pyarrow==12.0.0 pytz==2023.3 -typing-extensions==4.5.0 +typing-extensions==4.6.2 From ff70298564fa8a2c16b12a86b1e9f23f15e6821a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 1 Jun 2023 13:25:48 +0200 Subject: [PATCH 123/536] chore(deps): update all dependencies (#1560) * chore(deps): update all dependencies * revert urllib3 --------- Co-authored-by: meredithslota Co-authored-by: Anthonios Partheniou From d2b2c96818e79d16bea7eeb4c125933d1d0b2394 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 1 Jun 2023 13:58:41 +0200 Subject: [PATCH 124/536] chore(deps): update all dependencies (#1576) * chore(deps): update all dependencies * revert urllib3 --------- Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index b6695909c..82a1daadc 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,9 +12,9 @@ geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.13.0; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.18.1 +google-auth==2.19.0 google-cloud-bigquery==3.10.0 -google-cloud-bigquery-storage==2.19.1 +google-cloud-bigquery-storage==2.20.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 google-resumable-media==2.5.0 @@ -26,7 +26,7 @@ munch==3.0.0 mypy-extensions==1.0.0 packaging==23.1 pandas===1.3.5; python_version == '3.7' -pandas==2.0.1; python_version >= '3.8' +pandas==2.0.2; python_version >= '3.8' proto-plus==1.22.2 pyarrow==12.0.0 pyasn1==0.5.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index b50144baa..b545916c3 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.1.1 -google-cloud-bigquery-storage==2.19.1 +google-cloud-bigquery-storage==2.20.0 google-auth-oauthlib==1.0.0 grpcio==1.54.2 ipywidgets==8.0.6 @@ -9,7 +9,7 @@ ipython==8.13.2; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.1; python_version >= '3.8' +pandas==2.0.2; python_version >= '3.8' pyarrow==12.0.0 pytz==2023.3 typing-extensions==4.6.2 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 726bd2046..d2878d202 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.1.1 google-cloud-bigquery==3.10.0 -google-cloud-bigquery-storage==2.19.1 +google-cloud-bigquery-storage==2.20.0 google-auth-oauthlib==1.0.0 grpcio==1.54.2 ipywidgets==8.0.6 @@ -10,7 +10,7 @@ ipython==8.13.2; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.1; python_version >= '3.8' +pandas==2.0.2; python_version >= '3.8' pyarrow==12.0.0 pytz==2023.3 typing-extensions==4.6.2 From 09cc1df6babaf90ea0b0a6fd926f8013822a31ed Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 1 Jun 2023 11:10:15 -0500 Subject: [PATCH 125/536] fix: raise most recent exception when not able to fetch query job after starting the job (#1362) * fix: raise most recent exception when not able to fetch query job after starting the job Towards internal issue 247809965 * update unit test * revert most changes to the test and explain why we're looking for a different exception from the original 'conflict' --- google/cloud/bigquery/_job_helpers.py | 2 +- tests/unit/test_client.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 33fc72261..57846b190 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -105,7 +105,7 @@ def do_query(): timeout=timeout, ) except core_exceptions.GoogleAPIError: # (includes RetryError) - raise create_exc + raise else: return query_job else: diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c155e2bc6..cf0aa4028 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5092,12 +5092,14 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): QueryJob, "_begin", side_effect=job_create_error ) get_job_patcher = mock.patch.object( - client, "get_job", side_effect=DataLoss("we lost yor job, sorry") + client, "get_job", side_effect=DataLoss("we lost your job, sorry") ) with job_begin_patcher, get_job_patcher: - # If get job request fails, the original exception should be raised. - with pytest.raises(Conflict, match="Job already exists."): + # If get job request fails but supposedly there does exist a job + # with this ID already, raise the exception explaining why we + # couldn't recover the job. + with pytest.raises(DataLoss, match="we lost your job, sorry"): client.query("SELECT 1;", job_id=None) def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): From 3360e430c06fca6d0da71e8b0f3c0dba2e0170dd Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 09:41:36 -0700 Subject: [PATCH 126/536] chore(main): release 3.11.0 (#1568) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 14 ++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 034f4f324..bc9cfd7b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.11.0](https://github.com/googleapis/python-bigquery/compare/v3.10.0...v3.11.0) (2023-06-01) + + +### Features + +* Add remote function options to routines ([#1558](https://github.com/googleapis/python-bigquery/issues/1558)) ([84ad11d](https://github.com/googleapis/python-bigquery/commit/84ad11d00d99d279e4e6e0fa4ca60e59575b1dad)) + + +### Bug Fixes + +* Filter None values from OpenTelemetry attributes ([#1567](https://github.com/googleapis/python-bigquery/issues/1567)) ([9ea2e21](https://github.com/googleapis/python-bigquery/commit/9ea2e21c35783782993d1ad2d3b910bbe9981ce2)) +* Handle case when expirationMs is None ([#1553](https://github.com/googleapis/python-bigquery/issues/1553)) ([fa6e13d](https://github.com/googleapis/python-bigquery/commit/fa6e13d5006caadb36899b4e2a24ca82b7f11b17)) +* Raise most recent exception when not able to fetch query job after starting the job ([#1362](https://github.com/googleapis/python-bigquery/issues/1362)) ([09cc1df](https://github.com/googleapis/python-bigquery/commit/09cc1df6babaf90ea0b0a6fd926f8013822a31ed)) + ## [3.10.0](https://github.com/googleapis/python-bigquery/compare/v3.9.0...v3.10.0) (2023-04-18) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index b674396b2..0e93e961e 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.10.0" +__version__ = "3.11.0" From 63ba4a1f9898d3dd93bbc53295712a7b9fedc3ea Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sat, 3 Jun 2023 19:22:05 -0400 Subject: [PATCH 127/536] build(deps): bump cryptography from 39.0.1 to 41.0.0 in /synthtool/gcp/templates/python_library/.kokoro (#1579) Source-Link: https://github.com/googleapis/synthtool/commit/d0f51a0c2a9a6bcca86911eabea9e484baadf64b Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:240b5bcc2bafd450912d2da2be15e62bc6de2cf839823ae4bf94d4f392b451dc Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/requirements.txt | 42 +++++++++++++++++++-------------------- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 32b3c4865..02a4dedce 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:9bc5fa3b62b091f60614c08a7fb4fd1d3e1678e326f34dd66ce1eefb5dc3267b -# created: 2023-05-25T14:56:16.294623272Z + digest: sha256:240b5bcc2bafd450912d2da2be15e62bc6de2cf839823ae4bf94d4f392b451dc +# created: 2023-06-03T21:25:37.968717478Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 3b8d7ee81..c7929db6d 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -113,28 +113,26 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==39.0.1 \ - --hash=sha256:0f8da300b5c8af9f98111ffd512910bc792b4c77392a9523624680f7956a99d4 \ - --hash=sha256:35f7c7d015d474f4011e859e93e789c87d21f6f4880ebdc29896a60403328f1f \ - --hash=sha256:5aa67414fcdfa22cf052e640cb5ddc461924a045cacf325cd164e65312d99502 \ - --hash=sha256:5d2d8b87a490bfcd407ed9d49093793d0f75198a35e6eb1a923ce1ee86c62b41 \ - --hash=sha256:6687ef6d0a6497e2b58e7c5b852b53f62142cfa7cd1555795758934da363a965 \ - --hash=sha256:6f8ba7f0328b79f08bdacc3e4e66fb4d7aab0c3584e0bd41328dce5262e26b2e \ - --hash=sha256:706843b48f9a3f9b9911979761c91541e3d90db1ca905fd63fee540a217698bc \ - --hash=sha256:807ce09d4434881ca3a7594733669bd834f5b2c6d5c7e36f8c00f691887042ad \ - --hash=sha256:83e17b26de248c33f3acffb922748151d71827d6021d98c70e6c1a25ddd78505 \ - --hash=sha256:96f1157a7c08b5b189b16b47bc9db2332269d6680a196341bf30046330d15388 \ - --hash=sha256:aec5a6c9864be7df2240c382740fcf3b96928c46604eaa7f3091f58b878c0bb6 \ - --hash=sha256:b0afd054cd42f3d213bf82c629efb1ee5f22eba35bf0eec88ea9ea7304f511a2 \ - --hash=sha256:ced4e447ae29ca194449a3f1ce132ded8fcab06971ef5f618605aacaa612beac \ - --hash=sha256:d1f6198ee6d9148405e49887803907fe8962a23e6c6f83ea7d98f1c0de375695 \ - --hash=sha256:e124352fd3db36a9d4a21c1aa27fd5d051e621845cb87fb851c08f4f75ce8be6 \ - --hash=sha256:e422abdec8b5fa8462aa016786680720d78bdce7a30c652b7fadf83a4ba35336 \ - --hash=sha256:ef8b72fa70b348724ff1218267e7f7375b8de4e8194d1636ee60510aae104cd0 \ - --hash=sha256:f0c64d1bd842ca2633e74a1a28033d139368ad959872533b1bab8c80e8240a0c \ - --hash=sha256:f24077a3b5298a5a06a8e0536e3ea9ec60e4c7ac486755e5fb6e6ea9b3500106 \ - --hash=sha256:fdd188c8a6ef8769f148f88f859884507b954cc64db6b52f66ef199bb9ad660a \ - --hash=sha256:fe913f20024eb2cb2f323e42a64bdf2911bb9738a15dba7d3cce48151034e3a8 +cryptography==41.0.0 \ + --hash=sha256:0ddaee209d1cf1f180f1efa338a68c4621154de0afaef92b89486f5f96047c55 \ + --hash=sha256:14754bcdae909d66ff24b7b5f166d69340ccc6cb15731670435efd5719294895 \ + --hash=sha256:344c6de9f8bda3c425b3a41b319522ba3208551b70c2ae00099c205f0d9fd3be \ + --hash=sha256:34d405ea69a8b34566ba3dfb0521379b210ea5d560fafedf9f800a9a94a41928 \ + --hash=sha256:3680248309d340fda9611498a5319b0193a8dbdb73586a1acf8109d06f25b92d \ + --hash=sha256:3c5ef25d060c80d6d9f7f9892e1d41bb1c79b78ce74805b8cb4aa373cb7d5ec8 \ + --hash=sha256:4ab14d567f7bbe7f1cdff1c53d5324ed4d3fc8bd17c481b395db224fb405c237 \ + --hash=sha256:5c1f7293c31ebc72163a9a0df246f890d65f66b4a40d9ec80081969ba8c78cc9 \ + --hash=sha256:6b71f64beeea341c9b4f963b48ee3b62d62d57ba93eb120e1196b31dc1025e78 \ + --hash=sha256:7d92f0248d38faa411d17f4107fc0bce0c42cae0b0ba5415505df72d751bf62d \ + --hash=sha256:8362565b3835ceacf4dc8f3b56471a2289cf51ac80946f9087e66dc283a810e0 \ + --hash=sha256:84a165379cb9d411d58ed739e4af3396e544eac190805a54ba2e0322feb55c46 \ + --hash=sha256:88ff107f211ea696455ea8d911389f6d2b276aabf3231bf72c8853d22db755c5 \ + --hash=sha256:9f65e842cb02550fac96536edb1d17f24c0a338fd84eaf582be25926e993dde4 \ + --hash=sha256:a4fc68d1c5b951cfb72dfd54702afdbbf0fb7acdc9b7dc4301bbf2225a27714d \ + --hash=sha256:b7f2f5c525a642cecad24ee8670443ba27ac1fab81bba4cc24c7b6b41f2d0c75 \ + --hash=sha256:b846d59a8d5a9ba87e2c3d757ca019fa576793e8758174d3868aecb88d6fc8eb \ + --hash=sha256:bf8fc66012ca857d62f6a347007e166ed59c0bc150cefa49f28376ebe7d992a2 \ + --hash=sha256:f5d0bf9b252f30a31664b6f64432b4730bb7038339bd18b1fafe129cfc2be9be # via # gcp-releasetool # secretstorage From 6efdce13cc3b25d37d22a856f2308daed569e637 Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Fri, 9 Jun 2023 18:01:56 +0300 Subject: [PATCH 128/536] docs: add/reformat return types for cloud RAD docs (#1582) * docs: add/reformat return types for cloud RAD docs * fix typos --- google/cloud/bigquery/client.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index d8fbfb69e..d81816d41 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -327,7 +327,8 @@ def get_service_account_email( before using ``retry``. Returns: - str: service account email address + str: + service account email address Example: @@ -1932,7 +1933,8 @@ def job_from_resource( resource (Dict): one job resource from API response Returns: - The job instance, constructed via the resource. + Union[job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob]: + The job instance, constructed via the resource. """ config = resource.get("configuration", {}) if "load" in config: @@ -2064,7 +2066,8 @@ def get_job( before using ``retry``. Returns: - Job instance, based on the resource returned by the API. + Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]: + Job instance, based on the resource returned by the API. """ extra_params = {"projection": "full"} @@ -3954,12 +3957,13 @@ def _schema_to_json_file_object(self, schema_list, file_obj): """ json.dump(schema_list, file_obj, indent=2, sort_keys=True) - def schema_from_json(self, file_or_path: "PathType"): + def schema_from_json(self, file_or_path: "PathType") -> List[SchemaField]: """Takes a file object or file path that contains json that describes a table schema. Returns: - List of schema field objects. + List[SchemaField]: + List of :class:`~google.cloud.bigquery.schema.SchemaField` objects. """ if isinstance(file_or_path, io.IOBase): return self._schema_from_json_file_object(file_or_path) From 41799b4d22101bef8d3b00b52759a2390e7c624d Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 13 Jun 2023 11:11:38 -0400 Subject: [PATCH 129/536] chore(main): release 3.11.1 (#1583) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc9cfd7b4..9003d2bfc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.11.1](https://github.com/googleapis/python-bigquery/compare/v3.11.0...v3.11.1) (2023-06-09) + + +### Documentation + +* Add/reformat return types for cloud RAD docs ([#1582](https://github.com/googleapis/python-bigquery/issues/1582)) ([6efdce1](https://github.com/googleapis/python-bigquery/commit/6efdce13cc3b25d37d22a856f2308daed569e637)) + ## [3.11.0](https://github.com/googleapis/python-bigquery/compare/v3.10.0...v3.11.0) (2023-06-01) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0e93e961e..90c53a0dd 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.11.0" +__version__ = "3.11.1" From d73cf495b8dfa032a43dc1d58599d0691aaa0efb Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 21 Jun 2023 10:56:12 -0400 Subject: [PATCH 130/536] fix: updates tests based on revised hacker_news tables (#1591) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes four broken tests that failed due to an unexpected change in the Google Public Dataset: Hacker News. The `comments` table was deleted and only the `full` table remained. This edit updates the name of the table in four tests and updates the names of columns in the table as well as updates the expected results for one of the tests. Fixes #1590 🦕 --- tests/system/test_client.py | 29 +++++++++++++++-------------- tests/system/test_pandas.py | 20 ++++++++++---------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 1437328a8..f4757e30f 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1706,8 +1706,8 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): cursor.execute( """ - SELECT id, `by`, time_ts - FROM `bigquery-public-data.hacker_news.comments` + SELECT id, `by`, timestamp + FROM `bigquery-public-data.hacker_news.full` ORDER BY `id` ASC LIMIT 100000 """ @@ -1717,27 +1717,28 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): field_name = operator.itemgetter(0) fetched_data = [sorted(row.items(), key=field_name) for row in result_rows] - # Since DB API is not thread safe, only a single result stream should be # requested by the BQ storage client, meaning that results should arrive # in the sorted order. + expected_data = [ [ - ("by", "sama"), - ("id", 15), - ("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)), + ("by", "pg"), + ("id", 1), + ("timestamp", datetime.datetime(2006, 10, 9, 18, 21, 51, tzinfo=UTC)), ], [ - ("by", "pg"), - ("id", 17), - ("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)), + ("by", "phyllis"), + ("id", 2), + ("timestamp", datetime.datetime(2006, 10, 9, 18, 30, 28, tzinfo=UTC)), ], [ - ("by", "pg"), - ("id", 22), - ("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)), + ("by", "phyllis"), + ("id", 3), + ("timestamp", datetime.datetime(2006, 10, 9, 18, 40, 33, tzinfo=UTC)), ], ] + self.assertEqual(fetched_data, expected_data) def test_dbapi_dry_run_query(self): @@ -1769,8 +1770,8 @@ def test_dbapi_connection_does_not_leak_sockets(self): cursor.execute( """ - SELECT id, `by`, time_ts - FROM `bigquery-public-data.hacker_news.comments` + SELECT id, `by`, timestamp + FROM `bigquery-public-data.hacker_news.full` ORDER BY `id` ASC LIMIT 100000 """ diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index ea8cc6d63..726b68f7c 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -740,8 +740,8 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( def test_query_results_to_dataframe(bigquery_client): QUERY = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` + SELECT id, `by`, timestamp, dead + FROM `bigquery-public-data.hacker_news.full` LIMIT 10 """ @@ -749,12 +749,12 @@ def test_query_results_to_dataframe(bigquery_client): assert isinstance(df, pandas.DataFrame) assert len(df) == 10 # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] + column_names = ["id", "by", "timestamp", "dead"] assert list(df) == column_names # verify the column names exp_datatypes = { "id": int, - "author": str, - "time_ts": pandas.Timestamp, + "by": str, + "timestamp": pandas.Timestamp, "dead": bool, } for _, row in df.iterrows(): @@ -766,8 +766,8 @@ def test_query_results_to_dataframe(bigquery_client): def test_query_results_to_dataframe_w_bqstorage(bigquery_client): query = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` + SELECT id, `by`, timestamp, dead + FROM `bigquery-public-data.hacker_news.full` LIMIT 10 """ @@ -779,12 +779,12 @@ def test_query_results_to_dataframe_w_bqstorage(bigquery_client): assert isinstance(df, pandas.DataFrame) assert len(df) == 10 # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] + column_names = ["id", "by", "timestamp", "dead"] assert list(df) == column_names exp_datatypes = { "id": int, - "author": str, - "time_ts": pandas.Timestamp, + "by": str, + "timestamp": pandas.Timestamp, "dead": bool, } for index, row in df.iterrows(): From 7d5ba5e0fa9376b4e6baa9b49cef7ed383240ab6 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 21 Jun 2023 14:40:13 -0400 Subject: [PATCH 131/536] test: adjusts test input body based on changes to google-api_core (#1588) Adjusts the body of a specific test to adapt to some changes that come through from google-api-core. --- tests/unit/job/test_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index a9760aa9b..a662e92d4 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -18,6 +18,7 @@ from google.api_core import exceptions import google.api_core.retry +from google.api_core.future import polling import mock import pytest @@ -970,7 +971,7 @@ def test_result_default_wo_state(self): client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) - self.assertIs(job.result(), job) + self.assertIs(job.result(retry=polling.DEFAULT_RETRY), job) begin_call = mock.call( method="POST", From a5d86a3c3e460ed684a9214bc59deebc9ae360c6 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 22 Jun 2023 08:19:35 -0400 Subject: [PATCH 132/536] chore(main): release 3.11.2 (#1592) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Chalmer Lowe --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9003d2bfc..5cf542670 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.11.2](https://github.com/googleapis/python-bigquery/compare/v3.11.1...v3.11.2) (2023-06-21) + + +### Bug Fixes + +* Updates tests based on revised hacker_news tables ([#1591](https://github.com/googleapis/python-bigquery/issues/1591)) ([d73cf49](https://github.com/googleapis/python-bigquery/commit/d73cf495b8dfa032a43dc1d58599d0691aaa0efb)) + ## [3.11.1](https://github.com/googleapis/python-bigquery/compare/v3.11.0...v3.11.1) (2023-06-09) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 90c53a0dd..ced5a95a7 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.11.1" +__version__ = "3.11.2" From 6c1ab802b09124ba837d6d5358962e3fce2d4a2c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 22 Jun 2023 10:30:13 -0500 Subject: [PATCH 133/536] fix: type annotations include Optional when None is accepted (#1554) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #1545 🦕 --- google/cloud/bigquery/_job_helpers.py | 4 +- google/cloud/bigquery/client.py | 108 ++++++++++++----------- google/cloud/bigquery/dataset.py | 2 +- google/cloud/bigquery/job/base.py | 21 +++-- google/cloud/bigquery/job/query.py | 10 +-- google/cloud/bigquery/routine/routine.py | 2 +- google/cloud/bigquery/schema.py | 4 +- google/cloud/bigquery/table.py | 8 +- 8 files changed, 86 insertions(+), 73 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 57846b190..09daaa2a2 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -64,7 +64,7 @@ def query_jobs_insert( job_config: Optional[job.QueryJobConfig], job_id: Optional[str], job_id_prefix: Optional[str], - location: str, + location: Optional[str], project: str, retry: retries.Retry, timeout: Optional[float], @@ -215,7 +215,7 @@ def query_jobs_query( client: "Client", query: str, job_config: Optional[job.QueryJobConfig], - location: str, + location: Optional[str], project: str, retry: retries.Retry, timeout: Optional[float], diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index d81816d41..5a929fea4 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -307,7 +307,7 @@ def close(self): def get_service_account_email( self, - project: str = None, + project: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> str: @@ -355,7 +355,7 @@ def get_service_account_email( def list_projects( self, max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -417,11 +417,11 @@ def api_request(*args, **kwargs): def list_datasets( self, - project: str = None, + project: Optional[str] = None, include_all: bool = False, - filter: str = None, + filter: Optional[str] = None, max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -498,7 +498,9 @@ def api_request(*args, **kwargs): page_size=page_size, ) - def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: + def dataset( + self, dataset_id: str, project: Optional[str] = None + ) -> DatasetReference: """Deprecated: Construct a reference to a dataset. .. deprecated:: 1.24.0 @@ -890,7 +892,7 @@ def set_iam_policy( self, table: Union[Table, TableReference, TableListItem, str], policy: Policy, - updateMask: str = None, + updateMask: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Policy: @@ -1350,7 +1352,7 @@ def list_models( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -1427,7 +1429,7 @@ def list_routines( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -1504,7 +1506,7 @@ def list_tables( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -1862,9 +1864,9 @@ def _get_query_results( self, job_id: str, retry: retries.Retry, - project: str = None, + project: Optional[str] = None, timeout_ms: Optional[int] = None, - location: str = None, + location: Optional[str] = None, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> _QueryResults: """Get the query results object for a query job. @@ -2039,8 +2041,8 @@ def create_job( def get_job( self, job_id: Union[str, job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], - project: str = None, - location: str = None, + project: Optional[str] = None, + location: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]: @@ -2103,8 +2105,8 @@ def get_job( def cancel_job( self, job_id: str, - project: str = None, - location: str = None, + project: Optional[str] = None, + location: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: @@ -2181,12 +2183,12 @@ def cancel_job( def list_jobs( self, - project: str = None, + project: Optional[str] = None, parent_job: Optional[Union[QueryJob, str]] = None, max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, all_users: bool = None, - state_filter: str = None, + state_filter: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, @@ -2297,11 +2299,11 @@ def load_table_from_uri( self, source_uris: Union[str, Sequence[str]], destination: Union[Table, TableReference, TableListItem, str], - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: LoadJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[LoadJobConfig] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: @@ -2386,11 +2388,11 @@ def load_table_from_file( rewind: bool = False, size: Optional[int] = None, num_retries: int = _DEFAULT_NUM_RETRIES, - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: LoadJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[LoadJobConfig] = None, timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2494,11 +2496,11 @@ def load_table_from_dataframe( dataframe: "pandas.DataFrame", destination: Union[Table, TableReference, str], num_retries: int = _DEFAULT_NUM_RETRIES, - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: LoadJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[LoadJobConfig] = None, parquet_compression: str = "snappy", timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: @@ -2751,11 +2753,11 @@ def load_table_from_json( json_rows: Iterable[Dict[str, Any]], destination: Union[Table, TableReference, TableListItem, str], num_retries: int = _DEFAULT_NUM_RETRIES, - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: LoadJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[LoadJobConfig] = None, timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -3064,10 +3066,10 @@ def copy_table( Sequence[Union[Table, TableReference, TableListItem, str]], ], destination: Union[Table, TableReference, TableListItem, str], - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, job_config: CopyJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, @@ -3170,10 +3172,10 @@ def extract_table( self, source: Union[Table, TableReference, TableListItem, Model, ModelReference, str], destination_uris: Union[str, Sequence[str]], - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, job_config: ExtractJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, @@ -3270,10 +3272,10 @@ def query( self, query: str, job_config: QueryJobConfig = None, - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, @@ -3563,7 +3565,7 @@ def insert_rows_json( ] = AutoRowIDs.GENERATE_UUID, skip_invalid_rows: bool = None, ignore_unknown_values: bool = None, - template_suffix: str = None, + template_suffix: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Sequence[dict]: @@ -3755,7 +3757,7 @@ def list_rows( table: Union[Table, TableListItem, TableReference, str], selected_fields: Sequence[SchemaField] = None, max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, start_index: Optional[int] = None, page_size: Optional[int] = None, retry: retries.Retry = DEFAULT_RETRY, diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 0edd29359..513c32d9c 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -139,7 +139,7 @@ def from_api_repr(cls, resource: dict) -> "DatasetReference": @classmethod def from_string( - cls, dataset_id: str, default_project: str = None + cls, dataset_id: str, default_project: Optional[str] = None ) -> "DatasetReference": """Construct a dataset reference from dataset ID string. diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 4073e0137..a6267be41 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -703,7 +703,10 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): self._set_properties(api_response) def exists( - self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, + client=None, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, ) -> bool: """API call: test for the existence of the job via a GET request @@ -748,7 +751,10 @@ def exists( return True def reload( - self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, + client=None, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, ): """API call: refresh job properties via a GET request. @@ -785,7 +791,10 @@ def reload( self._set_properties(api_response) def cancel( - self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, + client=None, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, ) -> bool: """API call: cancel job via a POST request @@ -855,7 +864,7 @@ def _set_future_result(self): def done( self, retry: "retries.Retry" = DEFAULT_RETRY, - timeout: float = None, + timeout: Optional[float] = None, reload: bool = True, ) -> bool: """Checks if the job is complete. @@ -881,7 +890,9 @@ def done( return self.state == _DONE_STATE def result( # type: ignore # (signature complaint) - self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, ) -> "_AsyncJob": """Start the job and wait for it to complete and get the result. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 315d8201c..7dddc8278 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1317,7 +1317,7 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): raise def _reload_query_results( - self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: Optional[float] = None ): """Refresh the cached query results. @@ -1405,7 +1405,7 @@ def result( # type: ignore # (complaints about the overloaded signature) page_size: Optional[int] = None, max_results: Optional[int] = None, retry: "retries.Retry" = DEFAULT_RETRY, - timeout: float = None, + timeout: Optional[float] = None, start_index: Optional[int] = None, job_retry: "retries.Retry" = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: @@ -1557,7 +1557,7 @@ def do_get_result(): # that should only exist here in the QueryJob method. def to_arrow( self, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, @@ -1634,7 +1634,7 @@ def to_dataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, geography_as_object: bool = False, @@ -1820,7 +1820,7 @@ def to_geodataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, geography_column: Optional[str] = None, diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index 36ed03728..ef33d507e 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -537,7 +537,7 @@ def from_api_repr(cls, resource: dict) -> "RoutineReference": @classmethod def from_string( - cls, routine_id: str, default_project: str = None + cls, routine_id: str, default_project: Optional[str] = None ) -> "RoutineReference": """Factory: construct a routine reference from routine ID string. diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index ebf34e4cd..20a1bc92f 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -16,7 +16,7 @@ import collections import enum -from typing import Any, Dict, Iterable, Union +from typing import Any, Dict, Iterable, Optional, Union from google.cloud.bigquery import standard_sql from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -124,7 +124,7 @@ def __init__( name: str, field_type: str, mode: str = "NULLABLE", - default_value_expression: str = None, + default_value_expression: Optional[str] = None, description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE, fields: Iterable["SchemaField"] = (), policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE, diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index bf4a90317..462447d51 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -228,7 +228,7 @@ def __init__(self, dataset_ref: "DatasetReference", table_id: str): @classmethod def from_string( - cls, table_id: str, default_project: str = None + cls, table_id: str, default_project: Optional[str] = None ) -> "TableReference": """Construct a table reference from table ID string. @@ -1745,7 +1745,7 @@ def to_arrow_iterable( # changes to job.QueryJob.to_arrow() def to_arrow( self, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, ) -> "pyarrow.Table": @@ -1932,7 +1932,7 @@ def to_dataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_as_object: bool = False, bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, @@ -2230,7 +2230,7 @@ def to_geodataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_column: Optional[str] = None, ) -> "geopandas.GeoDataFrame": From 4650b7f2c1c5fb4b3f0567f420c82c2c48917dd2 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Tue, 27 Jun 2023 10:37:23 -0400 Subject: [PATCH 134/536] chore: update noxfile for docfx job (#1594) * chore: update noxfile for docfx job * chore: Update noxfile.py --- noxfile.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 8464e4980..57e534890 100644 --- a/noxfile.py +++ b/noxfile.py @@ -425,13 +425,15 @@ def docs(session): ) -@nox.session(python=DEFAULT_PYTHON_VERSION) +@nox.session(python="3.9") def docfx(session): """Build the docfx yaml files for this library.""" session.install("-e", ".") session.install( - "sphinx==4.0.2", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + "gcp-sphinx-docfx-yaml", + "alabaster", + "recommonmark", ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) From 206d40baf09c29f46d4191137258df510c2a6cdc Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 27 Jun 2023 11:53:18 -0500 Subject: [PATCH 135/536] chore(main): release 3.11.3 (#1593) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cf542670..a0af641cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.11.3](https://github.com/googleapis/python-bigquery/compare/v3.11.2...v3.11.3) (2023-06-27) + + +### Bug Fixes + +* Type annotations include Optional when None is accepted ([#1554](https://github.com/googleapis/python-bigquery/issues/1554)) ([6c1ab80](https://github.com/googleapis/python-bigquery/commit/6c1ab802b09124ba837d6d5358962e3fce2d4a2c)) + ## [3.11.2](https://github.com/googleapis/python-bigquery/compare/v3.11.1...v3.11.2) (2023-06-21) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index ced5a95a7..9e1402d15 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.11.2" +__version__ = "3.11.3" From 46ca0bc8957f2bce8951035049710c4701aba1d9 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Wed, 28 Jun 2023 08:25:15 -0400 Subject: [PATCH 136/536] chore: update docs nox session (#1597) --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 57e534890..93616485f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -402,7 +402,7 @@ def blacken(session): session.run("black", *BLACK_PATHS) -@nox.session(python=DEFAULT_PYTHON_VERSION) +@nox.session(python="3.9") def docs(session): """Build the docs.""" From 3fbe371746603863b0014b086d31f456de37b680 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 29 Jun 2023 11:26:14 -0400 Subject: [PATCH 137/536] refactor: refactored _get_final_span_attributes() for clarity, simplicity (#1602) Refactors the _get_final_span_attributes() function for simplicity and clarity. * adds docstring * removes several lines of redundant/unnecessary code * renames temporary variable for clarity --- .../cloud/bigquery/opentelemetry_tracing.py | 39 +++++++++++++------ 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index 0e1187c6b..be02c1686 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -87,21 +87,38 @@ def create_span(name, attributes=None, client=None, job_ref=None): def _get_final_span_attributes(attributes=None, client=None, job_ref=None): - final_attributes = {} - final_attributes.update(_default_attributes.copy()) + """Compiles attributes from: client, job_ref, user-provided attributes. + + Attributes from all of these sources are merged together. Note the + attributes are added sequentially based on perceived order of precendence: + i.e. attributes added last may overwrite attributes added earlier. + + Args: + attributes (Optional[dict]): + Additional attributes that pertain to + the specific API call (i.e. not a default attribute) + + client (Optional[google.cloud.bigquery.client.Client]): + Pass in a Client object to extract any attributes that may be + relevant to it and add them to the final_attributes + + job_ref (Optional[google.cloud.bigquery.job._AsyncJob]) + Pass in a _AsyncJob object to extract any attributes that may be + relevant to it and add them to the final_attributes. + + Returns: dict + """ + + collected_attributes = _default_attributes.copy() + if client: - client_attributes = _set_client_attributes(client) - final_attributes.update(client_attributes) + collected_attributes.update(_set_client_attributes(client)) if job_ref: - job_attributes = _set_job_attributes(job_ref) - final_attributes.update(job_attributes) + collected_attributes.update(_set_job_attributes(job_ref)) if attributes: - final_attributes.update(attributes) - - filtered = {k: v for k, v in final_attributes.items() if v is not None} - final_attributes.clear() - final_attributes.update(filtered) + collected_attributes.update(attributes) + final_attributes = {k: v for k, v in collected_attributes.items() if v is not None} return final_attributes From 130450a3a5d866d2d48ef0c396e9f37d22d0b1d5 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 29 Jun 2023 10:08:13 -0700 Subject: [PATCH 138/536] chore: store artifacts in placer (#1599) Source-Link: https://github.com/googleapis/synthtool/commit/cb960373d12d20f8dc38beee2bf884d49627165e Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:2d816f26f728ac8b24248741e7d4c461c09764ef9f7be3684d557c9632e46dbd Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/release/common.cfg | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 02a4dedce..98994f474 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:240b5bcc2bafd450912d2da2be15e62bc6de2cf839823ae4bf94d4f392b451dc -# created: 2023-06-03T21:25:37.968717478Z + digest: sha256:2d816f26f728ac8b24248741e7d4c461c09764ef9f7be3684d557c9632e46dbd +# created: 2023-06-28T17:03:33.371210701Z diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index 6ae81b743..cb8bbaa2e 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -38,3 +38,12 @@ env_vars: { key: "SECRET_MANAGER_KEYS" value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" } + +# Store the packages we uploaded to PyPI. That way, we have a record of exactly +# what we published, which we can use to generate SBOMs and attestations. +action { + define_artifacts { + regex: "github/python-bigquery/**/*.tar.gz" + strip_prefix: "github/python-bigquery" + } +} From 668205599ab9f5bcf34266f4dd3cfc2966783fa7 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 6 Jul 2023 12:52:57 -0500 Subject: [PATCH 139/536] doc: in query retry design, note that location can be required (#1595) In response to internal issue 285136859. Co-authored-by: Anthonios Partheniou --- docs/design/query-retries.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/design/query-retries.md b/docs/design/query-retries.md index 1bac82f5c..08d75302b 100644 --- a/docs/design/query-retries.md +++ b/docs/design/query-retries.md @@ -73,7 +73,7 @@ value, the client library uses the jobs.insert REST API to start a query job. Before it issues this request, it sets a job ID. This job ID remains constant across API retries. -If the job ID was randomly generated, and the jobs.insert request and all retries fail, the client library sends a request to the jobs.get API. This covers the case when a query request succeeded, but there was a transient issue that prevented the client from receiving a successful response. +If the job ID was randomly generated, and the jobs.insert request and all retries fail, the client library sends a request to the jobs.get API. This covers the case when a query request succeeded, but there was a transient issue that prevented the client from receiving a successful response. Note: `jobs.get` requires the location of the query. It will fail with 404 if the location is not specified and the job is not in the US multi-region. #### Retrying the jobs.query API via the retry parameter From 2bbf990e9a46a11359f25ba0d99792a4590fd410 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 10 Jul 2023 17:31:55 +0200 Subject: [PATCH 140/536] chore(deps): update all dependencies (#1577) * chore(deps): update all dependencies * revert urllib3 --------- Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 82a1daadc..c4bd8f2e2 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,8 +12,8 @@ geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.13.0; python_version >= '3.8' google-api-core==2.11.0 -google-auth==2.19.0 -google-cloud-bigquery==3.10.0 +google-auth==2.19.1 +google-cloud-bigquery==3.11.0 google-cloud-bigquery-storage==2.20.0 google-cloud-core==2.3.2 google-crc32c==1.5.0 @@ -40,6 +40,6 @@ requests==2.31.0 rsa==4.9 Shapely==2.0.1 six==1.16.0 -typing-extensions==4.6.2 +typing-extensions==4.6.3 typing-inspect==0.9.0 urllib3==1.26.15 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index b545916c3..29d616021 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -5,11 +5,11 @@ grpcio==1.54.2 ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.13.2; python_version >= '3.9' +ipython==8.14.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==2.0.2; python_version >= '3.8' pyarrow==12.0.0 pytz==2023.3 -typing-extensions==4.6.2 +typing-extensions==4.6.3 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d2878d202..8b9326101 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ db-dtypes==1.1.1 -google-cloud-bigquery==3.10.0 +google-cloud-bigquery==3.11.0 google-cloud-bigquery-storage==2.20.0 google-auth-oauthlib==1.0.0 grpcio==1.54.2 ipywidgets==8.0.6 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.13.2; python_version >= '3.9' +ipython==8.14.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==2.0.2; python_version >= '3.8' pyarrow==12.0.0 pytz==2023.3 -typing-extensions==4.6.2 +typing-extensions==4.6.3 From 319f93872f6283dc1bc377f6d82cfda0f5d5d34f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 10 Jul 2023 18:46:18 +0200 Subject: [PATCH 141/536] chore(deps): update all dependencies (#1606) * chore(deps): update all dependencies * revert urllib3 --------- Co-authored-by: Anthonios Partheniou --- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 34 ++++++++++++------------- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 14 +++++----- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 16 ++++++------ 6 files changed, 35 insertions(+), 35 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 3c3afdcb1..b3772a888 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.3.1 +pytest==7.4.0 mock==5.0.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c4bd8f2e2..b05446e99 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,8 +1,8 @@ attrs==23.1.0 certifi==2023.5.7 cffi==1.15.1 -charset-normalizer==3.1.0 -click==8.1.3 +charset-normalizer==3.2.0 +click==8.1.4 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' @@ -10,29 +10,29 @@ db-dtypes==1.1.1 Fiona==1.9.4.post1 geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' -geopandas==0.13.0; python_version >= '3.8' -google-api-core==2.11.0 -google-auth==2.19.1 -google-cloud-bigquery==3.11.0 -google-cloud-bigquery-storage==2.20.0 -google-cloud-core==2.3.2 +geopandas==0.13.2; python_version >= '3.8' +google-api-core==2.11.1 +google-auth==2.21.0 +google-cloud-bigquery==3.11.3 +google-cloud-bigquery-storage==2.22.0 +google-cloud-core==2.3.3 google-crc32c==1.5.0 google-resumable-media==2.5.0 -googleapis-common-protos==1.59.0 -grpcio==1.54.2 +googleapis-common-protos==1.59.1 +grpcio==1.56.0 idna==3.4 -libcst==1.0.0 -munch==3.0.0 +libcst==1.0.1 +munch==4.0.0 mypy-extensions==1.0.0 packaging==23.1 pandas===1.3.5; python_version == '3.7' -pandas==2.0.2; python_version >= '3.8' -proto-plus==1.22.2 -pyarrow==12.0.0 +pandas==2.0.3; python_version >= '3.8' +proto-plus==1.22.3 +pyarrow==12.0.1 pyasn1==0.5.0 pyasn1-modules==0.3.0 pycparser==2.21 -pyparsing==3.0.9 +pyparsing==3.1.0 python-dateutil==2.8.2 pytz==2023.3 PyYAML==6.0 @@ -40,6 +40,6 @@ requests==2.31.0 rsa==4.9 Shapely==2.0.1 six==1.16.0 -typing-extensions==4.6.3 +typing-extensions==4.7.1 typing-inspect==0.9.0 urllib3==1.26.15 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 9fa68a930..4077bd8dc 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.3.1 +pytest==7.4.0 mock==5.0.2 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 29d616021..edf3dc4b6 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,15 +1,15 @@ db-dtypes==1.1.1 -google-cloud-bigquery-storage==2.20.0 +google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.54.2 -ipywidgets==8.0.6 +grpcio==1.56.0 +ipywidgets==8.0.7 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.14.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.1; python_version >= '3.8' +matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.2; python_version >= '3.8' -pyarrow==12.0.0 +pandas==2.0.3; python_version >= '3.8' +pyarrow==12.0.1 pytz==2023.3 -typing-extensions==4.6.3 +typing-extensions==4.7.1 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 9fa68a930..4077bd8dc 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.3.1 +pytest==7.4.0 mock==5.0.2 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8b9326101..c715a450f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ db-dtypes==1.1.1 -google-cloud-bigquery==3.11.0 -google-cloud-bigquery-storage==2.20.0 +google-cloud-bigquery==3.11.3 +google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.54.2 -ipywidgets==8.0.6 +grpcio==1.56.0 +ipywidgets==8.0.7 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.14.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.1; python_version >= '3.8' +matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.2; python_version >= '3.8' -pyarrow==12.0.0 +pandas==2.0.3; python_version >= '3.8' +pyarrow==12.0.1 pytz==2023.3 -typing-extensions==4.6.3 +typing-extensions==4.7.1 From 564c7de6a8d3ac3c27f7a481284c92d46d7b5ada Mon Sep 17 00:00:00 2001 From: Alvaro Viebrantz Date: Thu, 13 Jul 2023 16:46:49 -0400 Subject: [PATCH 142/536] test: enable copy table tests (#1609) --- samples/tests/test_copy_table.py | 2 -- samples/tests/test_copy_table_cmek.py | 2 -- tests/system/test_client.py | 2 -- 3 files changed, 6 deletions(-) diff --git a/samples/tests/test_copy_table.py b/samples/tests/test_copy_table.py index d5a6c121e..3953e3162 100644 --- a/samples/tests/test_copy_table.py +++ b/samples/tests/test_copy_table.py @@ -28,8 +28,6 @@ def test_copy_table( random_table_id: str, client: "bigquery.Client", ) -> None: - pytest.skip("b/210907595: copy fails for shakespeare table") - copy_table.copy_table(table_with_data_id, random_table_id) out, err = capsys.readouterr() assert "A copy of the table created." in out diff --git a/samples/tests/test_copy_table_cmek.py b/samples/tests/test_copy_table_cmek.py index 1bdec2f35..7cac15723 100644 --- a/samples/tests/test_copy_table_cmek.py +++ b/samples/tests/test_copy_table_cmek.py @@ -23,8 +23,6 @@ def test_copy_table_cmek( table_with_data_id: str, kms_key_name: str, ) -> None: - pytest.skip("b/210907595: copy fails for shakespeare table") - copy_table_cmek.copy_table_cmek(random_table_id, table_with_data_id, kms_key_name) out, err = capsys.readouterr() assert "A copy of the table created" in out diff --git a/tests/system/test_client.py b/tests/system/test_client.py index f4757e30f..8fd532f4c 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1358,8 +1358,6 @@ def test_extract_table(self): self.assertIn("Bharney Rhubble", got) def test_copy_table(self): - pytest.skip("b/210907595: copy fails for shakespeare table") - # If we create a new table to copy from, the test won't work # because the new rows will be stored in the streaming buffer, # and copy jobs don't read the streaming buffer. From 344b7246931e76ea5f507533aa3b81f42ab8c28c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 17 Jul 2023 13:04:14 -0400 Subject: [PATCH 143/536] build(deps): [autoapprove] bump cryptography from 41.0.0 to 41.0.2 (#1611) Source-Link: https://github.com/googleapis/synthtool/commit/d6103f4a3540ba60f633a9e25c37ec5fe7e6286d Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:39f0f3f2be02ef036e297e376fe3b6256775576da8a6ccb1d5eeb80f4c8bf8fb Co-authored-by: Owl Bot --- .flake8 | 2 +- .github/.OwlBot.lock.yaml | 4 +-- .github/auto-label.yaml | 2 +- .kokoro/build.sh | 2 +- .kokoro/docker/docs/Dockerfile | 2 +- .kokoro/populate-secrets.sh | 2 +- .kokoro/publish-docs.sh | 2 +- .kokoro/release.sh | 2 +- .kokoro/requirements.txt | 44 +++++++++++++++------------- .kokoro/test-samples-against-head.sh | 2 +- .kokoro/test-samples-impl.sh | 2 +- .kokoro/test-samples.sh | 2 +- .kokoro/trampoline.sh | 2 +- .kokoro/trampoline_v2.sh | 2 +- .pre-commit-config.yaml | 2 +- .trampolinerc | 4 +-- MANIFEST.in | 2 +- docs/conf.py | 2 +- scripts/decrypt-secrets.sh | 2 +- scripts/readme-gen/readme_gen.py | 18 ++++++------ setup.cfg | 2 +- 21 files changed, 53 insertions(+), 51 deletions(-) diff --git a/.flake8 b/.flake8 index 2e4387498..87f6e408c 100644 --- a/.flake8 +++ b/.flake8 @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 98994f474..ae4a522b9 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2d816f26f728ac8b24248741e7d4c461c09764ef9f7be3684d557c9632e46dbd -# created: 2023-06-28T17:03:33.371210701Z + digest: sha256:39f0f3f2be02ef036e297e376fe3b6256775576da8a6ccb1d5eeb80f4c8bf8fb +# created: 2023-07-17T15:20:13.819193964Z diff --git a/.github/auto-label.yaml b/.github/auto-label.yaml index 41bff0b53..b2016d119 100644 --- a/.github/auto-label.yaml +++ b/.github/auto-label.yaml @@ -1,4 +1,4 @@ -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 4d6a1d0f6..0cb0d0dd0 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2018 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile index f8137d0ae..8e39a2cc4 100644 --- a/.kokoro/docker/docs/Dockerfile +++ b/.kokoro/docker/docs/Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/populate-secrets.sh b/.kokoro/populate-secrets.sh index f52514257..6f3972140 100755 --- a/.kokoro/populate-secrets.sh +++ b/.kokoro/populate-secrets.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC. +# Copyright 2023 Google LLC. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 1c4d62370..9eafe0be3 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/release.sh b/.kokoro/release.sh index c6a7c9460..078fc1c20 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index c7929db6d..67d70a110 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -113,26 +113,30 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==41.0.0 \ - --hash=sha256:0ddaee209d1cf1f180f1efa338a68c4621154de0afaef92b89486f5f96047c55 \ - --hash=sha256:14754bcdae909d66ff24b7b5f166d69340ccc6cb15731670435efd5719294895 \ - --hash=sha256:344c6de9f8bda3c425b3a41b319522ba3208551b70c2ae00099c205f0d9fd3be \ - --hash=sha256:34d405ea69a8b34566ba3dfb0521379b210ea5d560fafedf9f800a9a94a41928 \ - --hash=sha256:3680248309d340fda9611498a5319b0193a8dbdb73586a1acf8109d06f25b92d \ - --hash=sha256:3c5ef25d060c80d6d9f7f9892e1d41bb1c79b78ce74805b8cb4aa373cb7d5ec8 \ - --hash=sha256:4ab14d567f7bbe7f1cdff1c53d5324ed4d3fc8bd17c481b395db224fb405c237 \ - --hash=sha256:5c1f7293c31ebc72163a9a0df246f890d65f66b4a40d9ec80081969ba8c78cc9 \ - --hash=sha256:6b71f64beeea341c9b4f963b48ee3b62d62d57ba93eb120e1196b31dc1025e78 \ - --hash=sha256:7d92f0248d38faa411d17f4107fc0bce0c42cae0b0ba5415505df72d751bf62d \ - --hash=sha256:8362565b3835ceacf4dc8f3b56471a2289cf51ac80946f9087e66dc283a810e0 \ - --hash=sha256:84a165379cb9d411d58ed739e4af3396e544eac190805a54ba2e0322feb55c46 \ - --hash=sha256:88ff107f211ea696455ea8d911389f6d2b276aabf3231bf72c8853d22db755c5 \ - --hash=sha256:9f65e842cb02550fac96536edb1d17f24c0a338fd84eaf582be25926e993dde4 \ - --hash=sha256:a4fc68d1c5b951cfb72dfd54702afdbbf0fb7acdc9b7dc4301bbf2225a27714d \ - --hash=sha256:b7f2f5c525a642cecad24ee8670443ba27ac1fab81bba4cc24c7b6b41f2d0c75 \ - --hash=sha256:b846d59a8d5a9ba87e2c3d757ca019fa576793e8758174d3868aecb88d6fc8eb \ - --hash=sha256:bf8fc66012ca857d62f6a347007e166ed59c0bc150cefa49f28376ebe7d992a2 \ - --hash=sha256:f5d0bf9b252f30a31664b6f64432b4730bb7038339bd18b1fafe129cfc2be9be +cryptography==41.0.2 \ + --hash=sha256:01f1d9e537f9a15b037d5d9ee442b8c22e3ae11ce65ea1f3316a41c78756b711 \ + --hash=sha256:079347de771f9282fbfe0e0236c716686950c19dee1b76240ab09ce1624d76d7 \ + --hash=sha256:182be4171f9332b6741ee818ec27daff9fb00349f706629f5cbf417bd50e66fd \ + --hash=sha256:192255f539d7a89f2102d07d7375b1e0a81f7478925b3bc2e0549ebf739dae0e \ + --hash=sha256:2a034bf7d9ca894720f2ec1d8b7b5832d7e363571828037f9e0c4f18c1b58a58 \ + --hash=sha256:342f3767e25876751e14f8459ad85e77e660537ca0a066e10e75df9c9e9099f0 \ + --hash=sha256:439c3cc4c0d42fa999b83ded80a9a1fb54d53c58d6e59234cfe97f241e6c781d \ + --hash=sha256:49c3222bb8f8e800aead2e376cbef687bc9e3cb9b58b29a261210456a7783d83 \ + --hash=sha256:674b669d5daa64206c38e507808aae49904c988fa0a71c935e7006a3e1e83831 \ + --hash=sha256:7a9a3bced53b7f09da251685224d6a260c3cb291768f54954e28f03ef14e3766 \ + --hash=sha256:7af244b012711a26196450d34f483357e42aeddb04128885d95a69bd8b14b69b \ + --hash=sha256:7d230bf856164de164ecb615ccc14c7fc6de6906ddd5b491f3af90d3514c925c \ + --hash=sha256:84609ade00a6ec59a89729e87a503c6e36af98ddcd566d5f3be52e29ba993182 \ + --hash=sha256:9a6673c1828db6270b76b22cc696f40cde9043eb90373da5c2f8f2158957f42f \ + --hash=sha256:9b6d717393dbae53d4e52684ef4f022444fc1cce3c48c38cb74fca29e1f08eaa \ + --hash=sha256:9c3fe6534d59d071ee82081ca3d71eed3210f76ebd0361798c74abc2bcf347d4 \ + --hash=sha256:a719399b99377b218dac6cf547b6ec54e6ef20207b6165126a280b0ce97e0d2a \ + --hash=sha256:b332cba64d99a70c1e0836902720887fb4529ea49ea7f5462cf6640e095e11d2 \ + --hash=sha256:d124682c7a23c9764e54ca9ab5b308b14b18eba02722b8659fb238546de83a76 \ + --hash=sha256:d73f419a56d74fef257955f51b18d046f3506270a5fd2ac5febbfa259d6c0fa5 \ + --hash=sha256:f0dc40e6f7aa37af01aba07277d3d64d5a03dc66d682097541ec4da03cc140ee \ + --hash=sha256:f14ad275364c8b4e525d018f6716537ae7b6d369c094805cae45300847e0894f \ + --hash=sha256:f772610fe364372de33d76edcd313636a25684edb94cee53fd790195f5989d14 # via # gcp-releasetool # secretstorage diff --git a/.kokoro/test-samples-against-head.sh b/.kokoro/test-samples-against-head.sh index ba3a707b0..63ac41dfa 100755 --- a/.kokoro/test-samples-against-head.sh +++ b/.kokoro/test-samples-against-head.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index 2c6500cae..5a0f5fab6 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh index 11c042d34..50b35a48c 100755 --- a/.kokoro/test-samples.sh +++ b/.kokoro/test-samples.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/trampoline.sh b/.kokoro/trampoline.sh index f39236e94..d85b1f267 100755 --- a/.kokoro/trampoline.sh +++ b/.kokoro/trampoline.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2017 Google Inc. +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh index 4af6cdc26..59a7cf3a9 100755 --- a/.kokoro/trampoline_v2.sh +++ b/.kokoro/trampoline_v2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5405cc8ff..9e3898fd1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.trampolinerc b/.trampolinerc index 0eee72ab6..a7dfeb42c 100644 --- a/.trampolinerc +++ b/.trampolinerc @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Template for .trampolinerc - # Add required env vars here. required_envvars+=( ) diff --git a/MANIFEST.in b/MANIFEST.in index e783f4c62..e0a667053 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/docs/conf.py b/docs/conf.py index 5c83fd79e..d0468e25a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/decrypt-secrets.sh b/scripts/decrypt-secrets.sh index 21f6d2a26..0018b421d 100755 --- a/scripts/decrypt-secrets.sh +++ b/scripts/decrypt-secrets.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2015 Google Inc. All rights reserved. +# Copyright 2023 Google LLC All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/readme-gen/readme_gen.py b/scripts/readme-gen/readme_gen.py index 91b59676b..1acc11983 100644 --- a/scripts/readme-gen/readme_gen.py +++ b/scripts/readme-gen/readme_gen.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2016 Google Inc +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -33,17 +33,17 @@ autoescape=True, ) -README_TMPL = jinja_env.get_template('README.tmpl.rst') +README_TMPL = jinja_env.get_template("README.tmpl.rst") def get_help(file): - return subprocess.check_output(['python', file, '--help']).decode() + return subprocess.check_output(["python", file, "--help"]).decode() def main(): parser = argparse.ArgumentParser() - parser.add_argument('source') - parser.add_argument('--destination', default='README.rst') + parser.add_argument("source") + parser.add_argument("--destination", default="README.rst") args = parser.parse_args() @@ -51,9 +51,9 @@ def main(): root = os.path.dirname(source) destination = os.path.join(root, args.destination) - jinja_env.globals['get_help'] = get_help + jinja_env.globals["get_help"] = get_help - with io.open(source, 'r') as f: + with io.open(source, "r") as f: config = yaml.load(f) # This allows get_help to execute in the right directory. @@ -61,9 +61,9 @@ def main(): output = README_TMPL.render(config) - with io.open(destination, 'w') as f: + with io.open(destination, "w") as f: f.write(output) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/setup.cfg b/setup.cfg index 25892161f..37b63aa49 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From db755ce5d2ae21e458f33f02cf63d2e5fbc45cf5 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 19 Jul 2023 12:03:06 -0400 Subject: [PATCH 144/536] fix: updates typing in function definitions (#1613) --- google/cloud/bigquery/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 5a929fea4..11cceea42 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3070,7 +3070,7 @@ def copy_table( job_id_prefix: Optional[str] = None, location: Optional[str] = None, project: Optional[str] = None, - job_config: CopyJobConfig = None, + job_config: Optional[CopyJobConfig] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.CopyJob: @@ -3176,7 +3176,7 @@ def extract_table( job_id_prefix: Optional[str] = None, location: Optional[str] = None, project: Optional[str] = None, - job_config: ExtractJobConfig = None, + job_config: Optional[ExtractJobConfig] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, source_type: str = "Table", @@ -3271,7 +3271,7 @@ def extract_table( def query( self, query: str, - job_config: QueryJobConfig = None, + job_config: Optional[QueryJobConfig] = None, job_id: Optional[str] = None, job_id_prefix: Optional[str] = None, location: Optional[str] = None, From 3a47b9843cd15b9c58117c5010b05f4ba2588127 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 19 Jul 2023 19:04:49 -0400 Subject: [PATCH 145/536] chore(main): release 3.11.4 (#1615) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0af641cf..cf64e2222 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.11.4](https://github.com/googleapis/python-bigquery/compare/v3.11.3...v3.11.4) (2023-07-19) + + +### Bug Fixes + +* Updates typing in function definitions ([#1613](https://github.com/googleapis/python-bigquery/issues/1613)) ([db755ce](https://github.com/googleapis/python-bigquery/commit/db755ce5d2ae21e458f33f02cf63d2e5fbc45cf5)) + ## [3.11.3](https://github.com/googleapis/python-bigquery/compare/v3.11.2...v3.11.3) (2023-06-27) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 9e1402d15..a97ccc0c8 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.11.3" +__version__ = "3.11.4" From 36a97a81322591606e63ac25e45c6ae857c6f2a7 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 21 Jul 2023 09:44:07 -0400 Subject: [PATCH 146/536] build(deps): [autoapprove] bump pygments from 2.13.0 to 2.15.0 (#1618) Source-Link: https://github.com/googleapis/synthtool/commit/eaef28efd179e6eeb9f4e9bf697530d074a6f3b9 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:f8ca7655fa8a449cadcabcbce4054f593dcbae7aeeab34aa3fcc8b5cf7a93c9e Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/requirements.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index ae4a522b9..17c21d96d 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:39f0f3f2be02ef036e297e376fe3b6256775576da8a6ccb1d5eeb80f4c8bf8fb -# created: 2023-07-17T15:20:13.819193964Z + digest: sha256:f8ca7655fa8a449cadcabcbce4054f593dcbae7aeeab34aa3fcc8b5cf7a93c9e +# created: 2023-07-21T02:12:46.49799314Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 67d70a110..b563eb284 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -396,9 +396,9 @@ pycparser==2.21 \ --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 \ --hash=sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206 # via cffi -pygments==2.13.0 \ - --hash=sha256:56a8508ae95f98e2b9bdf93a6be5ae3f7d8af858b43e02c5a2ff083726be40c1 \ - --hash=sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42 +pygments==2.15.0 \ + --hash=sha256:77a3299119af881904cd5ecd1ac6a66214b6e9bed1f2db16993b54adede64094 \ + --hash=sha256:f7e36cffc4c517fbc252861b9a6e4644ca0e5abadf9a113c72d1358ad09b9500 # via # readme-renderer # rich From 941716fb806b5fa64a4c9188155e191675d6df86 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 25 Jul 2023 17:02:34 +0200 Subject: [PATCH 147/536] chore(deps): update all dependencies (#1607) * chore(deps): update all dependencies * revert * revert --------- Co-authored-by: Anthonios Partheniou --- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index b3772a888..6585a560a 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ pytest==7.4.0 -mock==5.0.2 +mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index b05446e99..c5fe182af 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -2,7 +2,7 @@ attrs==23.1.0 certifi==2023.5.7 cffi==1.15.1 charset-normalizer==3.2.0 -click==8.1.4 +click==8.1.6 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' @@ -12,7 +12,7 @@ geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' geopandas==0.13.2; python_version >= '3.8' google-api-core==2.11.1 -google-auth==2.21.0 +google-auth==2.22.0 google-cloud-bigquery==3.11.3 google-cloud-bigquery-storage==2.22.0 google-cloud-core==2.3.3 @@ -35,7 +35,7 @@ pycparser==2.21 pyparsing==3.1.0 python-dateutil==2.8.2 pytz==2023.3 -PyYAML==6.0 +PyYAML==6.0.1 requests==2.31.0 rsa==4.9 Shapely==2.0.1 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 4077bd8dc..514f09705 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 pytest==7.4.0 -mock==5.0.2 +mock==5.1.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 4077bd8dc..514f09705 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 pytest==7.4.0 -mock==5.0.2 +mock==5.1.0 From 9a51e034e0269a168efcff4dd9da0c53b19655e8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Jul 2023 16:14:24 -0400 Subject: [PATCH 148/536] chore(deps): bump certifi in /samples/geography (#1627) Bumps [certifi](https://github.com/certifi/python-certifi) from 2023.5.7 to 2023.7.22. - [Commits](https://github.com/certifi/python-certifi/compare/2023.05.07...2023.07.22) --- updated-dependencies: - dependency-name: certifi dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c5fe182af..34873a5be 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==23.1.0 -certifi==2023.5.7 +certifi==2023.7.22 cffi==1.15.1 charset-normalizer==3.2.0 click==8.1.6 From 06868480547b1e8fec886dff021ac5a3a571d3d7 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 27 Jul 2023 05:56:00 -0400 Subject: [PATCH 149/536] build(deps): [autoapprove] bump certifi from 2022.12.7 to 2023.7.22 (#1629) Source-Link: https://github.com/googleapis/synthtool/commit/395d53adeeacfca00b73abf197f65f3c17c8f1e9 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:6c1cbc75c74b8bdd71dada2fa1677e9d6d78a889e9a70ee75b93d1d0543f96e1 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/requirements.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 17c21d96d..0ddd0e4d1 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:f8ca7655fa8a449cadcabcbce4054f593dcbae7aeeab34aa3fcc8b5cf7a93c9e -# created: 2023-07-21T02:12:46.49799314Z + digest: sha256:6c1cbc75c74b8bdd71dada2fa1677e9d6d78a889e9a70ee75b93d1d0543f96e1 +# created: 2023-07-25T21:01:10.396410762Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index b563eb284..76d9bba0f 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -20,9 +20,9 @@ cachetools==5.2.0 \ --hash=sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757 \ --hash=sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db # via google-auth -certifi==2022.12.7 \ - --hash=sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3 \ - --hash=sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18 +certifi==2023.7.22 \ + --hash=sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082 \ + --hash=sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9 # via requests cffi==1.15.1 \ --hash=sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5 \ From 781a7f8253633eca80d452fa7eaa209fed14f769 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 8 Aug 2023 11:09:40 -0400 Subject: [PATCH 150/536] build: [autoapprove] bump cryptography from 41.0.2 to 41.0.3 (#1632) * build: [autoapprove] bump cryptography from 41.0.2 to 41.0.3 Source-Link: https://github.com/googleapis/synthtool/commit/352b9d4c068ce7c05908172af128b294073bf53c Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:3e3800bb100af5d7f9e810d48212b37812c1856d20ffeafb99ebe66461b61fc7 * pin flake8 --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/requirements.txt | 48 +++++++++++++++++++-------------------- .pre-commit-config.yaml | 2 +- noxfile.py | 4 +++- 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 0ddd0e4d1..a3da1b0d4 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:6c1cbc75c74b8bdd71dada2fa1677e9d6d78a889e9a70ee75b93d1d0543f96e1 -# created: 2023-07-25T21:01:10.396410762Z + digest: sha256:3e3800bb100af5d7f9e810d48212b37812c1856d20ffeafb99ebe66461b61fc7 +# created: 2023-08-02T10:53:29.114535628Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 76d9bba0f..029bd342d 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -113,30 +113,30 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==41.0.2 \ - --hash=sha256:01f1d9e537f9a15b037d5d9ee442b8c22e3ae11ce65ea1f3316a41c78756b711 \ - --hash=sha256:079347de771f9282fbfe0e0236c716686950c19dee1b76240ab09ce1624d76d7 \ - --hash=sha256:182be4171f9332b6741ee818ec27daff9fb00349f706629f5cbf417bd50e66fd \ - --hash=sha256:192255f539d7a89f2102d07d7375b1e0a81f7478925b3bc2e0549ebf739dae0e \ - --hash=sha256:2a034bf7d9ca894720f2ec1d8b7b5832d7e363571828037f9e0c4f18c1b58a58 \ - --hash=sha256:342f3767e25876751e14f8459ad85e77e660537ca0a066e10e75df9c9e9099f0 \ - --hash=sha256:439c3cc4c0d42fa999b83ded80a9a1fb54d53c58d6e59234cfe97f241e6c781d \ - --hash=sha256:49c3222bb8f8e800aead2e376cbef687bc9e3cb9b58b29a261210456a7783d83 \ - --hash=sha256:674b669d5daa64206c38e507808aae49904c988fa0a71c935e7006a3e1e83831 \ - --hash=sha256:7a9a3bced53b7f09da251685224d6a260c3cb291768f54954e28f03ef14e3766 \ - --hash=sha256:7af244b012711a26196450d34f483357e42aeddb04128885d95a69bd8b14b69b \ - --hash=sha256:7d230bf856164de164ecb615ccc14c7fc6de6906ddd5b491f3af90d3514c925c \ - --hash=sha256:84609ade00a6ec59a89729e87a503c6e36af98ddcd566d5f3be52e29ba993182 \ - --hash=sha256:9a6673c1828db6270b76b22cc696f40cde9043eb90373da5c2f8f2158957f42f \ - --hash=sha256:9b6d717393dbae53d4e52684ef4f022444fc1cce3c48c38cb74fca29e1f08eaa \ - --hash=sha256:9c3fe6534d59d071ee82081ca3d71eed3210f76ebd0361798c74abc2bcf347d4 \ - --hash=sha256:a719399b99377b218dac6cf547b6ec54e6ef20207b6165126a280b0ce97e0d2a \ - --hash=sha256:b332cba64d99a70c1e0836902720887fb4529ea49ea7f5462cf6640e095e11d2 \ - --hash=sha256:d124682c7a23c9764e54ca9ab5b308b14b18eba02722b8659fb238546de83a76 \ - --hash=sha256:d73f419a56d74fef257955f51b18d046f3506270a5fd2ac5febbfa259d6c0fa5 \ - --hash=sha256:f0dc40e6f7aa37af01aba07277d3d64d5a03dc66d682097541ec4da03cc140ee \ - --hash=sha256:f14ad275364c8b4e525d018f6716537ae7b6d369c094805cae45300847e0894f \ - --hash=sha256:f772610fe364372de33d76edcd313636a25684edb94cee53fd790195f5989d14 +cryptography==41.0.3 \ + --hash=sha256:0d09fb5356f975974dbcb595ad2d178305e5050656affb7890a1583f5e02a306 \ + --hash=sha256:23c2d778cf829f7d0ae180600b17e9fceea3c2ef8b31a99e3c694cbbf3a24b84 \ + --hash=sha256:3fb248989b6363906827284cd20cca63bb1a757e0a2864d4c1682a985e3dca47 \ + --hash=sha256:41d7aa7cdfded09b3d73a47f429c298e80796c8e825ddfadc84c8a7f12df212d \ + --hash=sha256:42cb413e01a5d36da9929baa9d70ca90d90b969269e5a12d39c1e0d475010116 \ + --hash=sha256:4c2f0d35703d61002a2bbdcf15548ebb701cfdd83cdc12471d2bae80878a4207 \ + --hash=sha256:4fd871184321100fb400d759ad0cddddf284c4b696568204d281c902fc7b0d81 \ + --hash=sha256:5259cb659aa43005eb55a0e4ff2c825ca111a0da1814202c64d28a985d33b087 \ + --hash=sha256:57a51b89f954f216a81c9d057bf1a24e2f36e764a1ca9a501a6964eb4a6800dd \ + --hash=sha256:652627a055cb52a84f8c448185922241dd5217443ca194d5739b44612c5e6507 \ + --hash=sha256:67e120e9a577c64fe1f611e53b30b3e69744e5910ff3b6e97e935aeb96005858 \ + --hash=sha256:6af1c6387c531cd364b72c28daa29232162010d952ceb7e5ca8e2827526aceae \ + --hash=sha256:6d192741113ef5e30d89dcb5b956ef4e1578f304708701b8b73d38e3e1461f34 \ + --hash=sha256:7efe8041897fe7a50863e51b77789b657a133c75c3b094e51b5e4b5cec7bf906 \ + --hash=sha256:84537453d57f55a50a5b6835622ee405816999a7113267739a1b4581f83535bd \ + --hash=sha256:8f09daa483aedea50d249ef98ed500569841d6498aa9c9f4b0531b9964658922 \ + --hash=sha256:95dd7f261bb76948b52a5330ba5202b91a26fbac13ad0e9fc8a3ac04752058c7 \ + --hash=sha256:a74fbcdb2a0d46fe00504f571a2a540532f4c188e6ccf26f1f178480117b33c4 \ + --hash=sha256:a983e441a00a9d57a4d7c91b3116a37ae602907a7618b882c8013b5762e80574 \ + --hash=sha256:ab8de0d091acbf778f74286f4989cf3d1528336af1b59f3e5d2ebca8b5fe49e1 \ + --hash=sha256:aeb57c421b34af8f9fe830e1955bf493a86a7996cc1338fe41b30047d16e962c \ + --hash=sha256:ce785cf81a7bdade534297ef9e490ddff800d956625020ab2ec2780a556c313e \ + --hash=sha256:d0d651aa754ef58d75cec6edfbd21259d93810b73f6ec246436a21b7841908de # via # gcp-releasetool # secretstorage diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9e3898fd1..19409cbd3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,6 +26,6 @@ repos: hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 3.9.2 + rev: 6.1.0 hooks: - id: flake8 diff --git a/noxfile.py b/noxfile.py index 93616485f..3c9ba5eb5 100644 --- a/noxfile.py +++ b/noxfile.py @@ -375,7 +375,9 @@ def lint(session): serious code quality issues. """ - session.install("flake8", BLACK_VERSION) + # Pin flake8 to 6.0.0 + # See https://github.com/googleapis/python-bigquery/issues/1635 + session.install("flake8==6.0.0", BLACK_VERSION) session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) session.run("flake8", "tests") From 42dfc40aca12feda48e994edb40d21dcda1027c5 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 9 Aug 2023 15:38:45 +0200 Subject: [PATCH 151/536] chore(deps): update all dependencies (#1626) * chore(deps): update all dependencies * revert urllib3 --------- Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 8 ++++---- samples/magics/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 34873a5be..db17aeddf 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -13,13 +13,13 @@ geopandas===0.10.2; python_version == '3.7' geopandas==0.13.2; python_version >= '3.8' google-api-core==2.11.1 google-auth==2.22.0 -google-cloud-bigquery==3.11.3 +google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 google-cloud-core==2.3.3 google-crc32c==1.5.0 google-resumable-media==2.5.0 -googleapis-common-protos==1.59.1 -grpcio==1.56.0 +googleapis-common-protos==1.60.0 +grpcio==1.56.2 idna==3.4 libcst==1.0.1 munch==4.0.0 @@ -32,7 +32,7 @@ pyarrow==12.0.1 pyasn1==0.5.0 pyasn1-modules==0.3.0 pycparser==2.21 -pyparsing==3.1.0 +pyparsing==3.1.1 python-dateutil==2.8.2 pytz==2023.3 PyYAML==6.0.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index edf3dc4b6..ae61f71ff 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,8 +1,8 @@ db-dtypes==1.1.1 google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.56.0 -ipywidgets==8.0.7 +grpcio==1.56.2 +ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.14.0; python_version >= '3.9' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index c715a450f..0541486c0 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,9 +1,9 @@ db-dtypes==1.1.1 -google-cloud-bigquery==3.11.3 +google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.56.0 -ipywidgets==8.0.7 +grpcio==1.56.2 +ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.14.0; python_version >= '3.9' From b0199f2c4614e166a86c0506c0639ba9815d83e3 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 11 Aug 2023 20:46:46 +0200 Subject: [PATCH 152/536] chore(deps): update all dependencies (#1636) * chore(deps): update all dependencies * revert urllib3 --------- Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index db17aeddf..714e032ad 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -19,7 +19,7 @@ google-cloud-core==2.3.3 google-crc32c==1.5.0 google-resumable-media==2.5.0 googleapis-common-protos==1.60.0 -grpcio==1.56.2 +grpcio==1.57.0 idna==3.4 libcst==1.0.1 munch==4.0.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index ae61f71ff..c3300ae20 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,7 +1,7 @@ db-dtypes==1.1.1 google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.56.2 +grpcio==1.57.0 ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 0541486c0..da99249d2 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -2,7 +2,7 @@ db-dtypes==1.1.1 google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.56.2 +grpcio==1.57.0 ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' From 3e021a46d387a0e3cb69913a281062fc221bb926 Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 14 Aug 2023 09:15:45 -0700 Subject: [PATCH 153/536] feat: widen retry predicate to include ServiceUnavailable (#1641) Expands retry. It's possible in the normal lifecycle of an API frontend for the intermediate response to indicate the API service is not ready. related: internal issue 294103068 --- google/cloud/bigquery/retry.py | 1 + tests/unit/test_retry.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 254b26608..d0830ed13 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -27,6 +27,7 @@ exceptions.TooManyRequests, exceptions.InternalServerError, exceptions.BadGateway, + exceptions.ServiceUnavailable, requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError, requests.exceptions.Timeout, diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index e0a992f78..60d04de89 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -79,6 +79,12 @@ def test_w_unstructured_too_many_requests(self): exc = TooManyRequests("testing") self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_service_unavailable(self): + from google.api_core.exceptions import ServiceUnavailable + + exc = ServiceUnavailable("testing") + self.assertTrue(self._call_fut(exc)) + def test_w_internalError(self): exc = mock.Mock(errors=[{"reason": "internalError"}], spec=["errors"]) self.assertTrue(self._call_fut(exc)) From 1760e945d16163980027fecf21113cd77ddc35a1 Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 21 Aug 2023 09:52:38 -0700 Subject: [PATCH 154/536] fix: relax timeout expectations (#1645) * fix: relax timeout expectations Changes to python-api-core can in certain cases cause timeout to be represented as a literal python base object type. This CL adjusts logic that selects from multiple timeout values to better handle this case, which previously assumed either a None or scalar value being present. Fixes: https://github.com/googleapis/python-bigquery/issues/1612 * augment testing * blacken and lint fixes * unused import --- google/cloud/bigquery/client.py | 10 +++- tests/unit/test_client.py | 94 +++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 11cceea42..2712b0c83 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1895,7 +1895,10 @@ def _get_query_results( extra_params: Dict[str, Any] = {"maxResults": 0} if timeout is not None: - timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) + if type(timeout) == object: + timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT + else: + timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) if project is None: project = self.project @@ -3924,7 +3927,10 @@ def _list_rows_from_query_results( } if timeout is not None: - timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) + if type(timeout) == object: + timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT + else: + timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) if start_index is not None: params["startIndex"] = start_index diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index cf0aa4028..faa073dce 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -395,6 +395,31 @@ def test__get_query_results_miss_w_short_timeout(self): timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, ) + def test__get_query_results_miss_w_default_timeout(self): + import google.cloud.bigquery.client + from google.cloud.exceptions import NotFound + + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection() + path = "/projects/other-project/queries/nothere" + with self.assertRaises(NotFound): + client._get_query_results( + "nothere", + None, + project="other-project", + location=self.LOCATION, + timeout_ms=500, + timeout=object(), # the api core default timeout + ) + + conn.api_request.assert_called_once_with( + method="GET", + path=path, + query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + def test__get_query_results_miss_w_client_location(self): from google.cloud.exceptions import NotFound @@ -438,6 +463,75 @@ def test__get_query_results_hit(self): self.assertEqual(query_results.total_rows, 10) self.assertTrue(query_results.complete) + def test__list_rows_from_query_results_w_none_timeout(self): + from google.cloud.exceptions import NotFound + from google.cloud.bigquery.schema import SchemaField + + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection() + path = "/projects/project/queries/nothere" + iterator = client._list_rows_from_query_results( + "nothere", + location=None, + project="project", + schema=[ + SchemaField("f1", "STRING", mode="REQUIRED"), + SchemaField("f2", "INTEGER", mode="REQUIRED"), + ], + timeout=None, + ) + + # trigger the iterator to request data + with self.assertRaises(NotFound): + iterator._get_next_page_response() + + conn.api_request.assert_called_once_with( + method="GET", + path=path, + query_params={ + "fields": "jobReference,totalRows,pageToken,rows", + "location": None, + "formatOptions.useInt64Timestamp": True, + }, + timeout=None, + ) + + def test__list_rows_from_query_results_w_default_timeout(self): + import google.cloud.bigquery.client + from google.cloud.exceptions import NotFound + from google.cloud.bigquery.schema import SchemaField + + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection() + path = "/projects/project/queries/nothere" + iterator = client._list_rows_from_query_results( + "nothere", + location=None, + project="project", + schema=[ + SchemaField("f1", "STRING", mode="REQUIRED"), + SchemaField("f2", "INTEGER", mode="REQUIRED"), + ], + timeout=object(), + ) + + # trigger the iterator to request data + with self.assertRaises(NotFound): + iterator._get_next_page_response() + + conn.api_request.assert_called_once_with( + method="GET", + path=path, + query_params={ + "fields": "jobReference,totalRows,pageToken,rows", + "location": None, + "formatOptions.useInt64Timestamp": True, + }, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + def test_default_query_job_config(self): from google.cloud.bigquery import QueryJobConfig From 3645e32aeebefe9d5a4bc71a6513942741f0f196 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 23 Aug 2023 15:04:13 -0400 Subject: [PATCH 155/536] bug: fixes numerous minor issues that cause test failures (#1651) Provides numerous tweaks to correct for failing tests in prerelease testing. --- google/cloud/bigquery/client.py | 2 +- google/cloud/bigquery/job/query.py | 4 ++-- tests/system/test_pandas.py | 9 ++++++++- tests/unit/job/test_query.py | 2 +- tests/unit/job/test_query_pandas.py | 9 +++++++++ tests/unit/test__pandas_helpers.py | 3 +++ tests/unit/test_table.py | 11 +++++++++++ tests/unit/test_table_pandas.py | 9 +++++++++ 8 files changed, 44 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 2712b0c83..f64a81741 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -253,7 +253,7 @@ def __init__( bq_host = _get_bigquery_host() kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None if client_options: - if type(client_options) == dict: + if isinstance(client_options, dict): client_options = google.api_core.client_options.from_dict( client_options ) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 7dddc8278..25d57b501 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1342,10 +1342,10 @@ def _reload_query_results( # Our system does not natively handle that and instead expects # either none or a numeric value. If passed a Python object, convert to # None. - if type(self._done_timeout) == object: # pragma: NO COVER + if isinstance(self._done_timeout, object): # pragma: NO COVER self._done_timeout = None - if self._done_timeout is not None: + if self._done_timeout is not None: # pragma: NO COVER # Subtract a buffer for context switching, network latency, etc. api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS api_timeout = max(min(api_timeout, 10), 0) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 726b68f7c..a46f8e3df 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -41,7 +41,11 @@ "google.cloud.bigquery_storage", minversion="2.0.0" ) -PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") @@ -1006,6 +1010,9 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): assert len(dataframe.index) == 100 +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" +) @pytest.mark.parametrize( ("max_results",), ( diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 33a52cfec..626346016 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -1229,7 +1229,7 @@ def test_result_w_timeout(self): query_request[1]["path"], "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), ) - self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) + self.assertEqual(query_request[1]["timeout"], 120) self.assertEqual( query_request[1]["timeout"], google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 01b60ceb3..f4c7eb06e 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -17,6 +17,7 @@ import json import mock +import pkg_resources import pytest @@ -48,6 +49,11 @@ from .helpers import _make_client from .helpers import _make_job_resource +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + pandas = pytest.importorskip("pandas") try: @@ -646,6 +652,9 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" +) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 07bf03f66..a4cc1fefb 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -546,6 +546,9 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" +) @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): rows = [pandas.Timestamp(row) for row in rows] diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index a221bc89e..f31dc5528 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -75,6 +75,11 @@ PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + def _mock_client(): from google.cloud.bigquery import client @@ -3677,6 +3682,9 @@ def test_to_dataframe_w_dtypes_mapper(self): self.assertEqual(df.timestamp.dtype.name, "object") @unittest.skipIf(pandas is None, "Requires `pandas`") + @pytest.mark.skipif( + PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" + ) def test_to_dataframe_w_none_dtypes_mapper(self): from google.cloud.bigquery.schema import SchemaField @@ -3789,6 +3797,9 @@ def test_to_dataframe_w_unsupported_dtypes_mapper(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @pytest.mark.skipif( + PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" + ) def test_to_dataframe_column_dtypes(self): from google.cloud.bigquery.schema import SchemaField diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 5778467a5..dfe512eea 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -15,6 +15,7 @@ import datetime import decimal from unittest import mock +import pkg_resources import pytest @@ -26,6 +27,11 @@ TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data" +if pandas is not None: # pragma: NO COVER + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: # pragma: NO COVER + PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + @pytest.fixture def class_under_test(): @@ -34,6 +40,9 @@ def class_under_test(): return RowIterator +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" +) def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): # See tests/system/test_arrow.py for the actual types we get from the API. arrow_schema = pyarrow.schema( From b930e4673b0d1cceb53f683e47578d87af9361f3 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Sat, 2 Sep 2023 05:11:13 -0400 Subject: [PATCH 156/536] feat: search statistics (#1616) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * experimental tweaks * feat: adds two search statistics classes and property * removes several personal debugging sentinels * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * adds tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * cleans up conflict * adds comment * adds some type hints, adds a test for SearchReasons * cleans up some comments * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update tests/unit/job/test_query_stats.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updated type checks to be isinstance checks per linter * update linting * Update tests/unit/job/test_query_stats.py * Update tests/unit/job/test_query_stats.py * experiments with some tests that are failing * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Fix linting * update package verification approach * update pandas installed version constant * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove unused package * set pragma no cover * adds controls to skip testing if pandas exceeds 2.0 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * adds pragma no cover to a simple check * add checks against pandas 2.0 on system test * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * experiments with some tests that are failing * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * resolves merge conflict * resolves merge conflict * resolve conflicts * resolve merge conflicts * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates due to faulty confict resolution * adds docstrings to two classes * corrects formatting * Update tests/unit/job/test_query_stats.py * Update tests/unit/job/test_query_stats.py * updates default values and corrects mypy errors * corrects linting * Update google/cloud/bigquery/job/query.py --------- Co-authored-by: Owl Bot --- google/cloud/bigquery/job/query.py | 63 ++++++++++++++++++++++++++- tests/unit/job/test_query.py | 22 ++++++++++ tests/unit/job/test_query_stats.py | 69 ++++++++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 25d57b501..429e33e7e 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -198,6 +198,59 @@ def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats": return cls(*args) +class IndexUnusedReason(typing.NamedTuple): + """Reason about why no search index was used in the search query (or sub-query). + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#indexunusedreason + """ + + code: Optional[str] = None + """Specifies the high-level reason for the scenario when no search index was used. + """ + + message: Optional[str] = None + """Free form human-readable reason for the scenario when no search index was used. + """ + + baseTable: Optional[TableReference] = None + """Specifies the base table involved in the reason that no search index was used. + """ + + indexName: Optional[str] = None + """Specifies the name of the unused search index, if available.""" + + @classmethod + def from_api_repr(cls, reason): + code = reason.get("code") + message = reason.get("message") + baseTable = reason.get("baseTable") + indexName = reason.get("indexName") + + return cls(code, message, baseTable, indexName) + + +class SearchStats(typing.NamedTuple): + """Statistics related to Search Queries. Populated as part of JobStatistics2. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#searchstatistics + """ + + mode: Optional[str] = None + """Indicates the type of search index usage in the entire search query.""" + + reason: List[IndexUnusedReason] = [] + """Reason about why no search index was used in the search query (or sub-query)""" + + @classmethod + def from_api_repr(cls, stats: Dict[str, Any]): + mode = stats.get("indexUsageMode", None) + reason = [ + IndexUnusedReason.from_api_repr(r) + for r in stats.get("indexUnusedReasons", []) + ] + return cls(mode, reason) + + class ScriptOptions: """Options controlling the execution of scripts. @@ -724,7 +777,6 @@ def to_api_repr(self) -> dict: Dict: A dictionary in the format used by the BigQuery API. """ resource = copy.deepcopy(self._properties) - # Query parameters have an addition property associated with them # to indicate if the query is using named or positional parameters. query_parameters = resource["query"].get("queryParameters") @@ -858,6 +910,15 @@ def priority(self): """ return self.configuration.priority + @property + def search_stats(self) -> Optional[SearchStats]: + """Returns a SearchStats object.""" + + stats = self._job_statistics().get("searchStatistics") + if stats is not None: + return SearchStats.from_api_repr(stats) + return None + @property def query(self): """str: The query text used in this query job. diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 626346016..7d3186d47 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -911,6 +911,28 @@ def test_dml_stats(self): assert isinstance(job.dml_stats, DmlStats) assert job.dml_stats.inserted_row_count == 35 + def test_search_stats(self): + from google.cloud.bigquery.job.query import SearchStats + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + assert job.search_stats is None + + statistics = job._properties["statistics"] = {} + assert job.search_stats is None + + query_stats = statistics["query"] = {} + assert job.search_stats is None + + query_stats["searchStatistics"] = { + "indexUsageMode": "INDEX_USAGE_MODE_UNSPECIFIED", + "indexUnusedReasons": [], + } + # job.search_stats is a daisy-chain of calls and gets: + # job.search_stats << job._job_statistics << job._properties + assert isinstance(job.search_stats, SearchStats) + assert job.search_stats.mode == "INDEX_USAGE_MODE_UNSPECIFIED" + def test_result(self): from google.cloud.bigquery.table import RowIterator diff --git a/tests/unit/job/test_query_stats.py b/tests/unit/job/test_query_stats.py index 13e022ced..bdd0fb627 100644 --- a/tests/unit/job/test_query_stats.py +++ b/tests/unit/job/test_query_stats.py @@ -108,6 +108,75 @@ def test_from_api_repr_full_stats(self): assert result.updated_row_count == 4 +class TestSearchStatistics: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job.query import SearchStats + + return SearchStats + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + def test_ctor_defaults(self): + search_stats = self._make_one() + assert search_stats.mode is None + assert search_stats.reason == [] + + def test_from_api_repr_unspecified(self): + klass = self._get_target_class() + result = klass.from_api_repr( + {"indexUsageMode": "INDEX_USAGE_MODE_UNSPECIFIED", "indexUnusedReasons": []} + ) + + assert isinstance(result, klass) + assert result.mode == "INDEX_USAGE_MODE_UNSPECIFIED" + assert result.reason == [] + + +class TestIndexUnusedReason: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job.query import IndexUnusedReason + + return IndexUnusedReason + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + def test_ctor_defaults(self): + search_reason = self._make_one() + assert search_reason.code is None + assert search_reason.message is None + assert search_reason.baseTable is None + assert search_reason.indexName is None + + def test_from_api_repr_unspecified(self): + klass = self._get_target_class() + result = klass.from_api_repr( + { + "code": "INDEX_CONFIG_NOT_AVAILABLE", + "message": "There is no search index...", + "baseTable": { + "projectId": "bigquery-public-data", + "datasetId": "usa_names", + "tableId": "usa_1910_current", + }, + "indexName": None, + } + ) + + assert isinstance(result, klass) + assert result.code == "INDEX_CONFIG_NOT_AVAILABLE" + assert result.message == "There is no search index..." + assert result.baseTable == { + "projectId": "bigquery-public-data", + "datasetId": "usa_names", + "tableId": "usa_1910_current", + } + assert result.indexName is None + + class TestQueryPlanEntryStep(_Base): KIND = "KIND" SUBSTEPS = ("SUB1", "SUB2") From 7248f1fc8e1b31ea1e02858509e2841db21c91d0 Mon Sep 17 00:00:00 2001 From: meredithslota Date: Fri, 15 Sep 2023 07:57:38 -0700 Subject: [PATCH 157/536] chore(docs): update region tag and move sample and test (#1648) * chore: added new region tags to create_table_external_data_configuration.py * chore: delete test as part of sample migration * chore: delete sample as part of sample migration * chore: move sample and test to /snippets/ * chore: update import statement in create_table_external_data_configuration_test.py * chore: fix import statement in create_table_external_data_configuration_test.py * chore: update sample location in tables.rst --- docs/usage/tables.rst | 4 ++-- .../create_table_external_data_configuration.py | 4 ++++ .../create_table_external_data_configuration_test.py} | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) rename samples/{ => snippets}/create_table_external_data_configuration.py (94%) rename samples/{tests/test_create_table_external_data_configuration.py => snippets/create_table_external_data_configuration_test.py} (94%) diff --git a/docs/usage/tables.rst b/docs/usage/tables.rst index 105e93637..a4f42b15c 100644 --- a/docs/usage/tables.rst +++ b/docs/usage/tables.rst @@ -61,7 +61,7 @@ Create an empty table with the Create a table using an external data source with the :func:`~google.cloud.bigquery.client.Client.create_table` method: -.. literalinclude:: ../samples/create_table_external_data_configuration.py +.. literalinclude:: ../samples/snippets/create_table_external_data_configuration.py :language: python :dedent: 4 :start-after: [START bigquery_create_table_external_data_configuration] @@ -313,4 +313,4 @@ Replace the table data with a Parquet file from Cloud Storage: :language: python :dedent: 4 :start-after: [START bigquery_load_table_gcs_parquet_truncate] - :end-before: [END bigquery_load_table_gcs_parquet_truncate] \ No newline at end of file + :end-before: [END bigquery_load_table_gcs_parquet_truncate] diff --git a/samples/create_table_external_data_configuration.py b/samples/snippets/create_table_external_data_configuration.py similarity index 94% rename from samples/create_table_external_data_configuration.py rename to samples/snippets/create_table_external_data_configuration.py index 068f91555..cbb15d40a 100644 --- a/samples/create_table_external_data_configuration.py +++ b/samples/snippets/create_table_external_data_configuration.py @@ -18,6 +18,7 @@ def create_table_external_data_configuration( ) -> None: """Create a table using an external data source""" orig_table_id = table_id + # [START bigquery_query_external_gcs_perm] # [START bigquery_create_table_external_data_configuration] # [START bigquery_create_external_table_definition] from google.cloud import bigquery @@ -28,7 +29,9 @@ def create_table_external_data_configuration( # TODO(developer): Set table_id to the ID of the table to create. table_id = "your-project.your_dataset.your_table_name" # [END bigquery_create_table_external_data_configuration] + # [END bigquery_query_external_gcs_perm] table_id = orig_table_id + # [START bigquery_query_external_gcs_perm] # [START bigquery_create_table_external_data_configuration] # TODO(developer): Set the external source format of your table. @@ -64,3 +67,4 @@ def create_table_external_data_configuration( f"Created table with external source format {table.external_data_configuration.source_format}" ) # [END bigquery_create_table_external_data_configuration] + # [END bigquery_query_external_gcs_perm] diff --git a/samples/tests/test_create_table_external_data_configuration.py b/samples/snippets/create_table_external_data_configuration_test.py similarity index 94% rename from samples/tests/test_create_table_external_data_configuration.py rename to samples/snippets/create_table_external_data_configuration_test.py index bf4cf17d4..7bbcde32b 100644 --- a/samples/tests/test_create_table_external_data_configuration.py +++ b/samples/snippets/create_table_external_data_configuration_test.py @@ -14,7 +14,7 @@ import typing -from .. import create_table_external_data_configuration +import create_table_external_data_configuration if typing.TYPE_CHECKING: import pytest From 5deba50b8c2d91d08bd5f5fb68742268c494b4a9 Mon Sep 17 00:00:00 2001 From: sriram Date: Fri, 15 Sep 2023 22:21:56 +0530 Subject: [PATCH 158/536] feat: add `Dataset.storage_billing_model` setter, use `client.update_dataset(ds, fields=["storage_billing_model"])` to update (#1643) Adding Storage Billing Model property. See: https://cloud.google.com/bigquery/docs/updating-datasets#update_storage_billing_models --------- Co-authored-by: Tim Swast --- google/cloud/bigquery/dataset.py | 33 ++++++++++++++++++++++++++++++++ tests/unit/test_dataset.py | 25 +++++++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 513c32d9c..114f0de18 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -527,6 +527,7 @@ class Dataset(object): "default_table_expiration_ms": "defaultTableExpirationMs", "friendly_name": "friendlyName", "default_encryption_configuration": "defaultEncryptionConfiguration", + "storage_billing_model": "storageBillingModel", } def __init__(self, dataset_ref) -> None: @@ -763,6 +764,38 @@ def default_encryption_configuration(self, value): api_repr = value.to_api_repr() self._properties["defaultEncryptionConfiguration"] = api_repr + @property + def storage_billing_model(self): + """Union[str, None]: StorageBillingModel of the dataset as set by the user + (defaults to :data:`None`). + + Set the value to one of ``'LOGICAL'`` or ``'PHYSICAL'``. This change + takes 24 hours to take effect and you must wait 14 days before you can + change the storage billing model again. + + See `storage billing model + `_ + in REST API docs and `updating the storage billing model + `_ + guide. + + Raises: + ValueError: for invalid value types. + """ + return self._properties.get("storageBillingModel") + + @storage_billing_model.setter + def storage_billing_model(self, value): + if not isinstance(value, str) and value is not None: + raise ValueError( + "storage_billing_model must be a string (e.g. 'LOGICAL', 'PHYSICAL'), or None. " + f"Got {repr(value)}." + ) + if value: + self._properties["storageBillingModel"] = value + if value is None: + self._properties["storageBillingModel"] = "LOGICAL" + @classmethod def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 5e26a0c03..f2bdf8db5 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -667,6 +667,7 @@ def _make_resource(self): "location": "US", "selfLink": self.RESOURCE_URL, "defaultTableExpirationMs": 3600, + "storageBillingModel": "LOGICAL", "access": [ {"role": "OWNER", "userByEmail": USER_EMAIL}, {"role": "OWNER", "groupByEmail": GROUP_EMAIL}, @@ -736,7 +737,12 @@ def _verify_resource_properties(self, dataset, resource): ) else: self.assertIsNone(dataset.default_encryption_configuration) - + if "storageBillingModel" in resource: + self.assertEqual( + dataset.storage_billing_model, resource.get("storageBillingModel") + ) + else: + self.assertIsNone(dataset.storage_billing_model) if "access" in resource: self._verify_access_entry(dataset.access_entries, resource) else: @@ -941,6 +947,23 @@ def test_default_encryption_configuration_setter(self): dataset.default_encryption_configuration = None self.assertIsNone(dataset.default_encryption_configuration) + def test_storage_billing_model_setter(self): + dataset = self._make_one(self.DS_REF) + dataset.storage_billing_model = "PHYSICAL" + self.assertEqual(dataset.storage_billing_model, "PHYSICAL") + + def test_storage_billing_model_setter_with_none(self): + dataset = self._make_one(self.DS_REF) + dataset.storage_billing_model = None + self.assertEqual(dataset.storage_billing_model, "LOGICAL") + + def test_storage_billing_model_setter_with_invalid_type(self): + dataset = self._make_one(self.DS_REF) + with self.assertRaises(ValueError) as raises: + dataset.storage_billing_model = object() + + self.assertIn("storage_billing_model", str(raises.exception)) + def test_from_string(self): cls = self._get_target_class() got = cls.from_string("string-project.string_dataset") From 03194e0156ed9201cb36301967c5af117d7ef29c Mon Sep 17 00:00:00 2001 From: Jared Chapman Date: Fri, 15 Sep 2023 13:17:01 -0500 Subject: [PATCH 159/536] docs: Revise update_table_expiration sample (#1457) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: Revise update_table_expiration sample * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Tim Swast Co-authored-by: aribray <45905583+aribray@users.noreply.github.com> Co-authored-by: Anthonios Partheniou --- docs/snippets.py | 2 + samples/snippets/update_table_expiration.py | 45 +++++++++++++++++++ .../snippets/update_table_expiration_test.py | 44 ++++++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 samples/snippets/update_table_expiration.py create mode 100644 samples/snippets/update_table_expiration_test.py diff --git a/docs/snippets.py b/docs/snippets.py index e1d9ae839..d458b832c 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -220,6 +220,8 @@ def test_update_table_expiration(client, to_delete): table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) table = client.create_table(table) + # TODO(thejaredchapman): After code sample has been updated from cloud.google.com delete this. + # [START bigquery_update_table_expiration] import datetime diff --git a/samples/snippets/update_table_expiration.py b/samples/snippets/update_table_expiration.py new file mode 100644 index 000000000..bf944800f --- /dev/null +++ b/samples/snippets/update_table_expiration.py @@ -0,0 +1,45 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + + +def update_table_expiration(table_id, expiration): + orig_table_id = table_id + orig_expiration = expiration + + # [START bigquery_update_table_expiration] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to the full name of the table you want to update. + table_id = "your-project.your_dataset.your_table_name" + + # TODO(dev): Set table to expire for desired days days from now. + expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + days=5 + ) + # [END bigquery_update_table_expiration] + + table_id = orig_table_id + expiration = orig_expiration + + # [START bigquery_update_table_expiration] + table = client.get_table(table_id) # Make an API request. + table.expires = expiration + table = client.update_table(table, ["expires"]) # API request + + print(f"Updated {table_id}, expires {table.expires}.") + # [END bigquery_update_table_expiration] diff --git a/samples/snippets/update_table_expiration_test.py b/samples/snippets/update_table_expiration_test.py new file mode 100644 index 000000000..721bf53aa --- /dev/null +++ b/samples/snippets/update_table_expiration_test.py @@ -0,0 +1,44 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import typing + +import update_table_expiration + +if typing.TYPE_CHECKING: + import pathlib + + import pytest + + +def test_update_table_expiration( + capsys: "pytest.CaptureFixture[str]", + table_id: str, + tmp_path: "pathlib.Path", +) -> None: + + # This was not needed for function, only for test + expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + days=5 + ) + + update_table_expiration.update_table_expiration(table_id, expiration) + + out, _ = capsys.readouterr() + assert "Updated" in out + assert table_id in out + assert str(expiration.day) in out + assert str(expiration.month) in out + assert str(expiration.year) in out From 30f605d687ea3d33d031618762db506d73e655a0 Mon Sep 17 00:00:00 2001 From: meredithslota Date: Mon, 18 Sep 2023 11:50:41 -0700 Subject: [PATCH 160/536] chore: de-dupe region tag `bigquery_query_external_gcs_perm` (#1658) --- docs/snippets.py | 42 ------------------------------------------ 1 file changed, 42 deletions(-) diff --git a/docs/snippets.py b/docs/snippets.py index d458b832c..3a46cd36c 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -617,48 +617,6 @@ def test_client_query_total_rows(client, capsys): assert "Got 100 rows." in out -def test_query_external_gcs_permanent_table(client, to_delete): - dataset_id = "query_external_gcs_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_query_external_gcs_perm] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - # Configure the external data source - dataset_ref = bigquery.DatasetReference(project, dataset_id) - table_id = "us_states" - schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - table = bigquery.Table(dataset_ref.table(table_id), schema=schema) - external_config = bigquery.ExternalConfig("CSV") - external_config.source_uris = [ - "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - ] - external_config.options.skip_leading_rows = 1 # optionally skip header row - table.external_data_configuration = external_config - - # Create a permanent table linked to the GCS file - table = client.create_table(table) # API request - - # Example query to find states starting with 'W' - sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id) - - query_job = client.query(sql) # API request - - w_states = list(query_job) # Waits for query to finish - print("There are {} states with names starting with W.".format(len(w_states))) - # [END bigquery_query_external_gcs_perm] - assert len(w_states) == 4 - - def test_ddl_create_view(client, to_delete, capsys): """Create a view via a DDL query.""" project = client.project From 54a77694afcd80be4ba469c6ebb7ca8be112b04e Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 18 Sep 2023 16:15:23 -0400 Subject: [PATCH 161/536] fix: use isinstance() per E721, unpin flake8 (#1659) * fix: use isinstance() per E721, unpin flake8 * change type assertion --- google/cloud/bigquery/client.py | 4 ++-- noxfile.py | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index f64a81741..b4783fc56 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1895,7 +1895,7 @@ def _get_query_results( extra_params: Dict[str, Any] = {"maxResults": 0} if timeout is not None: - if type(timeout) == object: + if not isinstance(timeout, (int, float)): timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT else: timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) @@ -3927,7 +3927,7 @@ def _list_rows_from_query_results( } if timeout is not None: - if type(timeout) == object: + if not isinstance(timeout, (int, float)): timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT else: timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) diff --git a/noxfile.py b/noxfile.py index 3c9ba5eb5..93616485f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -375,9 +375,7 @@ def lint(session): serious code quality issues. """ - # Pin flake8 to 6.0.0 - # See https://github.com/googleapis/python-bigquery/issues/1635 - session.install("flake8==6.0.0", BLACK_VERSION) + session.install("flake8", BLACK_VERSION) session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) session.run("flake8", "tests") From 514d3e12e5131bd589dff08893fd89bf40338ba3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 2 Oct 2023 11:56:44 -0500 Subject: [PATCH 162/536] fix: allow `storage_billing_model` to be explicitly set to `None` to use project default value (#1665) * fix: allow `storage_billing_model` to be explicitly set to `None` to use project default value * add STORAGE_BILLING_MODEL_UNSPECIFIED to docstring --- google/cloud/bigquery/dataset.py | 17 ++++++++--------- tests/unit/test_dataset.py | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 114f0de18..a9c1cd884 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -769,9 +769,10 @@ def storage_billing_model(self): """Union[str, None]: StorageBillingModel of the dataset as set by the user (defaults to :data:`None`). - Set the value to one of ``'LOGICAL'`` or ``'PHYSICAL'``. This change - takes 24 hours to take effect and you must wait 14 days before you can - change the storage billing model again. + Set the value to one of ``'LOGICAL'``, ``'PHYSICAL'``, or + ``'STORAGE_BILLING_MODEL_UNSPECIFIED'``. This change takes 24 hours to + take effect and you must wait 14 days before you can change the storage + billing model again. See `storage billing model `_ @@ -788,13 +789,11 @@ def storage_billing_model(self): def storage_billing_model(self, value): if not isinstance(value, str) and value is not None: raise ValueError( - "storage_billing_model must be a string (e.g. 'LOGICAL', 'PHYSICAL'), or None. " - f"Got {repr(value)}." + "storage_billing_model must be a string (e.g. 'LOGICAL'," + " 'PHYSICAL', 'STORAGE_BILLING_MODEL_UNSPECIFIED'), or None." + f" Got {repr(value)}." ) - if value: - self._properties["storageBillingModel"] = value - if value is None: - self._properties["storageBillingModel"] = "LOGICAL" + self._properties["storageBillingModel"] = value @classmethod def from_string(cls, full_dataset_id: str) -> "Dataset": diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index f2bdf8db5..3b1452805 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -955,7 +955,7 @@ def test_storage_billing_model_setter(self): def test_storage_billing_model_setter_with_none(self): dataset = self._make_one(self.DS_REF) dataset.storage_billing_model = None - self.assertEqual(dataset.storage_billing_model, "LOGICAL") + self.assertIsNone(dataset.storage_billing_model) def test_storage_billing_model_setter_with_invalid_type(self): dataset = self._make_one(self.DS_REF) From 53aad826a55dd1c36d014c80ada515273e8aa92a Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 11:12:41 -0400 Subject: [PATCH 163/536] chore(main): release 3.12.0 (#1642) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 21 +++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf64e2222..a93bde9eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,27 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.12.0](https://github.com/googleapis/python-bigquery/compare/v3.11.4...v3.12.0) (2023-10-02) + + +### Features + +* Add `Dataset.storage_billing_model` setter, use `client.update_dataset(ds, fields=["storage_billing_model"])` to update ([#1643](https://github.com/googleapis/python-bigquery/issues/1643)) ([5deba50](https://github.com/googleapis/python-bigquery/commit/5deba50b8c2d91d08bd5f5fb68742268c494b4a9)) +* Search statistics ([#1616](https://github.com/googleapis/python-bigquery/issues/1616)) ([b930e46](https://github.com/googleapis/python-bigquery/commit/b930e4673b0d1cceb53f683e47578d87af9361f3)) +* Widen retry predicate to include ServiceUnavailable ([#1641](https://github.com/googleapis/python-bigquery/issues/1641)) ([3e021a4](https://github.com/googleapis/python-bigquery/commit/3e021a46d387a0e3cb69913a281062fc221bb926)) + + +### Bug Fixes + +* Allow `storage_billing_model` to be explicitly set to `None` to use project default value ([#1665](https://github.com/googleapis/python-bigquery/issues/1665)) ([514d3e1](https://github.com/googleapis/python-bigquery/commit/514d3e12e5131bd589dff08893fd89bf40338ba3)) +* Relax timeout expectations ([#1645](https://github.com/googleapis/python-bigquery/issues/1645)) ([1760e94](https://github.com/googleapis/python-bigquery/commit/1760e945d16163980027fecf21113cd77ddc35a1)) +* Use isinstance() per E721, unpin flake8 ([#1659](https://github.com/googleapis/python-bigquery/issues/1659)) ([54a7769](https://github.com/googleapis/python-bigquery/commit/54a77694afcd80be4ba469c6ebb7ca8be112b04e)) + + +### Documentation + +* Revise update_table_expiration sample ([#1457](https://github.com/googleapis/python-bigquery/issues/1457)) ([03194e0](https://github.com/googleapis/python-bigquery/commit/03194e0156ed9201cb36301967c5af117d7ef29c)) + ## [3.11.4](https://github.com/googleapis/python-bigquery/compare/v3.11.3...v3.11.4) (2023-07-19) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index a97ccc0c8..ea71d198b 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.11.4" +__version__ = "3.12.0" From 40ba859059c3e463e17ea7781bc5a9aff8244c5d Mon Sep 17 00:00:00 2001 From: Jared Chapman Date: Thu, 5 Oct 2023 12:57:10 -0500 Subject: [PATCH 164/536] docs: revised `create_partitioned_table` sample (#1447) * docs: revised create_partitioned_table sample * update sample tests to use correct fixture --------- Co-authored-by: Tim Swast --- docs/snippets.py | 2 + samples/snippets/create_partitioned_table.py | 45 +++++++++++++++++++ .../snippets/create_partitioned_table_test.py | 34 ++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 samples/snippets/create_partitioned_table.py create mode 100644 samples/snippets/create_partitioned_table_test.py diff --git a/docs/snippets.py b/docs/snippets.py index 3a46cd36c..7f9b4f59e 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -125,6 +125,8 @@ def test_create_partitioned_table(client, to_delete): dataset = client.create_dataset(dataset_ref) to_delete.append(dataset) + # TODO(tswast): remove this snippet once cloud.google.com is updated to use + # samples/snippets/create_partitioned_table.py # [START bigquery_create_table_partitioned] # from google.cloud import bigquery # client = bigquery.Client() diff --git a/samples/snippets/create_partitioned_table.py b/samples/snippets/create_partitioned_table.py new file mode 100644 index 000000000..0277d7d0f --- /dev/null +++ b/samples/snippets/create_partitioned_table.py @@ -0,0 +1,45 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_partitioned_table(table_id): + your_fully_qualified_table_id = table_id + + # [START bigquery_create_table_partitioned] + from google.cloud import bigquery + + client = bigquery.Client() + + # Use format "your-project.your_dataset.your_table_name" for table_id + table_id = your_fully_qualified_table_id + schema = [ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + bigquery.SchemaField("date", "DATE"), + ] + table = bigquery.Table(table_id, schema=schema) + table.time_partitioning = bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="date", # name of column to use for partitioning + expiration_ms=1000 * 60 * 60 * 24 * 90, + ) # 90 days + + table = client.create_table(table) + + print( + f"Created table {table.project}.{table.dataset_id}.{table.table_id}, " + f"partitioned on column {table.time_partitioning.field}." + ) + # [END bigquery_create_table_partitioned] + return table diff --git a/samples/snippets/create_partitioned_table_test.py b/samples/snippets/create_partitioned_table_test.py new file mode 100644 index 000000000..0f684fcb0 --- /dev/null +++ b/samples/snippets/create_partitioned_table_test.py @@ -0,0 +1,34 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import create_partitioned_table + +if typing.TYPE_CHECKING: + import pytest + + +def test_create_partitioned_table( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + table = create_partitioned_table.create_partitioned_table(random_table_id) + + out, _ = capsys.readouterr() + assert "Created" in out + assert random_table_id in out + + assert table.time_partitioning.type_ == "DAY" + assert table.time_partitioning.field == "date" From b8c9276be011d971b941b583fd3d4417d438067f Mon Sep 17 00:00:00 2001 From: Jared Chapman Date: Thu, 5 Oct 2023 14:03:46 -0500 Subject: [PATCH 165/536] docs: revised relax column mode sample (#1467) * docs: Revised relax_column sample * add todo for snippets.py cleanup --------- Co-authored-by: Tim Swast --- docs/snippets.py | 2 ++ samples/snippets/relax_column.py | 52 +++++++++++++++++++++++++++ samples/snippets/relax_column_test.py | 46 ++++++++++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 samples/snippets/relax_column.py create mode 100644 samples/snippets/relax_column_test.py diff --git a/docs/snippets.py b/docs/snippets.py index 7f9b4f59e..62b0b6fd6 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -265,6 +265,8 @@ def test_relax_column(client, to_delete): dataset = client.create_dataset(dataset) to_delete.append(dataset) + # TODO(tswast): remove code sample once references to it on + # cloud.google.com are updated to samples/snippets/relax_column.py # [START bigquery_relax_column] # from google.cloud import bigquery # client = bigquery.Client() diff --git a/samples/snippets/relax_column.py b/samples/snippets/relax_column.py new file mode 100644 index 000000000..bcd79cee8 --- /dev/null +++ b/samples/snippets/relax_column.py @@ -0,0 +1,52 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + + +def relax_column(table_id: str) -> bigquery.Table: + orig_table_id = table_id + + # [START bigquery_relax_column] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to full name of the table you want to create. + table_id = "your-project.your_dataset.your_table" + + # [END bigquery_relax_column] + table_id = orig_table_id + + # [START bigquery_relax_column] + table = client.get_table(table_id) + new_schema = [] + for field in table.schema: + if field.mode != "REQUIRED": + new_schema.append(field) + else: + # SchemaField properties cannot be edited after initialization. + # To make changes, construct new SchemaField objects. + new_field = field.to_api_repr() + new_field["mode"] = "NULLABLE" + relaxed_field = bigquery.SchemaField.from_api_repr(new_field) + new_schema.append(relaxed_field) + + table.schema = new_schema + table = client.update_table(table, ["schema"]) + + print(f"Updated {table_id} schema: {table.schema}.") + + # [END bigquery_relax_column] + return table diff --git a/samples/snippets/relax_column_test.py b/samples/snippets/relax_column_test.py new file mode 100644 index 000000000..b40b13fa1 --- /dev/null +++ b/samples/snippets/relax_column_test.py @@ -0,0 +1,46 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from google.cloud import bigquery + +import relax_column + +if typing.TYPE_CHECKING: + import pytest + + +def test_relax_column( + capsys: "pytest.CaptureFixture[str]", + bigquery_client: bigquery.Client, + random_table_id: str, +) -> None: + table = bigquery.Table( + random_table_id, + schema=[ + bigquery.SchemaField("string_col", "STRING", mode="NULLABLE"), + bigquery.SchemaField("string_col2", "STRING", mode="REQUIRED"), + ], + ) + + bigquery_client.create_table(table) + table = relax_column.relax_column(random_table_id) + + out, _ = capsys.readouterr() + + assert all(field.mode == "NULLABLE" for field in table.schema) + assert "REQUIRED" not in out + assert "NULLABLE" in out + assert random_table_id in out From 924e081808375db8a4bb753928d62851c8eb1892 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 6 Oct 2023 22:32:55 -0400 Subject: [PATCH 166/536] chore: [autoapprove] bump cryptography from 41.0.3 to 41.0.4 (#1666) Source-Link: https://github.com/googleapis/synthtool/commit/dede53ff326079b457cfb1aae5bbdc82cbb51dc3 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:fac304457974bb530cc5396abd4ab25d26a469cd3bc97cbfb18c8d4324c584eb Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 4 ++-- .gitignore | 1 + .kokoro/requirements.txt | 49 ++++++++++++++++++++------------------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index a3da1b0d4..a9bdb1b7a 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3e3800bb100af5d7f9e810d48212b37812c1856d20ffeafb99ebe66461b61fc7 -# created: 2023-08-02T10:53:29.114535628Z + digest: sha256:fac304457974bb530cc5396abd4ab25d26a469cd3bc97cbfb18c8d4324c584eb +# created: 2023-10-02T21:31:03.517640371Z diff --git a/.gitignore b/.gitignore index 99c3a1444..168b201f6 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ docs.metadata # Virtual environment env/ +venv/ # Test logs coverage.xml diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 029bd342d..96d593c8c 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -113,30 +113,30 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==41.0.3 \ - --hash=sha256:0d09fb5356f975974dbcb595ad2d178305e5050656affb7890a1583f5e02a306 \ - --hash=sha256:23c2d778cf829f7d0ae180600b17e9fceea3c2ef8b31a99e3c694cbbf3a24b84 \ - --hash=sha256:3fb248989b6363906827284cd20cca63bb1a757e0a2864d4c1682a985e3dca47 \ - --hash=sha256:41d7aa7cdfded09b3d73a47f429c298e80796c8e825ddfadc84c8a7f12df212d \ - --hash=sha256:42cb413e01a5d36da9929baa9d70ca90d90b969269e5a12d39c1e0d475010116 \ - --hash=sha256:4c2f0d35703d61002a2bbdcf15548ebb701cfdd83cdc12471d2bae80878a4207 \ - --hash=sha256:4fd871184321100fb400d759ad0cddddf284c4b696568204d281c902fc7b0d81 \ - --hash=sha256:5259cb659aa43005eb55a0e4ff2c825ca111a0da1814202c64d28a985d33b087 \ - --hash=sha256:57a51b89f954f216a81c9d057bf1a24e2f36e764a1ca9a501a6964eb4a6800dd \ - --hash=sha256:652627a055cb52a84f8c448185922241dd5217443ca194d5739b44612c5e6507 \ - --hash=sha256:67e120e9a577c64fe1f611e53b30b3e69744e5910ff3b6e97e935aeb96005858 \ - --hash=sha256:6af1c6387c531cd364b72c28daa29232162010d952ceb7e5ca8e2827526aceae \ - --hash=sha256:6d192741113ef5e30d89dcb5b956ef4e1578f304708701b8b73d38e3e1461f34 \ - --hash=sha256:7efe8041897fe7a50863e51b77789b657a133c75c3b094e51b5e4b5cec7bf906 \ - --hash=sha256:84537453d57f55a50a5b6835622ee405816999a7113267739a1b4581f83535bd \ - --hash=sha256:8f09daa483aedea50d249ef98ed500569841d6498aa9c9f4b0531b9964658922 \ - --hash=sha256:95dd7f261bb76948b52a5330ba5202b91a26fbac13ad0e9fc8a3ac04752058c7 \ - --hash=sha256:a74fbcdb2a0d46fe00504f571a2a540532f4c188e6ccf26f1f178480117b33c4 \ - --hash=sha256:a983e441a00a9d57a4d7c91b3116a37ae602907a7618b882c8013b5762e80574 \ - --hash=sha256:ab8de0d091acbf778f74286f4989cf3d1528336af1b59f3e5d2ebca8b5fe49e1 \ - --hash=sha256:aeb57c421b34af8f9fe830e1955bf493a86a7996cc1338fe41b30047d16e962c \ - --hash=sha256:ce785cf81a7bdade534297ef9e490ddff800d956625020ab2ec2780a556c313e \ - --hash=sha256:d0d651aa754ef58d75cec6edfbd21259d93810b73f6ec246436a21b7841908de +cryptography==41.0.4 \ + --hash=sha256:004b6ccc95943f6a9ad3142cfabcc769d7ee38a3f60fb0dddbfb431f818c3a67 \ + --hash=sha256:047c4603aeb4bbd8db2756e38f5b8bd7e94318c047cfe4efeb5d715e08b49311 \ + --hash=sha256:0d9409894f495d465fe6fda92cb70e8323e9648af912d5b9141d616df40a87b8 \ + --hash=sha256:23a25c09dfd0d9f28da2352503b23e086f8e78096b9fd585d1d14eca01613e13 \ + --hash=sha256:2ed09183922d66c4ec5fdaa59b4d14e105c084dd0febd27452de8f6f74704143 \ + --hash=sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f \ + --hash=sha256:37480760ae08065437e6573d14be973112c9e6dcaf5f11d00147ee74f37a3829 \ + --hash=sha256:3b224890962a2d7b57cf5eeb16ccaafba6083f7b811829f00476309bce2fe0fd \ + --hash=sha256:5a0f09cefded00e648a127048119f77bc2b2ec61e736660b5789e638f43cc397 \ + --hash=sha256:5b72205a360f3b6176485a333256b9bcd48700fc755fef51c8e7e67c4b63e3ac \ + --hash=sha256:7e53db173370dea832190870e975a1e09c86a879b613948f09eb49324218c14d \ + --hash=sha256:7febc3094125fc126a7f6fb1f420d0da639f3f32cb15c8ff0dc3997c4549f51a \ + --hash=sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839 \ + --hash=sha256:86defa8d248c3fa029da68ce61fe735432b047e32179883bdb1e79ed9bb8195e \ + --hash=sha256:8ac4f9ead4bbd0bc8ab2d318f97d85147167a488be0e08814a37eb2f439d5cf6 \ + --hash=sha256:93530900d14c37a46ce3d6c9e6fd35dbe5f5601bf6b3a5c325c7bffc030344d9 \ + --hash=sha256:9eeb77214afae972a00dee47382d2591abe77bdae166bda672fb1e24702a3860 \ + --hash=sha256:b5f4dfe950ff0479f1f00eda09c18798d4f49b98f4e2006d644b3301682ebdca \ + --hash=sha256:c3391bd8e6de35f6f1140e50aaeb3e2b3d6a9012536ca23ab0d9c35ec18c8a91 \ + --hash=sha256:c880eba5175f4307129784eca96f4e70b88e57aa3f680aeba3bab0e980b0f37d \ + --hash=sha256:cecfefa17042941f94ab54f769c8ce0fe14beff2694e9ac684176a2535bf9714 \ + --hash=sha256:e40211b4923ba5a6dc9769eab704bdb3fbb58d56c5b336d30996c24fcf12aadb \ + --hash=sha256:efc8ad4e6fc4f1752ebfb58aefece8b4e3c4cae940b0994d43649bdfce8d0d4f # via # gcp-releasetool # secretstorage @@ -382,6 +382,7 @@ protobuf==3.20.3 \ # gcp-docuploader # gcp-releasetool # google-api-core + # googleapis-common-protos pyasn1==0.4.8 \ --hash=sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d \ --hash=sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba From faa50b9c9577eea6d487f35ed02dceae5b740e72 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 10 Oct 2023 10:07:23 -0400 Subject: [PATCH 167/536] chore: [autoapprove] Update `black` and `isort` to latest versions (#1678) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: [autoapprove] Update `black` and `isort` to latest versions Source-Link: https://github.com/googleapis/synthtool/commit/0c7b0333f44b2b7075447f43a121a12d15a7b76a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:08e34975760f002746b1d8c86fdc90660be45945ee6d9db914d1508acdf9a547 * update black in noxfile.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/requirements.txt | 6 +++--- .pre-commit-config.yaml | 2 +- google/cloud/bigquery/client.py | 3 --- google/cloud/bigquery/dataset.py | 1 - google/cloud/bigquery/job/query.py | 1 - noxfile.py | 2 +- samples/add_empty_column.py | 1 - samples/browse_table_data.py | 1 - samples/client_list_jobs.py | 1 - samples/client_load_partitioned_table.py | 1 - samples/client_query.py | 1 - samples/client_query_add_column.py | 1 - samples/client_query_batch.py | 1 - samples/client_query_destination_table.py | 1 - samples/client_query_destination_table_clustered.py | 1 - samples/client_query_destination_table_cmek.py | 1 - samples/client_query_destination_table_legacy.py | 1 - samples/client_query_dry_run.py | 1 - samples/client_query_legacy_sql.py | 1 - samples/client_query_relax_column.py | 1 - samples/client_query_w_array_params.py | 1 - samples/client_query_w_named_params.py | 1 - samples/client_query_w_positional_params.py | 1 - samples/client_query_w_struct_params.py | 1 - samples/client_query_w_timestamp_params.py | 1 - samples/copy_table.py | 1 - samples/copy_table_cmek.py | 1 - samples/copy_table_multiple_source.py | 1 - samples/create_dataset.py | 1 - samples/create_job.py | 1 - samples/create_routine.py | 1 - samples/create_routine_ddl.py | 1 - samples/create_table.py | 1 - samples/create_table_clustered.py | 1 - samples/create_table_range_partitioned.py | 1 - samples/dataset_exists.py | 1 - samples/delete_dataset.py | 1 - samples/delete_dataset_labels.py | 1 - samples/delete_routine.py | 1 - samples/delete_table.py | 1 - samples/download_public_data.py | 1 - samples/download_public_data_sandbox.py | 1 - samples/geography/insert_geojson.py | 1 - samples/geography/insert_wkt.py | 1 - samples/get_dataset.py | 1 - samples/get_dataset_labels.py | 1 - samples/get_routine.py | 1 - samples/get_table.py | 1 - samples/label_dataset.py | 1 - samples/list_datasets.py | 1 - samples/list_datasets_by_label.py | 1 - samples/list_routines.py | 1 - samples/list_tables.py | 1 - samples/load_table_clustered.py | 1 - samples/load_table_dataframe.py | 1 - samples/load_table_file.py | 1 - samples/load_table_uri_autodetect_csv.py | 1 - samples/load_table_uri_autodetect_json.py | 1 - samples/load_table_uri_avro.py | 1 - samples/load_table_uri_cmek.py | 1 - samples/load_table_uri_csv.py | 1 - samples/load_table_uri_orc.py | 1 - samples/load_table_uri_truncate_avro.py | 1 - samples/load_table_uri_truncate_csv.py | 1 - samples/load_table_uri_truncate_json.py | 1 - samples/load_table_uri_truncate_orc.py | 1 - samples/load_table_uri_truncate_parquet.py | 1 - samples/query_external_gcs_temporary_table.py | 1 - samples/query_external_sheets_permanent_table.py | 1 - samples/query_external_sheets_temporary_table.py | 1 - samples/query_no_cache.py | 1 - samples/query_pagination.py | 1 - samples/query_to_arrow.py | 1 - samples/snippets/create_table_cmek_test.py | 1 - .../create_table_external_data_configuration_test.py | 1 - samples/snippets/create_table_schema_from_json_test.py | 1 - samples/snippets/create_table_snapshot_test.py | 1 - samples/snippets/delete_label_table_test.py | 1 - samples/snippets/get_table_labels_test.py | 1 - samples/snippets/label_table_test.py | 1 - samples/snippets/load_table_schema_from_json_test.py | 1 - samples/snippets/nested_repeated_schema_test.py | 1 - samples/snippets/quickstart.py | 1 - samples/snippets/quickstart_test.py | 1 - samples/snippets/update_table_expiration_test.py | 1 - samples/snippets/view.py | 1 - samples/table_exists.py | 1 - samples/table_insert_rows.py | 1 - samples/table_insert_rows_explicit_none_insert_ids.py | 1 - samples/tests/test_add_empty_column.py | 1 - samples/tests/test_browse_table_data.py | 1 - samples/tests/test_client_list_jobs.py | 1 - samples/tests/test_client_load_partitioned_table.py | 1 - samples/tests/test_client_query.py | 1 - samples/tests/test_client_query_add_column.py | 1 - samples/tests/test_client_query_batch.py | 1 - samples/tests/test_client_query_destination_table.py | 1 - .../test_client_query_destination_table_clustered.py | 1 - .../tests/test_client_query_destination_table_cmek.py | 1 - .../test_client_query_destination_table_legacy.py | 1 - samples/tests/test_client_query_dry_run.py | 1 - samples/tests/test_client_query_legacy_sql.py | 1 - samples/tests/test_client_query_relax_column.py | 1 - samples/tests/test_client_query_w_array_params.py | 1 - samples/tests/test_client_query_w_named_params.py | 1 - samples/tests/test_client_query_w_positional_params.py | 1 - samples/tests/test_client_query_w_struct_params.py | 1 - samples/tests/test_client_query_w_timestamp_params.py | 1 - samples/tests/test_copy_table_multiple_source.py | 1 - samples/tests/test_create_dataset.py | 1 - samples/tests/test_dataset_exists.py | 1 - samples/tests/test_dataset_label_samples.py | 1 - samples/tests/test_delete_dataset.py | 1 - samples/tests/test_delete_table.py | 1 - samples/tests/test_get_dataset.py | 1 - samples/tests/test_get_table.py | 1 - samples/tests/test_list_tables.py | 1 - samples/tests/test_load_table_clustered.py | 1 - samples/tests/test_load_table_dataframe.py | 1 - samples/tests/test_load_table_uri_autodetect_csv.py | 1 - samples/tests/test_load_table_uri_autodetect_json.py | 1 - samples/tests/test_load_table_uri_cmek.py | 1 - samples/tests/test_load_table_uri_csv.py | 1 - samples/tests/test_load_table_uri_json.py | 1 - samples/tests/test_load_table_uri_orc.py | 1 - samples/tests/test_load_table_uri_parquet.py | 1 - .../tests/test_query_external_gcs_temporary_table.py | 1 - .../test_query_external_sheets_permanent_table.py | 1 - .../test_query_external_sheets_temporary_table.py | 1 - samples/tests/test_query_no_cache.py | 1 - samples/tests/test_query_pagination.py | 1 - samples/tests/test_query_script.py | 1 - samples/tests/test_query_to_arrow.py | 1 - samples/tests/test_table_exists.py | 1 - samples/tests/test_table_insert_rows.py | 1 - .../test_table_insert_rows_explicit_none_insert_ids.py | 1 - samples/tests/test_update_dataset_access.py | 1 - ...test_update_dataset_default_partition_expiration.py | 1 - .../test_update_dataset_default_table_expiration.py | 1 - samples/tests/test_update_dataset_description.py | 1 - .../test_update_table_require_partition_filter.py | 1 - samples/update_dataset_access.py | 1 - samples/update_dataset_default_partition_expiration.py | 1 - samples/update_dataset_default_table_expiration.py | 1 - samples/update_dataset_description.py | 1 - samples/update_routine.py | 1 - samples/update_table_require_partition_filter.py | 1 - tests/system/test_client.py | 1 - tests/unit/job/test_query.py | 1 - tests/unit/routine/test_remote_function_options.py | 1 - tests/unit/test_client.py | 2 -- tests/unit/test_dataset.py | 2 -- tests/unit/test_dbapi__helpers.py | 1 - tests/unit/test_external_config.py | 1 - tests/unit/test_magics.py | 10 ++++++---- tests/unit/test_table.py | 5 ----- 157 files changed, 13 insertions(+), 171 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index a9bdb1b7a..dd98abbde 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:fac304457974bb530cc5396abd4ab25d26a469cd3bc97cbfb18c8d4324c584eb -# created: 2023-10-02T21:31:03.517640371Z + digest: sha256:08e34975760f002746b1d8c86fdc90660be45945ee6d9db914d1508acdf9a547 +# created: 2023-10-09T14:06:13.397766266Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 96d593c8c..0332d3267 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -467,9 +467,9 @@ typing-extensions==4.4.0 \ --hash=sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa \ --hash=sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e # via -r requirements.in -urllib3==1.26.12 \ - --hash=sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e \ - --hash=sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997 +urllib3==1.26.17 \ + --hash=sha256:24d6a242c28d29af46c3fae832c36db3bbebcc533dd1bb549172cd739c82df21 \ + --hash=sha256:94a757d178c9be92ef5539b8840d48dc9cf1b2709c9d6b588232a055c524458b # via # requests # twine diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 19409cbd3..6a8e16950 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ repos: - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 23.7.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index b4783fc56..f7c7864a1 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -476,7 +476,6 @@ def list_datasets( span_attributes = {"path": path} def api_request(*args, **kwargs): - return self._call_api( retry, span_name="BigQuery.listDatasets", @@ -2686,7 +2685,6 @@ def load_table_from_dataframe( os.close(tmpfd) try: - if new_job_config.source_format == job.SourceFormat.PARQUET: if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: msg = ( @@ -2722,7 +2720,6 @@ def load_table_from_dataframe( ) else: - dataframe.to_csv( tmppath, index=False, diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index a9c1cd884..b7fed61c7 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -454,7 +454,6 @@ def __ne__(self, other): return not self == other def __repr__(self): - return f"" def _key(self): diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 429e33e7e..7de209b8d 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1231,7 +1231,6 @@ def referenced_tables(self): datasets_by_project_name = {} for table in self._job_statistics().get("referencedTables", ()): - t_project = table["projectId"] ds_id = table["datasetId"] diff --git a/noxfile.py b/noxfile.py index 93616485f..ba06f925d 100644 --- a/noxfile.py +++ b/noxfile.py @@ -24,7 +24,7 @@ MYPY_VERSION = "mypy==0.910" PYTYPE_VERSION = "pytype==2021.4.9" -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" BLACK_PATHS = ( "docs", "google", diff --git a/samples/add_empty_column.py b/samples/add_empty_column.py index 6d449d6e2..5541a6738 100644 --- a/samples/add_empty_column.py +++ b/samples/add_empty_column.py @@ -14,7 +14,6 @@ def add_empty_column(table_id: str) -> None: - # [START bigquery_add_empty_column] from google.cloud import bigquery diff --git a/samples/browse_table_data.py b/samples/browse_table_data.py index 74b903aa3..2fba65aeb 100644 --- a/samples/browse_table_data.py +++ b/samples/browse_table_data.py @@ -14,7 +14,6 @@ def browse_table_data(table_id: str) -> None: - # [START bigquery_browse_table] from google.cloud import bigquery diff --git a/samples/client_list_jobs.py b/samples/client_list_jobs.py index 7f1e39cb8..335d2ecec 100644 --- a/samples/client_list_jobs.py +++ b/samples/client_list_jobs.py @@ -14,7 +14,6 @@ def client_list_jobs() -> None: - # [START bigquery_list_jobs] from google.cloud import bigquery diff --git a/samples/client_load_partitioned_table.py b/samples/client_load_partitioned_table.py index 9956f3f00..cfdf24819 100644 --- a/samples/client_load_partitioned_table.py +++ b/samples/client_load_partitioned_table.py @@ -14,7 +14,6 @@ def client_load_partitioned_table(table_id: str) -> None: - # [START bigquery_load_table_partitioned] from google.cloud import bigquery diff --git a/samples/client_query.py b/samples/client_query.py index 091d3f98b..4df051ee2 100644 --- a/samples/client_query.py +++ b/samples/client_query.py @@ -14,7 +14,6 @@ def client_query() -> None: - # [START bigquery_query] from google.cloud import bigquery diff --git a/samples/client_query_add_column.py b/samples/client_query_add_column.py index 2da200bc5..ec14087fb 100644 --- a/samples/client_query_add_column.py +++ b/samples/client_query_add_column.py @@ -14,7 +14,6 @@ def client_query_add_column(table_id: str) -> None: - # [START bigquery_add_column_query_append] from google.cloud import bigquery diff --git a/samples/client_query_batch.py b/samples/client_query_batch.py index df164d1be..5c55e278e 100644 --- a/samples/client_query_batch.py +++ b/samples/client_query_batch.py @@ -19,7 +19,6 @@ def client_query_batch() -> "bigquery.QueryJob": - # [START bigquery_query_batch] from google.cloud import bigquery diff --git a/samples/client_query_destination_table.py b/samples/client_query_destination_table.py index b200f1cc6..486576fea 100644 --- a/samples/client_query_destination_table.py +++ b/samples/client_query_destination_table.py @@ -14,7 +14,6 @@ def client_query_destination_table(table_id: str) -> None: - # [START bigquery_query_destination_table] from google.cloud import bigquery diff --git a/samples/client_query_destination_table_clustered.py b/samples/client_query_destination_table_clustered.py index c4ab305f5..de9fff2d0 100644 --- a/samples/client_query_destination_table_clustered.py +++ b/samples/client_query_destination_table_clustered.py @@ -14,7 +14,6 @@ def client_query_destination_table_clustered(table_id: str) -> None: - # [START bigquery_query_clustered_table] from google.cloud import bigquery diff --git a/samples/client_query_destination_table_cmek.py b/samples/client_query_destination_table_cmek.py index 0fd44d189..040c96e22 100644 --- a/samples/client_query_destination_table_cmek.py +++ b/samples/client_query_destination_table_cmek.py @@ -14,7 +14,6 @@ def client_query_destination_table_cmek(table_id: str, kms_key_name: str) -> None: - # [START bigquery_query_destination_table_cmek] from google.cloud import bigquery diff --git a/samples/client_query_destination_table_legacy.py b/samples/client_query_destination_table_legacy.py index ee45d9a01..37f50fdb4 100644 --- a/samples/client_query_destination_table_legacy.py +++ b/samples/client_query_destination_table_legacy.py @@ -14,7 +14,6 @@ def client_query_destination_table_legacy(table_id: str) -> None: - # [START bigquery_query_legacy_large_results] from google.cloud import bigquery diff --git a/samples/client_query_dry_run.py b/samples/client_query_dry_run.py index 418b43cb5..bb4893c2a 100644 --- a/samples/client_query_dry_run.py +++ b/samples/client_query_dry_run.py @@ -19,7 +19,6 @@ def client_query_dry_run() -> "bigquery.QueryJob": - # [START bigquery_query_dry_run] from google.cloud import bigquery diff --git a/samples/client_query_legacy_sql.py b/samples/client_query_legacy_sql.py index c054e1f28..44917e4e0 100644 --- a/samples/client_query_legacy_sql.py +++ b/samples/client_query_legacy_sql.py @@ -14,7 +14,6 @@ def client_query_legacy_sql() -> None: - # [START bigquery_query_legacy] from google.cloud import bigquery diff --git a/samples/client_query_relax_column.py b/samples/client_query_relax_column.py index c96a1e7aa..22ecb33d1 100644 --- a/samples/client_query_relax_column.py +++ b/samples/client_query_relax_column.py @@ -14,7 +14,6 @@ def client_query_relax_column(table_id: str) -> None: - # [START bigquery_relax_column_query_append] from google.cloud import bigquery diff --git a/samples/client_query_w_array_params.py b/samples/client_query_w_array_params.py index 669713182..25592a94a 100644 --- a/samples/client_query_w_array_params.py +++ b/samples/client_query_w_array_params.py @@ -14,7 +14,6 @@ def client_query_w_array_params() -> None: - # [START bigquery_query_params_arrays] from google.cloud import bigquery diff --git a/samples/client_query_w_named_params.py b/samples/client_query_w_named_params.py index f42be1dc8..6dd72d44f 100644 --- a/samples/client_query_w_named_params.py +++ b/samples/client_query_w_named_params.py @@ -14,7 +14,6 @@ def client_query_w_named_params() -> None: - # [START bigquery_query_params_named] from google.cloud import bigquery diff --git a/samples/client_query_w_positional_params.py b/samples/client_query_w_positional_params.py index b088b305e..9cdde69ca 100644 --- a/samples/client_query_w_positional_params.py +++ b/samples/client_query_w_positional_params.py @@ -14,7 +14,6 @@ def client_query_w_positional_params() -> None: - # [START bigquery_query_params_positional] from google.cloud import bigquery diff --git a/samples/client_query_w_struct_params.py b/samples/client_query_w_struct_params.py index 6c5b78113..6b68e78ed 100644 --- a/samples/client_query_w_struct_params.py +++ b/samples/client_query_w_struct_params.py @@ -14,7 +14,6 @@ def client_query_w_struct_params() -> None: - # [START bigquery_query_params_structs] from google.cloud import bigquery diff --git a/samples/client_query_w_timestamp_params.py b/samples/client_query_w_timestamp_params.py index 07d64cc94..c1ade8782 100644 --- a/samples/client_query_w_timestamp_params.py +++ b/samples/client_query_w_timestamp_params.py @@ -14,7 +14,6 @@ def client_query_w_timestamp_params() -> None: - # [START bigquery_query_params_timestamps] import datetime diff --git a/samples/copy_table.py b/samples/copy_table.py index 8c6153fef..3ae6e9ebe 100644 --- a/samples/copy_table.py +++ b/samples/copy_table.py @@ -14,7 +14,6 @@ def copy_table(source_table_id: str, destination_table_id: str) -> None: - # [START bigquery_copy_table] from google.cloud import bigquery diff --git a/samples/copy_table_cmek.py b/samples/copy_table_cmek.py index f2e8a90f9..f03053fab 100644 --- a/samples/copy_table_cmek.py +++ b/samples/copy_table_cmek.py @@ -14,7 +14,6 @@ def copy_table_cmek(dest_table_id: str, orig_table_id: str, kms_key_name: str) -> None: - # [START bigquery_copy_table_cmek] from google.cloud import bigquery diff --git a/samples/copy_table_multiple_source.py b/samples/copy_table_multiple_source.py index 1163b1664..509b8951b 100644 --- a/samples/copy_table_multiple_source.py +++ b/samples/copy_table_multiple_source.py @@ -16,7 +16,6 @@ def copy_table_multiple_source(dest_table_id: str, table_ids: Sequence[str]) -> None: - # [START bigquery_copy_table_multiple_source] from google.cloud import bigquery diff --git a/samples/create_dataset.py b/samples/create_dataset.py index dea91798d..7f645933a 100644 --- a/samples/create_dataset.py +++ b/samples/create_dataset.py @@ -14,7 +14,6 @@ def create_dataset(dataset_id: str) -> None: - # [START bigquery_create_dataset] from google.cloud import bigquery diff --git a/samples/create_job.py b/samples/create_job.py index 129a08a1b..f335e2f7a 100644 --- a/samples/create_job.py +++ b/samples/create_job.py @@ -20,7 +20,6 @@ def create_job() -> "Union[LoadJob, CopyJob, ExtractJob, QueryJob]": - # [START bigquery_create_job] from google.cloud import bigquery diff --git a/samples/create_routine.py b/samples/create_routine.py index 96dc24210..8be1b6a99 100644 --- a/samples/create_routine.py +++ b/samples/create_routine.py @@ -19,7 +19,6 @@ def create_routine(routine_id: str) -> "bigquery.Routine": - # [START bigquery_create_routine] from google.cloud import bigquery diff --git a/samples/create_routine_ddl.py b/samples/create_routine_ddl.py index 56c7cfe24..231d5a142 100644 --- a/samples/create_routine_ddl.py +++ b/samples/create_routine_ddl.py @@ -14,7 +14,6 @@ def create_routine_ddl(routine_id: str) -> None: - # [START bigquery_create_routine_ddl] from google.cloud import bigquery diff --git a/samples/create_table.py b/samples/create_table.py index eaac54696..7fda370ce 100644 --- a/samples/create_table.py +++ b/samples/create_table.py @@ -14,7 +14,6 @@ def create_table(table_id: str) -> None: - # [START bigquery_create_table] from google.cloud import bigquery diff --git a/samples/create_table_clustered.py b/samples/create_table_clustered.py index 1686c519a..a9ad43e59 100644 --- a/samples/create_table_clustered.py +++ b/samples/create_table_clustered.py @@ -19,7 +19,6 @@ def create_table_clustered(table_id: str) -> "bigquery.Table": - # [START bigquery_create_table_clustered] from google.cloud import bigquery diff --git a/samples/create_table_range_partitioned.py b/samples/create_table_range_partitioned.py index 4dc45ed58..128ab87d9 100644 --- a/samples/create_table_range_partitioned.py +++ b/samples/create_table_range_partitioned.py @@ -19,7 +19,6 @@ def create_table_range_partitioned(table_id: str) -> "bigquery.Table": - # [START bigquery_create_table_range_partitioned] from google.cloud import bigquery diff --git a/samples/dataset_exists.py b/samples/dataset_exists.py index 221899a65..784f86524 100644 --- a/samples/dataset_exists.py +++ b/samples/dataset_exists.py @@ -14,7 +14,6 @@ def dataset_exists(dataset_id: str) -> None: - # [START bigquery_dataset_exists] from google.cloud import bigquery from google.cloud.exceptions import NotFound diff --git a/samples/delete_dataset.py b/samples/delete_dataset.py index b340ed57a..9c7644db0 100644 --- a/samples/delete_dataset.py +++ b/samples/delete_dataset.py @@ -14,7 +14,6 @@ def delete_dataset(dataset_id: str) -> None: - # [START bigquery_delete_dataset] from google.cloud import bigquery diff --git a/samples/delete_dataset_labels.py b/samples/delete_dataset_labels.py index ec5df09c1..d5efdf4ea 100644 --- a/samples/delete_dataset_labels.py +++ b/samples/delete_dataset_labels.py @@ -19,7 +19,6 @@ def delete_dataset_labels(dataset_id: str) -> "bigquery.Dataset": - # [START bigquery_delete_label_dataset] from google.cloud import bigquery diff --git a/samples/delete_routine.py b/samples/delete_routine.py index 7362a5fea..604e7f730 100644 --- a/samples/delete_routine.py +++ b/samples/delete_routine.py @@ -14,7 +14,6 @@ def delete_routine(routine_id: str) -> None: - # [START bigquery_delete_routine] from google.cloud import bigquery diff --git a/samples/delete_table.py b/samples/delete_table.py index 9e7ee170a..a8ac4617a 100644 --- a/samples/delete_table.py +++ b/samples/delete_table.py @@ -14,7 +14,6 @@ def delete_table(table_id: str) -> None: - # [START bigquery_delete_table] from google.cloud import bigquery diff --git a/samples/download_public_data.py b/samples/download_public_data.py index a488bbbb5..cb2ebd1fd 100644 --- a/samples/download_public_data.py +++ b/samples/download_public_data.py @@ -14,7 +14,6 @@ def download_public_data() -> None: - # [START bigquery_pandas_public_data] from google.cloud import bigquery diff --git a/samples/download_public_data_sandbox.py b/samples/download_public_data_sandbox.py index ce5200b4e..e165a31ce 100644 --- a/samples/download_public_data_sandbox.py +++ b/samples/download_public_data_sandbox.py @@ -14,7 +14,6 @@ def download_public_data_sandbox() -> None: - # [START bigquery_pandas_public_data_sandbox] from google.cloud import bigquery diff --git a/samples/geography/insert_geojson.py b/samples/geography/insert_geojson.py index 2db407b55..9a6f6c413 100644 --- a/samples/geography/insert_geojson.py +++ b/samples/geography/insert_geojson.py @@ -18,7 +18,6 @@ def insert_geojson( override_values: Optional[Mapping[str, str]] = None ) -> Sequence[Dict[str, object]]: - if override_values is None: override_values = {} diff --git a/samples/geography/insert_wkt.py b/samples/geography/insert_wkt.py index 25c7ee727..2923d2596 100644 --- a/samples/geography/insert_wkt.py +++ b/samples/geography/insert_wkt.py @@ -18,7 +18,6 @@ def insert_wkt( override_values: Optional[Mapping[str, str]] = None ) -> Sequence[Dict[str, object]]: - if override_values is None: override_values = {} diff --git a/samples/get_dataset.py b/samples/get_dataset.py index 5654cbdce..1e4ad2904 100644 --- a/samples/get_dataset.py +++ b/samples/get_dataset.py @@ -14,7 +14,6 @@ def get_dataset(dataset_id: str) -> None: - # [START bigquery_get_dataset] from google.cloud import bigquery diff --git a/samples/get_dataset_labels.py b/samples/get_dataset_labels.py index d97ee3c01..8dc8b9430 100644 --- a/samples/get_dataset_labels.py +++ b/samples/get_dataset_labels.py @@ -14,7 +14,6 @@ def get_dataset_labels(dataset_id: str) -> None: - # [START bigquery_get_dataset_labels] from google.cloud import bigquery diff --git a/samples/get_routine.py b/samples/get_routine.py index 031d9a127..96e85acc9 100644 --- a/samples/get_routine.py +++ b/samples/get_routine.py @@ -19,7 +19,6 @@ def get_routine(routine_id: str) -> "bigquery.Routine": - # [START bigquery_get_routine] from google.cloud import bigquery diff --git a/samples/get_table.py b/samples/get_table.py index 6195aaf9a..f71db7cee 100644 --- a/samples/get_table.py +++ b/samples/get_table.py @@ -14,7 +14,6 @@ def get_table(table_id: str) -> None: - # [START bigquery_get_table] from google.cloud import bigquery diff --git a/samples/label_dataset.py b/samples/label_dataset.py index a59743e5d..4fcc3dcd8 100644 --- a/samples/label_dataset.py +++ b/samples/label_dataset.py @@ -14,7 +14,6 @@ def label_dataset(dataset_id: str) -> None: - # [START bigquery_label_dataset] from google.cloud import bigquery diff --git a/samples/list_datasets.py b/samples/list_datasets.py index c1b6639a9..d9401e9ae 100644 --- a/samples/list_datasets.py +++ b/samples/list_datasets.py @@ -14,7 +14,6 @@ def list_datasets() -> None: - # [START bigquery_list_datasets] from google.cloud import bigquery diff --git a/samples/list_datasets_by_label.py b/samples/list_datasets_by_label.py index d1f264872..3a2bef632 100644 --- a/samples/list_datasets_by_label.py +++ b/samples/list_datasets_by_label.py @@ -14,7 +14,6 @@ def list_datasets_by_label() -> None: - # [START bigquery_list_datasets_by_label] from google.cloud import bigquery diff --git a/samples/list_routines.py b/samples/list_routines.py index bee7c23be..95ddd962e 100644 --- a/samples/list_routines.py +++ b/samples/list_routines.py @@ -14,7 +14,6 @@ def list_routines(dataset_id: str) -> None: - # [START bigquery_list_routines] from google.cloud import bigquery diff --git a/samples/list_tables.py b/samples/list_tables.py index df846961d..17c06370d 100644 --- a/samples/list_tables.py +++ b/samples/list_tables.py @@ -14,7 +14,6 @@ def list_tables(dataset_id: str) -> None: - # [START bigquery_list_tables] from google.cloud import bigquery diff --git a/samples/load_table_clustered.py b/samples/load_table_clustered.py index 87b6c76ce..749746882 100644 --- a/samples/load_table_clustered.py +++ b/samples/load_table_clustered.py @@ -19,7 +19,6 @@ def load_table_clustered(table_id: str) -> "bigquery.Table": - # [START bigquery_load_table_clustered] from google.cloud import bigquery diff --git a/samples/load_table_dataframe.py b/samples/load_table_dataframe.py index db4c131f2..2c668d183 100644 --- a/samples/load_table_dataframe.py +++ b/samples/load_table_dataframe.py @@ -19,7 +19,6 @@ def load_table_dataframe(table_id: str) -> "bigquery.Table": - # [START bigquery_load_table_dataframe] import datetime diff --git a/samples/load_table_file.py b/samples/load_table_file.py index 00226eb3c..838c3b105 100644 --- a/samples/load_table_file.py +++ b/samples/load_table_file.py @@ -19,7 +19,6 @@ def load_table_file(file_path: str, table_id: str) -> "bigquery.Table": - # [START bigquery_load_from_file] from google.cloud import bigquery diff --git a/samples/load_table_uri_autodetect_csv.py b/samples/load_table_uri_autodetect_csv.py index c412c63f1..ca4590581 100644 --- a/samples/load_table_uri_autodetect_csv.py +++ b/samples/load_table_uri_autodetect_csv.py @@ -14,7 +14,6 @@ def load_table_uri_autodetect_csv(table_id: str) -> None: - # [START bigquery_load_table_gcs_csv_autodetect] from google.cloud import bigquery diff --git a/samples/load_table_uri_autodetect_json.py b/samples/load_table_uri_autodetect_json.py index 9d0bc3f22..00e8dc1b2 100644 --- a/samples/load_table_uri_autodetect_json.py +++ b/samples/load_table_uri_autodetect_json.py @@ -14,7 +14,6 @@ def load_table_uri_autodetect_json(table_id: str) -> None: - # [START bigquery_load_table_gcs_json_autodetect] from google.cloud import bigquery diff --git a/samples/load_table_uri_avro.py b/samples/load_table_uri_avro.py index e9f7c39ed..a0e8c86a6 100644 --- a/samples/load_table_uri_avro.py +++ b/samples/load_table_uri_avro.py @@ -14,7 +14,6 @@ def load_table_uri_avro(table_id: str) -> None: - # [START bigquery_load_table_gcs_avro] from google.cloud import bigquery diff --git a/samples/load_table_uri_cmek.py b/samples/load_table_uri_cmek.py index 4dfc0d3b4..d54422028 100644 --- a/samples/load_table_uri_cmek.py +++ b/samples/load_table_uri_cmek.py @@ -14,7 +14,6 @@ def load_table_uri_cmek(table_id: str, kms_key_name: str) -> None: - # [START bigquery_load_table_gcs_json_cmek] from google.cloud import bigquery diff --git a/samples/load_table_uri_csv.py b/samples/load_table_uri_csv.py index 9cb8c6f20..d660a2195 100644 --- a/samples/load_table_uri_csv.py +++ b/samples/load_table_uri_csv.py @@ -14,7 +14,6 @@ def load_table_uri_csv(table_id: str) -> None: - # [START bigquery_load_table_gcs_csv] from google.cloud import bigquery diff --git a/samples/load_table_uri_orc.py b/samples/load_table_uri_orc.py index 7babd2630..c09129216 100644 --- a/samples/load_table_uri_orc.py +++ b/samples/load_table_uri_orc.py @@ -14,7 +14,6 @@ def load_table_uri_orc(table_id: str) -> None: - # [START bigquery_load_table_gcs_orc] from google.cloud import bigquery diff --git a/samples/load_table_uri_truncate_avro.py b/samples/load_table_uri_truncate_avro.py index 51c6636fa..307a4e4de 100644 --- a/samples/load_table_uri_truncate_avro.py +++ b/samples/load_table_uri_truncate_avro.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_avro(table_id: str) -> None: - # [START bigquery_load_table_gcs_avro_truncate] import io diff --git a/samples/load_table_uri_truncate_csv.py b/samples/load_table_uri_truncate_csv.py index ee8b34043..4bfd306cd 100644 --- a/samples/load_table_uri_truncate_csv.py +++ b/samples/load_table_uri_truncate_csv.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_csv(table_id: str) -> None: - # [START bigquery_load_table_gcs_csv_truncate] import io diff --git a/samples/load_table_uri_truncate_json.py b/samples/load_table_uri_truncate_json.py index e85e0808e..a05a3eda0 100644 --- a/samples/load_table_uri_truncate_json.py +++ b/samples/load_table_uri_truncate_json.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_json(table_id: str) -> None: - # [START bigquery_load_table_gcs_json_truncate] import io diff --git a/samples/load_table_uri_truncate_orc.py b/samples/load_table_uri_truncate_orc.py index c730099d1..1c704b745 100644 --- a/samples/load_table_uri_truncate_orc.py +++ b/samples/load_table_uri_truncate_orc.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_orc(table_id: str) -> None: - # [START bigquery_load_table_gcs_orc_truncate] import io diff --git a/samples/load_table_uri_truncate_parquet.py b/samples/load_table_uri_truncate_parquet.py index 3a0a55c8a..d74f79910 100644 --- a/samples/load_table_uri_truncate_parquet.py +++ b/samples/load_table_uri_truncate_parquet.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_parquet(table_id: str) -> None: - # [START bigquery_load_table_gcs_parquet_truncate] import io diff --git a/samples/query_external_gcs_temporary_table.py b/samples/query_external_gcs_temporary_table.py index 9bcb86aab..d622ab1a3 100644 --- a/samples/query_external_gcs_temporary_table.py +++ b/samples/query_external_gcs_temporary_table.py @@ -14,7 +14,6 @@ def query_external_gcs_temporary_table() -> None: - # [START bigquery_query_external_gcs_temp] from google.cloud import bigquery diff --git a/samples/query_external_sheets_permanent_table.py b/samples/query_external_sheets_permanent_table.py index a5855e66a..f23f44259 100644 --- a/samples/query_external_sheets_permanent_table.py +++ b/samples/query_external_sheets_permanent_table.py @@ -14,7 +14,6 @@ def query_external_sheets_permanent_table(dataset_id: str) -> None: - # [START bigquery_query_external_sheets_perm] from google.cloud import bigquery import google.auth diff --git a/samples/query_external_sheets_temporary_table.py b/samples/query_external_sheets_temporary_table.py index 944d3b826..876e4cc1a 100644 --- a/samples/query_external_sheets_temporary_table.py +++ b/samples/query_external_sheets_temporary_table.py @@ -14,7 +14,6 @@ def query_external_sheets_temporary_table() -> None: - # [START bigquery_query_external_sheets_temp] # [START bigquery_auth_drive_scope] from google.cloud import bigquery diff --git a/samples/query_no_cache.py b/samples/query_no_cache.py index f39c01dbc..7501b7fc0 100644 --- a/samples/query_no_cache.py +++ b/samples/query_no_cache.py @@ -14,7 +14,6 @@ def query_no_cache() -> None: - # [START bigquery_query_no_cache] from google.cloud import bigquery diff --git a/samples/query_pagination.py b/samples/query_pagination.py index 2e1654050..7ccaecff7 100644 --- a/samples/query_pagination.py +++ b/samples/query_pagination.py @@ -14,7 +14,6 @@ def query_pagination() -> None: - # [START bigquery_query_pagination] from google.cloud import bigquery diff --git a/samples/query_to_arrow.py b/samples/query_to_arrow.py index 157a93638..f1afc7c94 100644 --- a/samples/query_to_arrow.py +++ b/samples/query_to_arrow.py @@ -19,7 +19,6 @@ def query_to_arrow() -> "pyarrow.Table": - # [START bigquery_query_to_arrow] from google.cloud import bigquery diff --git a/samples/snippets/create_table_cmek_test.py b/samples/snippets/create_table_cmek_test.py index 429baf3fd..2b15fb350 100644 --- a/samples/snippets/create_table_cmek_test.py +++ b/samples/snippets/create_table_cmek_test.py @@ -24,7 +24,6 @@ def test_create_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - kms_key_name = ( "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/test" ) diff --git a/samples/snippets/create_table_external_data_configuration_test.py b/samples/snippets/create_table_external_data_configuration_test.py index 7bbcde32b..e97d7170d 100644 --- a/samples/snippets/create_table_external_data_configuration_test.py +++ b/samples/snippets/create_table_external_data_configuration_test.py @@ -24,7 +24,6 @@ def test_create_table_external_data_configuration( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - create_table_external_data_configuration.create_table_external_data_configuration( random_table_id ) diff --git a/samples/snippets/create_table_schema_from_json_test.py b/samples/snippets/create_table_schema_from_json_test.py index e99b92672..39b00cea0 100644 --- a/samples/snippets/create_table_schema_from_json_test.py +++ b/samples/snippets/create_table_schema_from_json_test.py @@ -24,7 +24,6 @@ def test_create_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - create_table_schema_from_json.create_table(random_table_id) out, _ = capsys.readouterr() diff --git a/samples/snippets/create_table_snapshot_test.py b/samples/snippets/create_table_snapshot_test.py index f1d8d0f7b..784dc3ddd 100644 --- a/samples/snippets/create_table_snapshot_test.py +++ b/samples/snippets/create_table_snapshot_test.py @@ -25,7 +25,6 @@ def test_create_table_snapshot( table_id: str, random_table_id: str, ) -> None: - create_table_snapshot.create_table_snapshot(table_id, random_table_id) out, _ = capsys.readouterr() diff --git a/samples/snippets/delete_label_table_test.py b/samples/snippets/delete_label_table_test.py index 54acae77f..80fcbb695 100644 --- a/samples/snippets/delete_label_table_test.py +++ b/samples/snippets/delete_label_table_test.py @@ -24,7 +24,6 @@ def test_delete_label_table( capsys: "pytest.CaptureFixture[str]", table_id: str, ) -> None: - table = delete_label_table.delete_label_table(table_id, "color") out, _ = capsys.readouterr() diff --git a/samples/snippets/get_table_labels_test.py b/samples/snippets/get_table_labels_test.py index f922e728c..95a95b60f 100644 --- a/samples/snippets/get_table_labels_test.py +++ b/samples/snippets/get_table_labels_test.py @@ -42,7 +42,6 @@ def test_get_table_labels_no_label( capsys: "pytest.CaptureFixture[str]", table_id: str, ) -> None: - get_table_labels.get_table_labels(table_id) out, _ = capsys.readouterr() diff --git a/samples/snippets/label_table_test.py b/samples/snippets/label_table_test.py index a77fb4b75..98f3b3cc7 100644 --- a/samples/snippets/label_table_test.py +++ b/samples/snippets/label_table_test.py @@ -24,7 +24,6 @@ def test_label_table( capsys: "pytest.CaptureFixture[str]", table_id: str, ) -> None: - label_table.label_table(table_id) out, _ = capsys.readouterr() diff --git a/samples/snippets/load_table_schema_from_json_test.py b/samples/snippets/load_table_schema_from_json_test.py index 267a6786c..c28875b0e 100644 --- a/samples/snippets/load_table_schema_from_json_test.py +++ b/samples/snippets/load_table_schema_from_json_test.py @@ -24,7 +24,6 @@ def test_load_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - load_table_schema_from_json.load_table(random_table_id) out, _ = capsys.readouterr() diff --git a/samples/snippets/nested_repeated_schema_test.py b/samples/snippets/nested_repeated_schema_test.py index 0386fc8fb..8bb8bda6a 100644 --- a/samples/snippets/nested_repeated_schema_test.py +++ b/samples/snippets/nested_repeated_schema_test.py @@ -24,7 +24,6 @@ def test_create_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - nested_repeated_schema.nested_schema(random_table_id) out, _ = capsys.readouterr() diff --git a/samples/snippets/quickstart.py b/samples/snippets/quickstart.py index f9628da7d..8f7f05c73 100644 --- a/samples/snippets/quickstart.py +++ b/samples/snippets/quickstart.py @@ -18,7 +18,6 @@ def run_quickstart(override_values: Optional[Dict[str, str]] = None) -> None: - if override_values is None: override_values = {} diff --git a/samples/snippets/quickstart_test.py b/samples/snippets/quickstart_test.py index 610c63c3b..98a5fdd4e 100644 --- a/samples/snippets/quickstart_test.py +++ b/samples/snippets/quickstart_test.py @@ -43,7 +43,6 @@ def test_quickstart( client: bigquery.Client, datasets_to_delete: List[str], ) -> None: - override_values = { "dataset_id": "my_new_dataset_{}".format(str(uuid.uuid4()).replace("-", "_")), } diff --git a/samples/snippets/update_table_expiration_test.py b/samples/snippets/update_table_expiration_test.py index 721bf53aa..1566c7763 100644 --- a/samples/snippets/update_table_expiration_test.py +++ b/samples/snippets/update_table_expiration_test.py @@ -28,7 +28,6 @@ def test_update_table_expiration( table_id: str, tmp_path: "pathlib.Path", ) -> None: - # This was not needed for function, only for test expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( days=5 diff --git a/samples/snippets/view.py b/samples/snippets/view.py index 5e976f68a..94f406890 100644 --- a/samples/snippets/view.py +++ b/samples/snippets/view.py @@ -127,7 +127,6 @@ def update_view(override_values: Optional[Dict[str, str]] = None) -> "bigquery.T def grant_access( override_values: Optional[OverridesDict] = None, ) -> Tuple["bigquery.Dataset", "bigquery.Dataset"]: - if override_values is None: override_values = {} diff --git a/samples/table_exists.py b/samples/table_exists.py index 6edba9239..c19d419ae 100644 --- a/samples/table_exists.py +++ b/samples/table_exists.py @@ -14,7 +14,6 @@ def table_exists(table_id: str) -> None: - # [START bigquery_table_exists] from google.cloud import bigquery from google.cloud.exceptions import NotFound diff --git a/samples/table_insert_rows.py b/samples/table_insert_rows.py index 8aa723fe0..d680b4c1e 100644 --- a/samples/table_insert_rows.py +++ b/samples/table_insert_rows.py @@ -14,7 +14,6 @@ def table_insert_rows(table_id: str) -> None: - # [START bigquery_table_insert_rows] from google.cloud import bigquery diff --git a/samples/table_insert_rows_explicit_none_insert_ids.py b/samples/table_insert_rows_explicit_none_insert_ids.py index b2bd06372..bbde034f7 100644 --- a/samples/table_insert_rows_explicit_none_insert_ids.py +++ b/samples/table_insert_rows_explicit_none_insert_ids.py @@ -14,7 +14,6 @@ def table_insert_rows_explicit_none_insert_ids(table_id: str) -> None: - # [START bigquery_table_insert_rows_explicit_none_insert_ids] from google.cloud import bigquery diff --git a/samples/tests/test_add_empty_column.py b/samples/tests/test_add_empty_column.py index 5c7184766..95d554621 100644 --- a/samples/tests/test_add_empty_column.py +++ b/samples/tests/test_add_empty_column.py @@ -21,7 +21,6 @@ def test_add_empty_column(capsys: "pytest.CaptureFixture[str]", table_id: str) -> None: - add_empty_column.add_empty_column(table_id) out, err = capsys.readouterr() assert "A new column has been added." in out diff --git a/samples/tests/test_browse_table_data.py b/samples/tests/test_browse_table_data.py index 368e5cad6..670eb7ccf 100644 --- a/samples/tests/test_browse_table_data.py +++ b/samples/tests/test_browse_table_data.py @@ -23,7 +23,6 @@ def test_browse_table_data( capsys: "pytest.CaptureFixture[str]", table_with_data_id: str ) -> None: - browse_table_data.browse_table_data(table_with_data_id) out, err = capsys.readouterr() assert "Downloaded 164656 rows from table {}".format(table_with_data_id) in out diff --git a/samples/tests/test_client_list_jobs.py b/samples/tests/test_client_list_jobs.py index a2845b7ad..6bb1bbd19 100644 --- a/samples/tests/test_client_list_jobs.py +++ b/samples/tests/test_client_list_jobs.py @@ -25,7 +25,6 @@ def test_client_list_jobs( capsys: "pytest.CaptureFixture[str]", client: "bigquery.Client" ) -> None: - job = create_job.create_job() client.cancel_job(job.job_id) job.cancel() diff --git a/samples/tests/test_client_load_partitioned_table.py b/samples/tests/test_client_load_partitioned_table.py index 24f86c700..2f6564afa 100644 --- a/samples/tests/test_client_load_partitioned_table.py +++ b/samples/tests/test_client_load_partitioned_table.py @@ -23,7 +23,6 @@ def test_client_load_partitioned_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - client_load_partitioned_table.client_load_partitioned_table(random_table_id) out, err = capsys.readouterr() assert "Loaded 50 rows to table {}".format(random_table_id) in out diff --git a/samples/tests/test_client_query.py b/samples/tests/test_client_query.py index a8e3c343e..5d4fb9c94 100644 --- a/samples/tests/test_client_query.py +++ b/samples/tests/test_client_query.py @@ -21,7 +21,6 @@ def test_client_query(capsys: "pytest.CaptureFixture[str]") -> None: - client_query.client_query() out, err = capsys.readouterr() assert "The query data:" in out diff --git a/samples/tests/test_client_query_add_column.py b/samples/tests/test_client_query_add_column.py index 1eb5a1ed6..c80f195a5 100644 --- a/samples/tests/test_client_query_add_column.py +++ b/samples/tests/test_client_query_add_column.py @@ -25,7 +25,6 @@ def test_client_query_add_column( capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/samples/tests/test_client_query_batch.py b/samples/tests/test_client_query_batch.py index 548fe3ac3..b1e0e2647 100644 --- a/samples/tests/test_client_query_batch.py +++ b/samples/tests/test_client_query_batch.py @@ -21,7 +21,6 @@ def test_client_query_batch(capsys: "pytest.CaptureFixture[str]") -> None: - job = client_query_batch.client_query_batch() out, err = capsys.readouterr() assert "Job {} is currently in state {}".format(job.job_id, job.state) in out diff --git a/samples/tests/test_client_query_destination_table.py b/samples/tests/test_client_query_destination_table.py index 067bc16ec..1487f6e65 100644 --- a/samples/tests/test_client_query_destination_table.py +++ b/samples/tests/test_client_query_destination_table.py @@ -23,7 +23,6 @@ def test_client_query_destination_table( capsys: "pytest.CaptureFixture[str]", table_id: str ) -> None: - client_query_destination_table.client_query_destination_table(table_id) out, err = capsys.readouterr() assert "Query results loaded to the table {}".format(table_id) in out diff --git a/samples/tests/test_client_query_destination_table_clustered.py b/samples/tests/test_client_query_destination_table_clustered.py index 02b131531..8a1e5bcd4 100644 --- a/samples/tests/test_client_query_destination_table_clustered.py +++ b/samples/tests/test_client_query_destination_table_clustered.py @@ -23,7 +23,6 @@ def test_client_query_destination_table_clustered( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - client_query_destination_table_clustered.client_query_destination_table_clustered( random_table_id ) diff --git a/samples/tests/test_client_query_destination_table_cmek.py b/samples/tests/test_client_query_destination_table_cmek.py index f2fe3bc39..4cb76be8e 100644 --- a/samples/tests/test_client_query_destination_table_cmek.py +++ b/samples/tests/test_client_query_destination_table_cmek.py @@ -23,7 +23,6 @@ def test_client_query_destination_table_cmek( capsys: "pytest.CaptureFixture[str]", random_table_id: str, kms_key_name: str ) -> None: - client_query_destination_table_cmek.client_query_destination_table_cmek( random_table_id, kms_key_name ) diff --git a/samples/tests/test_client_query_destination_table_legacy.py b/samples/tests/test_client_query_destination_table_legacy.py index 0071ee4a4..78a199bea 100644 --- a/samples/tests/test_client_query_destination_table_legacy.py +++ b/samples/tests/test_client_query_destination_table_legacy.py @@ -23,7 +23,6 @@ def test_client_query_destination_table_legacy( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - client_query_destination_table_legacy.client_query_destination_table_legacy( random_table_id ) diff --git a/samples/tests/test_client_query_dry_run.py b/samples/tests/test_client_query_dry_run.py index cffb152ef..cfc8100a1 100644 --- a/samples/tests/test_client_query_dry_run.py +++ b/samples/tests/test_client_query_dry_run.py @@ -21,7 +21,6 @@ def test_client_query_dry_run(capsys: "pytest.CaptureFixture[str]") -> None: - query_job = client_query_dry_run.client_query_dry_run() out, err = capsys.readouterr() assert "This query will process" in out diff --git a/samples/tests/test_client_query_legacy_sql.py b/samples/tests/test_client_query_legacy_sql.py index b12b5a934..98303cde9 100644 --- a/samples/tests/test_client_query_legacy_sql.py +++ b/samples/tests/test_client_query_legacy_sql.py @@ -22,7 +22,6 @@ def test_client_query_legacy_sql(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_legacy_sql.client_query_legacy_sql() out, err = capsys.readouterr() assert re.search(r"(Row[\w(){}:', ]+)$", out) diff --git a/samples/tests/test_client_query_relax_column.py b/samples/tests/test_client_query_relax_column.py index 93fa0f3cf..0df8463be 100644 --- a/samples/tests/test_client_query_relax_column.py +++ b/samples/tests/test_client_query_relax_column.py @@ -27,7 +27,6 @@ def test_client_query_relax_column( random_table_id: str, client: bigquery.Client, ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/samples/tests/test_client_query_w_array_params.py b/samples/tests/test_client_query_w_array_params.py index fcd3f6972..c302712fe 100644 --- a/samples/tests/test_client_query_w_array_params.py +++ b/samples/tests/test_client_query_w_array_params.py @@ -21,7 +21,6 @@ def test_client_query_w_array_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_array_params.client_query_w_array_params() out, err = capsys.readouterr() assert "James" in out diff --git a/samples/tests/test_client_query_w_named_params.py b/samples/tests/test_client_query_w_named_params.py index 85ef1dc4a..e4d66be41 100644 --- a/samples/tests/test_client_query_w_named_params.py +++ b/samples/tests/test_client_query_w_named_params.py @@ -21,7 +21,6 @@ def test_client_query_w_named_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_named_params.client_query_w_named_params() out, err = capsys.readouterr() assert "the" in out diff --git a/samples/tests/test_client_query_w_positional_params.py b/samples/tests/test_client_query_w_positional_params.py index 8ade676ab..61df76aaa 100644 --- a/samples/tests/test_client_query_w_positional_params.py +++ b/samples/tests/test_client_query_w_positional_params.py @@ -21,7 +21,6 @@ def test_client_query_w_positional_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_positional_params.client_query_w_positional_params() out, err = capsys.readouterr() assert "the" in out diff --git a/samples/tests/test_client_query_w_struct_params.py b/samples/tests/test_client_query_w_struct_params.py index 3198dbad5..5eea993ce 100644 --- a/samples/tests/test_client_query_w_struct_params.py +++ b/samples/tests/test_client_query_w_struct_params.py @@ -21,7 +21,6 @@ def test_client_query_w_struct_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_struct_params.client_query_w_struct_params() out, err = capsys.readouterr() assert "1" in out diff --git a/samples/tests/test_client_query_w_timestamp_params.py b/samples/tests/test_client_query_w_timestamp_params.py index a3bbccdd4..8147d4a96 100644 --- a/samples/tests/test_client_query_w_timestamp_params.py +++ b/samples/tests/test_client_query_w_timestamp_params.py @@ -21,7 +21,6 @@ def test_client_query_w_timestamp_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_timestamp_params.client_query_w_timestamp_params() out, err = capsys.readouterr() assert "2016, 12, 7, 9, 0" in out diff --git a/samples/tests/test_copy_table_multiple_source.py b/samples/tests/test_copy_table_multiple_source.py index e8b27d2a9..5d7991c91 100644 --- a/samples/tests/test_copy_table_multiple_source.py +++ b/samples/tests/test_copy_table_multiple_source.py @@ -29,7 +29,6 @@ def test_copy_table_multiple_source( random_dataset_id: str, client: bigquery.Client, ) -> None: - dataset = bigquery.Dataset(random_dataset_id) dataset.location = "US" dataset = client.create_dataset(dataset) diff --git a/samples/tests/test_create_dataset.py b/samples/tests/test_create_dataset.py index e7a897f8f..ecf5ef129 100644 --- a/samples/tests/test_create_dataset.py +++ b/samples/tests/test_create_dataset.py @@ -23,7 +23,6 @@ def test_create_dataset( capsys: "pytest.CaptureFixture[str]", random_dataset_id: str ) -> None: - create_dataset.create_dataset(random_dataset_id) out, err = capsys.readouterr() assert "Created dataset {}".format(random_dataset_id) in out diff --git a/samples/tests/test_dataset_exists.py b/samples/tests/test_dataset_exists.py index bfef4368f..744122e37 100644 --- a/samples/tests/test_dataset_exists.py +++ b/samples/tests/test_dataset_exists.py @@ -27,7 +27,6 @@ def test_dataset_exists( random_dataset_id: str, client: bigquery.Client, ) -> None: - dataset_exists.dataset_exists(random_dataset_id) out, err = capsys.readouterr() assert "Dataset {} is not found".format(random_dataset_id) in out diff --git a/samples/tests/test_dataset_label_samples.py b/samples/tests/test_dataset_label_samples.py index 75a024856..ec9ff9228 100644 --- a/samples/tests/test_dataset_label_samples.py +++ b/samples/tests/test_dataset_label_samples.py @@ -25,7 +25,6 @@ def test_dataset_label_samples( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - label_dataset.label_dataset(dataset_id) out, err = capsys.readouterr() assert "Labels added to {}".format(dataset_id) in out diff --git a/samples/tests/test_delete_dataset.py b/samples/tests/test_delete_dataset.py index 9347bf185..c2a77c475 100644 --- a/samples/tests/test_delete_dataset.py +++ b/samples/tests/test_delete_dataset.py @@ -21,7 +21,6 @@ def test_delete_dataset(capsys: "pytest.CaptureFixture[str]", dataset_id: str) -> None: - delete_dataset.delete_dataset(dataset_id) out, err = capsys.readouterr() assert "Deleted dataset '{}'.".format(dataset_id) in out diff --git a/samples/tests/test_delete_table.py b/samples/tests/test_delete_table.py index aca2df62f..5ba5622e8 100644 --- a/samples/tests/test_delete_table.py +++ b/samples/tests/test_delete_table.py @@ -21,7 +21,6 @@ def test_delete_table(capsys: "pytest.CaptureFixture[str]", table_id: str) -> None: - delete_table.delete_table(table_id) out, err = capsys.readouterr() assert "Deleted table '{}'.".format(table_id) in out diff --git a/samples/tests/test_get_dataset.py b/samples/tests/test_get_dataset.py index 97b30541b..07c7a28b7 100644 --- a/samples/tests/test_get_dataset.py +++ b/samples/tests/test_get_dataset.py @@ -21,7 +21,6 @@ def test_get_dataset(capsys: "pytest.CaptureFixture[str]", dataset_id: str) -> None: - get_dataset.get_dataset(dataset_id) out, err = capsys.readouterr() assert dataset_id in out diff --git a/samples/tests/test_get_table.py b/samples/tests/test_get_table.py index e6383010f..edf09762d 100644 --- a/samples/tests/test_get_table.py +++ b/samples/tests/test_get_table.py @@ -25,7 +25,6 @@ def test_get_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/samples/tests/test_list_tables.py b/samples/tests/test_list_tables.py index 7c726accc..c8a66b656 100644 --- a/samples/tests/test_list_tables.py +++ b/samples/tests/test_list_tables.py @@ -23,7 +23,6 @@ def test_list_tables( capsys: "pytest.CaptureFixture[str]", dataset_id: str, table_id: str ) -> None: - list_tables.list_tables(dataset_id) out, err = capsys.readouterr() assert "Tables contained in '{}':".format(dataset_id) in out diff --git a/samples/tests/test_load_table_clustered.py b/samples/tests/test_load_table_clustered.py index bbf3c671f..89059271a 100644 --- a/samples/tests/test_load_table_clustered.py +++ b/samples/tests/test_load_table_clustered.py @@ -26,7 +26,6 @@ def test_load_table_clustered( random_table_id: str, client: "bigquery.Client", ) -> None: - table = load_table_clustered.load_table_clustered(random_table_id) out, _ = capsys.readouterr() diff --git a/samples/tests/test_load_table_dataframe.py b/samples/tests/test_load_table_dataframe.py index 9a975493c..4aa872fa4 100644 --- a/samples/tests/test_load_table_dataframe.py +++ b/samples/tests/test_load_table_dataframe.py @@ -31,7 +31,6 @@ def test_load_table_dataframe( client: "bigquery.Client", random_table_id: str, ) -> None: - table = load_table_dataframe.load_table_dataframe(random_table_id) out, _ = capsys.readouterr() expected_column_names = [ diff --git a/samples/tests/test_load_table_uri_autodetect_csv.py b/samples/tests/test_load_table_uri_autodetect_csv.py index c9b410850..46b593713 100644 --- a/samples/tests/test_load_table_uri_autodetect_csv.py +++ b/samples/tests/test_load_table_uri_autodetect_csv.py @@ -23,7 +23,6 @@ def test_load_table_uri_autodetect_csv( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_autodetect_csv.load_table_uri_autodetect_csv(random_table_id) out, err = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_load_table_uri_autodetect_json.py b/samples/tests/test_load_table_uri_autodetect_json.py index 2c68a13db..43bf4e1b3 100644 --- a/samples/tests/test_load_table_uri_autodetect_json.py +++ b/samples/tests/test_load_table_uri_autodetect_json.py @@ -23,7 +23,6 @@ def test_load_table_uri_autodetect_csv( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_autodetect_json.load_table_uri_autodetect_json(random_table_id) out, err = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_load_table_uri_cmek.py b/samples/tests/test_load_table_uri_cmek.py index 1eb873843..1ae8689f9 100644 --- a/samples/tests/test_load_table_uri_cmek.py +++ b/samples/tests/test_load_table_uri_cmek.py @@ -23,7 +23,6 @@ def test_load_table_uri_cmek( capsys: "pytest.CaptureFixture[str]", random_table_id: str, kms_key_name: str ) -> None: - load_table_uri_cmek.load_table_uri_cmek(random_table_id, kms_key_name) out, _ = capsys.readouterr() assert "A table loaded with encryption configuration key" in out diff --git a/samples/tests/test_load_table_uri_csv.py b/samples/tests/test_load_table_uri_csv.py index a57224c84..8b4c733e8 100644 --- a/samples/tests/test_load_table_uri_csv.py +++ b/samples/tests/test_load_table_uri_csv.py @@ -23,7 +23,6 @@ def test_load_table_uri_csv( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_csv.load_table_uri_csv(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_load_table_uri_json.py b/samples/tests/test_load_table_uri_json.py index 3ad0ce29b..751c3867a 100644 --- a/samples/tests/test_load_table_uri_json.py +++ b/samples/tests/test_load_table_uri_json.py @@ -23,7 +23,6 @@ def test_load_table_uri_json( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_json.load_table_uri_json(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_load_table_uri_orc.py b/samples/tests/test_load_table_uri_orc.py index f31e8cabb..23d8288b7 100644 --- a/samples/tests/test_load_table_uri_orc.py +++ b/samples/tests/test_load_table_uri_orc.py @@ -23,7 +23,6 @@ def test_load_table_uri_orc( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_orc.load_table_uri_orc(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_load_table_uri_parquet.py b/samples/tests/test_load_table_uri_parquet.py index 5404e8584..ee7682388 100644 --- a/samples/tests/test_load_table_uri_parquet.py +++ b/samples/tests/test_load_table_uri_parquet.py @@ -23,7 +23,6 @@ def test_load_table_uri_json( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_parquet.load_table_uri_parquet(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_query_external_gcs_temporary_table.py b/samples/tests/test_query_external_gcs_temporary_table.py index 9590f3d7a..75b3ce6d8 100644 --- a/samples/tests/test_query_external_gcs_temporary_table.py +++ b/samples/tests/test_query_external_gcs_temporary_table.py @@ -23,7 +23,6 @@ def test_query_external_gcs_temporary_table( capsys: "pytest.CaptureFixture[str]", ) -> None: - query_external_gcs_temporary_table.query_external_gcs_temporary_table() out, err = capsys.readouterr() assert "There are 4 states with names starting with W." in out diff --git a/samples/tests/test_query_external_sheets_permanent_table.py b/samples/tests/test_query_external_sheets_permanent_table.py index 851839054..1a4c21330 100644 --- a/samples/tests/test_query_external_sheets_permanent_table.py +++ b/samples/tests/test_query_external_sheets_permanent_table.py @@ -23,7 +23,6 @@ def test_query_external_sheets_permanent_table( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - query_external_sheets_permanent_table.query_external_sheets_permanent_table( dataset_id ) diff --git a/samples/tests/test_query_external_sheets_temporary_table.py b/samples/tests/test_query_external_sheets_temporary_table.py index 58e0cb394..2ada20566 100644 --- a/samples/tests/test_query_external_sheets_temporary_table.py +++ b/samples/tests/test_query_external_sheets_temporary_table.py @@ -23,7 +23,6 @@ def test_query_external_sheets_temporary_table( capsys: "pytest.CaptureFixture[str]", ) -> None: - query_external_sheets_temporary_table.query_external_sheets_temporary_table() out, err = capsys.readouterr() assert "There are 2 states with names starting with W in the selected range." in out diff --git a/samples/tests/test_query_no_cache.py b/samples/tests/test_query_no_cache.py index f3fb039c9..fffa5dac7 100644 --- a/samples/tests/test_query_no_cache.py +++ b/samples/tests/test_query_no_cache.py @@ -22,7 +22,6 @@ def test_query_no_cache(capsys: "pytest.CaptureFixture[str]") -> None: - query_no_cache.query_no_cache() out, err = capsys.readouterr() assert re.search(r"(Row[\w(){}:', ]+)$", out) diff --git a/samples/tests/test_query_pagination.py b/samples/tests/test_query_pagination.py index daf711e49..adc946399 100644 --- a/samples/tests/test_query_pagination.py +++ b/samples/tests/test_query_pagination.py @@ -21,7 +21,6 @@ def test_query_pagination(capsys: "pytest.CaptureFixture[str]") -> None: - query_pagination.query_pagination() out, _ = capsys.readouterr() assert "The query data:" in out diff --git a/samples/tests/test_query_script.py b/samples/tests/test_query_script.py index 98dd1253b..50c973024 100644 --- a/samples/tests/test_query_script.py +++ b/samples/tests/test_query_script.py @@ -21,7 +21,6 @@ def test_query_script(capsys: "pytest.CaptureFixture[str]") -> None: - query_script.query_script() out, _ = capsys.readouterr() assert "Script created 2 child jobs." in out diff --git a/samples/tests/test_query_to_arrow.py b/samples/tests/test_query_to_arrow.py index d9b1aeb73..9fc8571e9 100644 --- a/samples/tests/test_query_to_arrow.py +++ b/samples/tests/test_query_to_arrow.py @@ -20,7 +20,6 @@ def test_query_to_arrow(capsys: "pytest.CaptureFixture[str]") -> None: - arrow_table = query_to_arrow.query_to_arrow() out, err = capsys.readouterr() assert "Downloaded 8 rows, 2 columns." in out diff --git a/samples/tests/test_table_exists.py b/samples/tests/test_table_exists.py index 7317ba747..35cf61cc8 100644 --- a/samples/tests/test_table_exists.py +++ b/samples/tests/test_table_exists.py @@ -25,7 +25,6 @@ def test_table_exists( capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client ) -> None: - table_exists.table_exists(random_table_id) out, err = capsys.readouterr() assert "Table {} is not found.".format(random_table_id) in out diff --git a/samples/tests/test_table_insert_rows.py b/samples/tests/test_table_insert_rows.py index 59024fa95..13400d69c 100644 --- a/samples/tests/test_table_insert_rows.py +++ b/samples/tests/test_table_insert_rows.py @@ -27,7 +27,6 @@ def test_table_insert_rows( random_table_id: str, client: bigquery.Client, ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py b/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py index 00456ce84..c6bfbf392 100644 --- a/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py +++ b/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py @@ -25,7 +25,6 @@ def test_table_insert_rows_explicit_none_insert_ids( capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/samples/tests/test_update_dataset_access.py b/samples/tests/test_update_dataset_access.py index 186a3b575..f17634fb0 100644 --- a/samples/tests/test_update_dataset_access.py +++ b/samples/tests/test_update_dataset_access.py @@ -23,7 +23,6 @@ def test_update_dataset_access( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - update_dataset_access.update_dataset_access(dataset_id) out, err = capsys.readouterr() assert ( diff --git a/samples/tests/test_update_dataset_default_partition_expiration.py b/samples/tests/test_update_dataset_default_partition_expiration.py index b7787dde3..4dd0d9296 100644 --- a/samples/tests/test_update_dataset_default_partition_expiration.py +++ b/samples/tests/test_update_dataset_default_partition_expiration.py @@ -23,7 +23,6 @@ def test_update_dataset_default_partition_expiration( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - ninety_days_ms = 90 * 24 * 60 * 60 * 1000 # in milliseconds update_dataset_default_partition_expiration.update_dataset_default_partition_expiration( diff --git a/samples/tests/test_update_dataset_default_table_expiration.py b/samples/tests/test_update_dataset_default_table_expiration.py index f780827f2..24df5446d 100644 --- a/samples/tests/test_update_dataset_default_table_expiration.py +++ b/samples/tests/test_update_dataset_default_table_expiration.py @@ -23,7 +23,6 @@ def test_update_dataset_default_table_expiration( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - one_day_ms = 24 * 60 * 60 * 1000 # in milliseconds update_dataset_default_table_expiration.update_dataset_default_table_expiration( diff --git a/samples/tests/test_update_dataset_description.py b/samples/tests/test_update_dataset_description.py index 5d1209e22..6d76337dc 100644 --- a/samples/tests/test_update_dataset_description.py +++ b/samples/tests/test_update_dataset_description.py @@ -23,7 +23,6 @@ def test_update_dataset_description( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - update_dataset_description.update_dataset_description(dataset_id) out, err = capsys.readouterr() assert "Updated description." in out diff --git a/samples/tests/test_update_table_require_partition_filter.py b/samples/tests/test_update_table_require_partition_filter.py index 68e1c1e2b..c86a22769 100644 --- a/samples/tests/test_update_table_require_partition_filter.py +++ b/samples/tests/test_update_table_require_partition_filter.py @@ -27,7 +27,6 @@ def test_update_table_require_partition_filter( random_table_id: str, client: bigquery.Client, ) -> None: - # Make a partitioned table. schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")] table = bigquery.Table(random_table_id, schema=schema) diff --git a/samples/update_dataset_access.py b/samples/update_dataset_access.py index fda784da5..2fb21aff2 100644 --- a/samples/update_dataset_access.py +++ b/samples/update_dataset_access.py @@ -14,7 +14,6 @@ def update_dataset_access(dataset_id: str) -> None: - # [START bigquery_update_dataset_access] from google.cloud import bigquery diff --git a/samples/update_dataset_default_partition_expiration.py b/samples/update_dataset_default_partition_expiration.py index 37456f3a0..7a3ccaca3 100644 --- a/samples/update_dataset_default_partition_expiration.py +++ b/samples/update_dataset_default_partition_expiration.py @@ -14,7 +14,6 @@ def update_dataset_default_partition_expiration(dataset_id: str) -> None: - # [START bigquery_update_dataset_partition_expiration] from google.cloud import bigquery diff --git a/samples/update_dataset_default_table_expiration.py b/samples/update_dataset_default_table_expiration.py index cf6f50d9f..ccd0d979e 100644 --- a/samples/update_dataset_default_table_expiration.py +++ b/samples/update_dataset_default_table_expiration.py @@ -14,7 +14,6 @@ def update_dataset_default_table_expiration(dataset_id: str) -> None: - # [START bigquery_update_dataset_expiration] from google.cloud import bigquery diff --git a/samples/update_dataset_description.py b/samples/update_dataset_description.py index 98c5fed43..b12baa999 100644 --- a/samples/update_dataset_description.py +++ b/samples/update_dataset_description.py @@ -14,7 +14,6 @@ def update_dataset_description(dataset_id: str) -> None: - # [START bigquery_update_dataset_description] from google.cloud import bigquery diff --git a/samples/update_routine.py b/samples/update_routine.py index 1a975a253..1a8908295 100644 --- a/samples/update_routine.py +++ b/samples/update_routine.py @@ -19,7 +19,6 @@ def update_routine(routine_id: str) -> "bigquery.Routine": - # [START bigquery_update_routine] from google.cloud import bigquery diff --git a/samples/update_table_require_partition_filter.py b/samples/update_table_require_partition_filter.py index 8221238a7..40b739b76 100644 --- a/samples/update_table_require_partition_filter.py +++ b/samples/update_table_require_partition_filter.py @@ -14,7 +14,6 @@ def update_table_require_partition_filter(table_id: str) -> None: - # [START bigquery_update_table_require_partition_filter] from google.cloud import bigquery diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 8fd532f4c..d3b95ec49 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2319,7 +2319,6 @@ def _table_exists(t): def test_dbapi_create_view(dataset_id: str): - query = f""" CREATE VIEW {dataset_id}.dbapi_create_view AS SELECT name, SUM(number) AS total diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 7d3186d47..26f1f2a73 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -54,7 +54,6 @@ def _make_resource(self, started=False, ended=False, location="US"): return resource def _verifyBooleanResourceProperties(self, job, config): - if "allowLargeResults" in config: self.assertEqual(job.allow_large_results, config["allowLargeResults"]) else: diff --git a/tests/unit/routine/test_remote_function_options.py b/tests/unit/routine/test_remote_function_options.py index b476dca1e..ffd57e8c1 100644 --- a/tests/unit/routine/test_remote_function_options.py +++ b/tests/unit/routine/test_remote_function_options.py @@ -32,7 +32,6 @@ def target_class(): def test_ctor(target_class): - options = target_class( endpoint=ENDPOINT, connection=CONNECTION, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index faa073dce..faa065116 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -111,7 +111,6 @@ def _make_list_partitons_meta_info(project, dataset_id, table_id, num_rows=0): class TestClient(unittest.TestCase): - PROJECT = "PROJECT" DS_ID = "DATASET_ID" TABLE_ID = "TABLE_ID" @@ -170,7 +169,6 @@ def test_ctor_w_empty_client_options(self): ) def test_ctor_w_client_options_dict(self): - creds = _make_credentials() http = object() client_options = {"api_endpoint": "https://www.foo-googleapis.com"} diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 3b1452805..7d7091092 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -693,7 +693,6 @@ def _verify_access_entry(self, access_entries, resource): self.assertEqual(a_entry.entity_id, r_entry["entity_id"]) def _verify_readonly_resource_properties(self, dataset, resource): - self.assertEqual(dataset.project, self.PROJECT) self.assertEqual(dataset.dataset_id, self.DS_ID) self.assertEqual(dataset.reference.project, self.PROJECT) @@ -717,7 +716,6 @@ def _verify_readonly_resource_properties(self, dataset, resource): self.assertIsNone(dataset.self_link) def _verify_resource_properties(self, dataset, resource): - self._verify_readonly_resource_properties(dataset, resource) if "defaultTableExpirationMs" in resource: diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index fae0c17e9..542f923d2 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -255,7 +255,6 @@ def test_non_empty_iterable(self): class TestRaiseOnClosedDecorator(unittest.TestCase): def _make_class(self): class Foo(object): - class_member = "class member" def __init__(self): diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 67fd13fa7..9fd16e699 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -21,7 +21,6 @@ class TestExternalConfig(unittest.TestCase): - SOURCE_URIS = ["gs://foo", "gs://bar"] BASE_RESOURCE = { diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index c0aa5d85e..70bfc4d0c 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -638,9 +638,9 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, bqstorage_client_patch, warnings.catch_warnings( - record=True - ) as warned: + with run_query_patch as run_query_mock, ( + bqstorage_client_patch + ), warnings.catch_warnings(record=True) as warned: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "--use_bqstorage_api", sql) @@ -801,7 +801,9 @@ def test_bigquery_magic_w_max_results_query_job_results_fails(): with pytest.raises( OSError - ), client_query_patch as client_query_mock, default_patch, close_transports_patch as close_transports: + ), client_query_patch as client_query_mock, ( + default_patch + ), close_transports_patch as close_transports: client_query_mock.return_value = query_job_mock ip.run_cell_magic("bigquery", "--max_results=5", sql) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index f31dc5528..a2c82c0a8 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -416,7 +416,6 @@ def test___str__(self): class TestTable(unittest.TestCase, _SchemaBase): - PROJECT = "prahj-ekt" DS_ID = "dataset-name" TABLE_NAME = "table-name" @@ -524,7 +523,6 @@ def _verifyReadonlyResourceProperties(self, table, resource): ) def _verifyResourceProperties(self, table, resource): - self._verifyReadonlyResourceProperties(table, resource) if "expirationTime" in resource: @@ -1501,7 +1499,6 @@ def test___str__(self): class Test_row_from_mapping(unittest.TestCase, _SchemaBase): - PROJECT = "prahj-ekt" DS_ID = "dataset-name" TABLE_NAME = "table-name" @@ -1790,7 +1787,6 @@ def _make_table_list_item(*args, **kwargs): return TableListItem(*args, **kwargs) def test_table_eq_table_ref(self): - table = self._make_table("project_foo.dataset_bar.table_baz") dataset_ref = DatasetReference("project_foo", "dataset_bar") table_ref = self._make_table_ref(dataset_ref, "table_baz") @@ -1814,7 +1810,6 @@ def test_table_eq_table_list_item(self): assert table_list_item == table def test_table_ref_eq_table_list_item(self): - dataset_ref = DatasetReference("project_foo", "dataset_bar") table_ref = self._make_table_ref(dataset_ref, "table_baz") table_list_item = self._make_table_list_item( From 5ceed056482f6d1f2fc45e7e6b84382de45c85ed Mon Sep 17 00:00:00 2001 From: Salem <115185670+SalemJorden@users.noreply.github.com> Date: Thu, 12 Oct 2023 10:29:00 -0500 Subject: [PATCH 168/536] feat: add `Model.transform_columns` property (#1661) --------- Co-authored-by: Salem Boyland Co-authored-by: Tim Swast --- google/cloud/bigquery/model.py | 71 ++++++++++++++++++++++++++++++++++ mypy.ini | 2 +- tests/unit/model/test_model.py | 68 ++++++++++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 4d2bc346c..45a88ab22 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -16,6 +16,8 @@ """Define resources for the BigQuery ML Models API.""" +from __future__ import annotations # type: ignore + import copy import datetime import typing @@ -184,6 +186,21 @@ def feature_columns(self) -> Sequence[standard_sql.StandardSqlField]: standard_sql.StandardSqlField.from_api_repr(column) for column in resource ] + @property + def transform_columns(self) -> Sequence[TransformColumn]: + """The input feature columns that were used to train this model. + The output transform columns used to train this model. + + See REST API: + https://cloud.google.com/bigquery/docs/reference/rest/v2/models#transformcolumn + + Read-only. + """ + resources: Sequence[Dict[str, Any]] = typing.cast( + Sequence[Dict[str, Any]], self._properties.get("transformColumns", []) + ) + return [TransformColumn(resource) for resource in resources] + @property def label_columns(self) -> Sequence[standard_sql.StandardSqlField]: """Label columns that were used to train this model. @@ -434,6 +451,60 @@ def __repr__(self): ) +class TransformColumn: + """TransformColumn represents a transform column feature. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/models#transformcolumn + + Args: + resource: + A dictionary representing a transform column feature. + """ + + def __init__(self, resource: Dict[str, Any]): + self._properties = resource + + @property + def name(self) -> Optional[str]: + """Name of the column.""" + return self._properties.get("name") + + @property + def type_(self) -> Optional[standard_sql.StandardSqlDataType]: + """Data type of the column after the transform. + + Returns: + Optional[google.cloud.bigquery.standard_sql.StandardSqlDataType]: + Data type of the column. + """ + type_json = self._properties.get("type") + if type_json is None: + return None + return standard_sql.StandardSqlDataType.from_api_repr(type_json) + + @property + def transform_sql(self) -> Optional[str]: + """The SQL expression used in the column transform.""" + return self._properties.get("transformSql") + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "TransformColumn": + """Constructs a transform column feature given its API representation + + Args: + resource: + Transform column feature representation from the API + + Returns: + Transform column feature parsed from ``resource``. + """ + this = cls({}) + resource = copy.deepcopy(resource) + this._properties = resource + return this + + def _model_arg_to_model_ref(value, default_project=None): """Helper to convert a string or Model to ModelReference. diff --git a/mypy.ini b/mypy.ini index 4505b4854..beaa679a8 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,3 +1,3 @@ [mypy] -python_version = 3.6 +python_version = 3.8 namespace_packages = True diff --git a/tests/unit/model/test_model.py b/tests/unit/model/test_model.py index 1ae988414..279a954c7 100644 --- a/tests/unit/model/test_model.py +++ b/tests/unit/model/test_model.py @@ -18,7 +18,9 @@ import pytest + import google.cloud._helpers +import google.cloud.bigquery.model KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" @@ -136,6 +138,7 @@ def test_from_api_repr(target_class): google.cloud._helpers._rfc3339_to_datetime(got.training_runs[2]["startTime"]) == expiration_time ) + assert got.transform_columns == [] def test_from_api_repr_w_minimal_resource(target_class): @@ -293,6 +296,71 @@ def test_feature_columns(object_under_test): assert object_under_test.feature_columns == expected +def test_from_api_repr_w_transform_columns(target_class): + resource = { + "modelReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + }, + "transformColumns": [ + { + "name": "transform_name", + "type": {"typeKind": "INT64"}, + "transformSql": "transform_sql", + } + ], + } + got = target_class.from_api_repr(resource) + assert len(got.transform_columns) == 1 + transform_column = got.transform_columns[0] + assert isinstance(transform_column, google.cloud.bigquery.model.TransformColumn) + assert transform_column.name == "transform_name" + + +def test_transform_column_name(): + transform_columns = google.cloud.bigquery.model.TransformColumn( + {"name": "is_female"} + ) + assert transform_columns.name == "is_female" + + +def test_transform_column_transform_sql(): + transform_columns = google.cloud.bigquery.model.TransformColumn( + {"transformSql": "is_female"} + ) + assert transform_columns.transform_sql == "is_female" + + +def test_transform_column_type(): + transform_columns = google.cloud.bigquery.model.TransformColumn( + {"type": {"typeKind": "BOOL"}} + ) + assert transform_columns.type_.type_kind == "BOOL" + + +def test_transform_column_type_none(): + transform_columns = google.cloud.bigquery.model.TransformColumn({}) + assert transform_columns.type_ is None + + +def test_transform_column_from_api_repr_with_unknown_properties(): + transform_column = google.cloud.bigquery.model.TransformColumn.from_api_repr( + { + "name": "is_female", + "type": {"typeKind": "BOOL"}, + "transformSql": "is_female", + "test": "one", + } + ) + assert transform_column._properties == { + "name": "is_female", + "type": {"typeKind": "BOOL"}, + "transformSql": "is_female", + "test": "one", + } + + def test_label_columns(object_under_test): from google.cloud.bigquery import standard_sql From f637e5e38bc6c8d45ac83eeeaf4a59d7ac6b5d49 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 18 Oct 2023 12:56:17 -0400 Subject: [PATCH 169/536] chore: consolidate PyarrowVersions helpers (#1679) * chore: consolidate pyarrow helpers * complete refactor * consolidate pyarrow version checking usage * add unit tests * fix unit_noextras testing error * fix tests * address comments * fix tests * coverage * accept suggestion Co-authored-by: Tim Swast * address comments --------- Co-authored-by: Tim Swast --- google/cloud/bigquery/_helpers.py | 79 +------------ google/cloud/bigquery/_pandas_helpers.py | 103 +++-------------- google/cloud/bigquery/_pyarrow_helpers.py | 123 +++++++++++++++++++++ google/cloud/bigquery/_versions_helpers.py | 94 ++++++++++++++++ google/cloud/bigquery/client.py | 31 +----- google/cloud/bigquery/table.py | 4 +- tests/unit/test__helpers.py | 78 ++----------- tests/unit/test__pandas_helpers.py | 28 +++-- tests/unit/test__pyarrow_helpers.py | 38 +++++++ tests/unit/test__versions_helpers.py | 62 +++++++++++ tests/unit/test_client.py | 24 +--- tests/unit/test_magics.py | 5 +- tests/unit/test_table.py | 17 +-- 13 files changed, 379 insertions(+), 307 deletions(-) create mode 100644 google/cloud/bigquery/_pyarrow_helpers.py create mode 100644 google/cloud/bigquery/_versions_helpers.py create mode 100644 tests/unit/test__pyarrow_helpers.py create mode 100644 tests/unit/test__versions_helpers.py diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 014a721a8..488766853 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -20,7 +20,7 @@ import math import re import os -from typing import Any, Optional, Union +from typing import Optional, Union from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -32,10 +32,7 @@ import packaging.version -from google.cloud.bigquery.exceptions import ( - LegacyBigQueryStorageError, - LegacyPyarrowError, -) +from google.cloud.bigquery import exceptions _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" @@ -57,8 +54,6 @@ _MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") -_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") - _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST" @@ -115,7 +110,7 @@ def verify_version(self): verify the version compatibility at runtime. Raises: - LegacyBigQueryStorageError: + exceptions.LegacyBigQueryStorageError: If the google-cloud-bigquery-storage package is outdated. """ if self.installed_version < _MIN_BQ_STORAGE_VERSION: @@ -123,76 +118,10 @@ def verify_version(self): "Dependency google-cloud-bigquery-storage is outdated, please upgrade " f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})." ) - raise LegacyBigQueryStorageError(msg) - - -class PyarrowVersions: - """Version comparisons for pyarrow package.""" - - def __init__(self): - self._installed_version = None - - @property - def installed_version(self) -> packaging.version.Version: - """Return the parsed version of pyarrow.""" - if self._installed_version is None: - import pyarrow # type: ignore - - self._installed_version = packaging.version.parse( - # Use 0.0.0, since it is earlier than any released version. - # Legacy versions also have the same property, but - # creating a LegacyVersion has been deprecated. - # https://github.com/pypa/packaging/issues/321 - getattr(pyarrow, "__version__", "0.0.0") - ) - - return self._installed_version - - @property - def use_compliant_nested_type(self) -> bool: - return self.installed_version.major >= 4 - - def try_import(self, raise_if_error: bool = False) -> Any: - """Verify that a recent enough version of pyarrow extra is - installed. - - The function assumes that pyarrow extra is installed, and should thus - be used in places where this assumption holds. - - Because `pip` can install an outdated version of this extra despite the - constraints in `setup.py`, the calling code can use this helper to - verify the version compatibility at runtime. - - Returns: - The ``pyarrow`` module or ``None``. - - Raises: - LegacyPyarrowError: - If the pyarrow package is outdated and ``raise_if_error`` is ``True``. - """ - try: - import pyarrow - except ImportError as exc: # pragma: NO COVER - if raise_if_error: - raise LegacyPyarrowError( - f"pyarrow package not found. Install pyarrow version >= {_MIN_PYARROW_VERSION}." - ) from exc - return None - - if self.installed_version < _MIN_PYARROW_VERSION: - if raise_if_error: - msg = ( - "Dependency pyarrow is outdated, please upgrade " - f"it to version >= {_MIN_PYARROW_VERSION} (version found: {self.installed_version})." - ) - raise LegacyPyarrowError(msg) - return None - - return pyarrow + raise exceptions.LegacyBigQueryStorageError(msg) BQ_STORAGE_VERSIONS = BQStorageVersions() -PYARROW_VERSIONS = PyarrowVersions() def _not_null(value, field): diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index a14dbec9b..ea790d6c9 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -23,9 +23,9 @@ import warnings from typing import Any, Union -from packaging import version - from google.cloud.bigquery import _helpers +from google.cloud.bigquery import _pyarrow_helpers +from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema try: @@ -49,7 +49,11 @@ db_dtypes_import_exception = exc date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype -pyarrow = _helpers.PYARROW_VERSIONS.try_import() +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() + +_BIGNUMERIC_SUPPORT = False +if pyarrow is not None: + _BIGNUMERIC_SUPPORT = True try: # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` @@ -119,87 +123,6 @@ def __init__(self): self.done = False -def pyarrow_datetime(): - return pyarrow.timestamp("us", tz=None) - - -def pyarrow_numeric(): - return pyarrow.decimal128(38, 9) - - -def pyarrow_bignumeric(): - # 77th digit is partial. - # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types - return pyarrow.decimal256(76, 38) - - -def pyarrow_time(): - return pyarrow.time64("us") - - -def pyarrow_timestamp(): - return pyarrow.timestamp("us", tz="UTC") - - -if pyarrow: - # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py - # When modifying it be sure to update it there as well. - BQ_TO_ARROW_SCALARS = { - "BOOL": pyarrow.bool_, - "BOOLEAN": pyarrow.bool_, - "BYTES": pyarrow.binary, - "DATE": pyarrow.date32, - "DATETIME": pyarrow_datetime, - "FLOAT": pyarrow.float64, - "FLOAT64": pyarrow.float64, - "GEOGRAPHY": pyarrow.string, - "INT64": pyarrow.int64, - "INTEGER": pyarrow.int64, - "NUMERIC": pyarrow_numeric, - "STRING": pyarrow.string, - "TIME": pyarrow_time, - "TIMESTAMP": pyarrow_timestamp, - } - ARROW_SCALAR_IDS_TO_BQ = { - # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes - pyarrow.bool_().id: "BOOL", - pyarrow.int8().id: "INT64", - pyarrow.int16().id: "INT64", - pyarrow.int32().id: "INT64", - pyarrow.int64().id: "INT64", - pyarrow.uint8().id: "INT64", - pyarrow.uint16().id: "INT64", - pyarrow.uint32().id: "INT64", - pyarrow.uint64().id: "INT64", - pyarrow.float16().id: "FLOAT64", - pyarrow.float32().id: "FLOAT64", - pyarrow.float64().id: "FLOAT64", - pyarrow.time32("ms").id: "TIME", - pyarrow.time64("ns").id: "TIME", - pyarrow.timestamp("ns").id: "TIMESTAMP", - pyarrow.date32().id: "DATE", - pyarrow.date64().id: "DATETIME", # because millisecond resolution - pyarrow.binary().id: "BYTES", - pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact scale and precision don't matter, see below. - pyarrow.decimal128(38, scale=9).id: "NUMERIC", - } - - if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal256 instances. - ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" - _BIGNUMERIC_SUPPORT = True - else: - _BIGNUMERIC_SUPPORT = False # pragma: NO COVER - -else: # pragma: NO COVER - BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER - ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER - _BIGNUMERIC_SUPPORT = False # pragma: NO COVER - - BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { "GEOGRAPHY": { b"ARROW:extension:name": b"google:sqlType:geography", @@ -240,7 +163,7 @@ def bq_to_arrow_data_type(field): if field_type_upper in schema._STRUCT_TYPES: return bq_to_arrow_struct_data_type(field) - data_type_constructor = BQ_TO_ARROW_SCALARS.get(field_type_upper) + data_type_constructor = _pyarrow_helpers.bq_to_arrow_scalars(field_type_upper) if data_type_constructor is None: return None return data_type_constructor() @@ -568,7 +491,9 @@ def augment_schema(dataframe, current_bq_schema): if pyarrow.types.is_list(arrow_table.type): # `pyarrow.ListType` detected_mode = "REPEATED" - detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.values.type.id) + detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq( + arrow_table.values.type.id + ) # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds # it to such datetimes, causing them to be recognized as TIMESTAMP type. @@ -584,7 +509,7 @@ def augment_schema(dataframe, current_bq_schema): detected_type = "DATETIME" else: detected_mode = field.mode - detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.type.id) + detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id) if detected_type is None: unknown_type_fields.append(field) @@ -705,13 +630,13 @@ def dataframe_to_parquet( This argument is ignored for ``pyarrow`` versions earlier than ``4.0.0``. """ - pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) + pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) import pyarrow.parquet # type: ignore kwargs = ( {"use_compliant_nested_type": parquet_use_compliant_nested_type} - if _helpers.PYARROW_VERSIONS.use_compliant_nested_type + if _versions_helpers.PYARROW_VERSIONS.use_compliant_nested_type else {} ) diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py new file mode 100644 index 000000000..7266e5e02 --- /dev/null +++ b/google/cloud/bigquery/_pyarrow_helpers.py @@ -0,0 +1,123 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared helper functions for connecting BigQuery and pyarrow.""" + +from typing import Any + +from packaging import version + +try: + import pyarrow # type: ignore +except ImportError: # pragma: NO COVER + pyarrow = None + + +def pyarrow_datetime(): + return pyarrow.timestamp("us", tz=None) + + +def pyarrow_numeric(): + return pyarrow.decimal128(38, 9) + + +def pyarrow_bignumeric(): + # 77th digit is partial. + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types + return pyarrow.decimal256(76, 38) + + +def pyarrow_time(): + return pyarrow.time64("us") + + +def pyarrow_timestamp(): + return pyarrow.timestamp("us", tz="UTC") + + +_BQ_TO_ARROW_SCALARS = {} +_ARROW_SCALAR_IDS_TO_BQ = {} + +if pyarrow: + # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py + # When modifying it be sure to update it there as well. + # Note(todo!!): type "BIGNUMERIC"'s matching pyarrow type is added in _pandas_helpers.py + _BQ_TO_ARROW_SCALARS = { + "BOOL": pyarrow.bool_, + "BOOLEAN": pyarrow.bool_, + "BYTES": pyarrow.binary, + "DATE": pyarrow.date32, + "DATETIME": pyarrow_datetime, + "FLOAT": pyarrow.float64, + "FLOAT64": pyarrow.float64, + "GEOGRAPHY": pyarrow.string, + "INT64": pyarrow.int64, + "INTEGER": pyarrow.int64, + "NUMERIC": pyarrow_numeric, + "STRING": pyarrow.string, + "TIME": pyarrow_time, + "TIMESTAMP": pyarrow_timestamp, + } + + _ARROW_SCALAR_IDS_TO_BQ = { + # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes + pyarrow.bool_().id: "BOOL", + pyarrow.int8().id: "INT64", + pyarrow.int16().id: "INT64", + pyarrow.int32().id: "INT64", + pyarrow.int64().id: "INT64", + pyarrow.uint8().id: "INT64", + pyarrow.uint16().id: "INT64", + pyarrow.uint32().id: "INT64", + pyarrow.uint64().id: "INT64", + pyarrow.float16().id: "FLOAT64", + pyarrow.float32().id: "FLOAT64", + pyarrow.float64().id: "FLOAT64", + pyarrow.time32("ms").id: "TIME", + pyarrow.time64("ns").id: "TIME", + pyarrow.timestamp("ns").id: "TIMESTAMP", + pyarrow.date32().id: "DATE", + pyarrow.date64().id: "DATETIME", # because millisecond resolution + pyarrow.binary().id: "BYTES", + pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. + pyarrow.decimal128(38, scale=9).id: "NUMERIC", + } + + # Adds bignumeric support only if pyarrow version >= 3.0.0 + # Decimal256 support was added to arrow 3.0.0 + # https://arrow.apache.org/blog/2021/01/25/3.0.0-release/ + if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): + _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + _ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + + +def bq_to_arrow_scalars(bq_scalar: str): + """ + Returns: + The Arrow scalar type that the input BigQuery scalar type maps to. + If it cannot find the BigQuery scalar, return None. + """ + return _BQ_TO_ARROW_SCALARS.get(bq_scalar) + + +def arrow_scalar_ids_to_bq(arrow_scalar: Any): + """ + Returns: + The BigQuery scalar type that the input arrow scalar type maps to. + If it cannot find the arrow scalar, return None. + """ + return _ARROW_SCALAR_IDS_TO_BQ.get(arrow_scalar) diff --git a/google/cloud/bigquery/_versions_helpers.py b/google/cloud/bigquery/_versions_helpers.py new file mode 100644 index 000000000..1f04c74e0 --- /dev/null +++ b/google/cloud/bigquery/_versions_helpers.py @@ -0,0 +1,94 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared helper functions for verifying versions of installed modules.""" + +from typing import Any + +import packaging.version + +from google.cloud.bigquery import exceptions + + +_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") + + +class PyarrowVersions: + """Version comparisons for pyarrow package.""" + + def __init__(self): + self._installed_version = None + + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of pyarrow.""" + if self._installed_version is None: + import pyarrow # type: ignore + + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(pyarrow, "__version__", "0.0.0") + ) + + return self._installed_version + + @property + def use_compliant_nested_type(self) -> bool: + return self.installed_version.major >= 4 + + def try_import(self, raise_if_error: bool = False) -> Any: + """Verify that a recent enough version of pyarrow extra is installed. + + The function assumes that pyarrow extra is installed, and should thus + be used in places where this assumption holds. + + Because `pip` can install an outdated version of this extra despite + the constraints in `setup.py`, the calling code can use this helper + to verify the version compatibility at runtime. + + Returns: + The ``pyarrow`` module or ``None``. + + Raises: + exceptions.LegacyPyarrowError: + If the pyarrow package is outdated and ``raise_if_error`` is + ``True``. + """ + try: + import pyarrow + except ImportError as exc: # pragma: NO COVER + if raise_if_error: + raise exceptions.LegacyPyarrowError( + "pyarrow package not found. Install pyarrow version >=" + f" {_MIN_PYARROW_VERSION}." + ) from exc + return None + + if self.installed_version < _MIN_PYARROW_VERSION: + if raise_if_error: + msg = ( + "Dependency pyarrow is outdated, please upgrade" + f" it to version >= {_MIN_PYARROW_VERSION}" + f" (version found: {self.installed_version})." + ) + raise exceptions.LegacyPyarrowError(msg) + return None + + return pyarrow + + +PYARROW_VERSIONS = PyarrowVersions() diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index f7c7864a1..ed75215b6 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -27,7 +27,6 @@ import json import math import os -import packaging.version import tempfile import typing from typing import ( @@ -45,13 +44,6 @@ import uuid import warnings -try: - import pyarrow # type: ignore - - _PYARROW_VERSION = packaging.version.parse(pyarrow.__version__) -except ImportError: # pragma: NO COVER - pyarrow = None - from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload # type: ignore from google.resumable_media.requests import ResumableUpload @@ -84,12 +76,13 @@ from google.cloud.bigquery._helpers import _DEFAULT_HOST from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery import exceptions as bq_exceptions from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( @@ -121,7 +114,8 @@ from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.format_options import ParquetOptions -from google.cloud.bigquery import _helpers + +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() TimeoutType = Union[float, None] ResumableTimeoutType = Union[ @@ -159,9 +153,6 @@ TIMEOUT_HEADER = "X-Server-Timeout" -# https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 -_PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) - class Project(object): """Wrapper for resource describing a BigQuery project. @@ -574,7 +565,7 @@ def _ensure_bqstorage_client( try: BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: + except bq_exceptions.LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return None if bqstorage_client is None: @@ -2686,16 +2677,6 @@ def load_table_from_dataframe( try: if new_job_config.source_format == job.SourceFormat.PARQUET: - if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: - msg = ( - "Loading dataframe data in PARQUET format with pyarrow " - f"{_PYARROW_VERSION} can result in data corruption. It is " - "therefore *strongly* advised to use a different pyarrow " - "version or a different source format. " - "See: https://github.com/googleapis/python-bigquery/issues/781" - ) - warnings.warn(msg, category=RuntimeWarning) - if new_job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() @@ -2714,7 +2695,7 @@ def load_table_from_dataframe( compression=parquet_compression, **( {"use_compliant_nested_type": True} - if _helpers.PYARROW_VERSIONS.use_compliant_nested_type + if _versions_helpers.PYARROW_VERSIONS.use_compliant_nested_type else {} ), ) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 462447d51..a967a1795 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -61,7 +61,7 @@ from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.enums import DefaultPandasDTypes -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery import exceptions from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -1616,7 +1616,7 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): try: _helpers.BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: + except exceptions.LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return False diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 4fb86f665..40223f041 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -19,16 +19,13 @@ import mock +from google.cloud.bigquery import exceptions + try: from google.cloud import bigquery_storage # type: ignore except ImportError: # pragma: NO COVER bigquery_storage = None -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - @unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") class TestBQStorageVersions(unittest.TestCase): @@ -50,28 +47,24 @@ def _call_fut(self): return _helpers.BQ_STORAGE_VERSIONS.verify_version() def test_raises_no_error_w_recent_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): try: self._call_fut() - except LegacyBigQueryStorageError: # pragma: NO COVER + except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER self.fail("Legacy error raised with a non-legacy dependency version.") def test_raises_error_w_legacy_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): - with self.assertRaises(LegacyBigQueryStorageError): + with self.assertRaises(exceptions.LegacyBigQueryStorageError): self._call_fut() def test_raises_error_w_unknown_bqstorage_version(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: del fake_module.__version__ error_pattern = r"version found: 0.0.0" - with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): + with self.assertRaisesRegex( + exceptions.LegacyBigQueryStorageError, error_pattern + ): self._call_fut() def test_installed_version_returns_cached(self): @@ -100,63 +93,6 @@ def test_is_read_session_optional_false(self): assert not versions.is_read_session_optional -@unittest.skipIf(pyarrow is None, "Requires `pyarrow`") -class TestPyarrowVersions(unittest.TestCase): - def tearDown(self): - from google.cloud.bigquery import _helpers - - # Reset any cached versions since it may not match reality. - _helpers.PYARROW_VERSIONS._installed_version = None - - def _object_under_test(self): - from google.cloud.bigquery import _helpers - - return _helpers.PyarrowVersions() - - def _call_try_import(self, **kwargs): - from google.cloud.bigquery import _helpers - - _helpers.PYARROW_VERSIONS._installed_version = None - return _helpers.PYARROW_VERSIONS.try_import(**kwargs) - - def test_try_import_raises_no_error_w_recent_pyarrow(self): - from google.cloud.bigquery.exceptions import LegacyPyarrowError - - with mock.patch("pyarrow.__version__", new="5.0.0"): - try: - pyarrow = self._call_try_import(raise_if_error=True) - self.assertIsNotNone(pyarrow) - except LegacyPyarrowError: # pragma: NO COVER - self.fail("Legacy error raised with a non-legacy dependency version.") - - def test_try_import_returns_none_w_legacy_pyarrow(self): - with mock.patch("pyarrow.__version__", new="2.0.0"): - pyarrow = self._call_try_import() - self.assertIsNone(pyarrow) - - def test_try_import_raises_error_w_legacy_pyarrow(self): - from google.cloud.bigquery.exceptions import LegacyPyarrowError - - with mock.patch("pyarrow.__version__", new="2.0.0"): - with self.assertRaises(LegacyPyarrowError): - self._call_try_import(raise_if_error=True) - - def test_installed_version_returns_cached(self): - versions = self._object_under_test() - versions._installed_version = object() - assert versions.installed_version is versions._installed_version - - def test_installed_version_returns_parsed_version(self): - versions = self._object_under_test() - - with mock.patch("pyarrow.__version__", new="1.2.3"): - version = versions.installed_version - - assert version.major == 1 - assert version.minor == 2 - assert version.micro == 3 - - class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _not_null diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index a4cc1fefb..7724f308b 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -41,10 +41,12 @@ from google.cloud.bigquery import exceptions from google.cloud.bigquery import _helpers +from google.cloud.bigquery import _pyarrow_helpers +from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT -pyarrow = _helpers.PYARROW_VERSIONS.try_import() +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() if pyarrow: import pyarrow.parquet @@ -346,14 +348,14 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field04", pyarrow.int64()), pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), + pyarrow.field("field07", _pyarrow_helpers.pyarrow_numeric()), + pyarrow.field("field08", _pyarrow_helpers.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), - pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field11", _pyarrow_helpers.pyarrow_timestamp()), pyarrow.field("field12", pyarrow.date32()), - pyarrow.field("field13", module_under_test.pyarrow_time()), - pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field13", _pyarrow_helpers.pyarrow_time()), + pyarrow.field("field14", _pyarrow_helpers.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) expected = pyarrow.struct(expected) @@ -394,14 +396,14 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field04", pyarrow.int64()), pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), + pyarrow.field("field07", _pyarrow_helpers.pyarrow_numeric()), + pyarrow.field("field08", _pyarrow_helpers.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), - pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field11", _pyarrow_helpers.pyarrow_timestamp()), pyarrow.field("field12", pyarrow.date32()), - pyarrow.field("field13", module_under_test.pyarrow_time()), - pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field13", _pyarrow_helpers.pyarrow_time()), + pyarrow.field("field14", _pyarrow_helpers.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) expected_value_type = pyarrow.struct(expected) @@ -1117,7 +1119,9 @@ def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): mock_pyarrow_import.side_effect = exceptions.LegacyPyarrowError( "pyarrow not installed" ) - monkeypatch.setattr(_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import) + monkeypatch.setattr( + _versions_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import + ) with pytest.raises(exceptions.LegacyPyarrowError): module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None) diff --git a/tests/unit/test__pyarrow_helpers.py b/tests/unit/test__pyarrow_helpers.py new file mode 100644 index 000000000..f0a872c88 --- /dev/null +++ b/tests/unit/test__pyarrow_helpers.py @@ -0,0 +1,38 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + + +pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") + + +@pytest.fixture +def module_under_test(): + from google.cloud.bigquery import _pyarrow_helpers + + return _pyarrow_helpers + + +def test_bq_to_arrow_scalars(module_under_test): + assert ( + module_under_test.bq_to_arrow_scalars("BIGNUMERIC") + == module_under_test.pyarrow_bignumeric + ) + assert module_under_test.bq_to_arrow_scalars("UNKNOWN_TYPE") is None + + +def test_arrow_scalar_ids_to_bq(module_under_test): + assert module_under_test.arrow_scalar_ids_to_bq(pyarrow.bool_().id) == "BOOL" + assert module_under_test.arrow_scalar_ids_to_bq("UNKNOWN_TYPE") is None diff --git a/tests/unit/test__versions_helpers.py b/tests/unit/test__versions_helpers.py new file mode 100644 index 000000000..21386610b --- /dev/null +++ b/tests/unit/test__versions_helpers.py @@ -0,0 +1,62 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import mock + +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import exceptions + +pyarrow = pytest.importorskip("pyarrow") + + +def test_try_import_raises_no_error_w_recent_pyarrow(): + versions = _versions_helpers.PyarrowVersions() + with mock.patch("pyarrow.__version__", new="5.0.0"): + try: + pyarrow = versions.try_import(raise_if_error=True) + assert pyarrow is not None + except exceptions.LegacyPyarrowError: # pragma: NO COVER + raise ("Legacy error raised with a non-legacy dependency version.") + + +def test_try_import_returns_none_w_legacy_pyarrow(): + versions = _versions_helpers.PyarrowVersions() + with mock.patch("pyarrow.__version__", new="2.0.0"): + pyarrow = versions.try_import() + assert pyarrow is None + + +def test_try_import_raises_error_w_legacy_pyarrow(): + versions = _versions_helpers.PyarrowVersions() + with mock.patch("pyarrow.__version__", new="2.0.0"): + with pytest.raises(exceptions.LegacyPyarrowError): + versions.try_import(raise_if_error=True) + + +def test_installed_version_returns_cached(): + versions = _versions_helpers.PyarrowVersions() + versions._installed_version = object() + assert versions.installed_version is versions._installed_version + + +def test_installed_version_returns_parsed_version(): + versions = _versions_helpers.PyarrowVersions() + with mock.patch("pyarrow.__version__", new="1.2.3"): + version = versions.installed_version + + assert version.major == 1 + assert version.minor == 2 + assert version.micro == 3 diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index faa065116..3143f2123 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -27,8 +27,8 @@ import warnings import mock -import packaging import requests +import packaging import pytest import pkg_resources @@ -65,6 +65,7 @@ from google.cloud import bigquery from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery import exceptions from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from google.cloud.bigquery import ParquetOptions @@ -821,14 +822,12 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_ensure_bqstorage_client_obsolete_dependency(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) patcher = mock.patch( "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: bqstorage_client = client._ensure_bqstorage_client() @@ -857,15 +856,13 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self): bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_ensure_bqstorage_client_existing_client_check_fails(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) mock_storage_client = mock.sentinel.mock_storage_client patcher = mock.patch( "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) @@ -8615,7 +8612,7 @@ def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): dataframe = pandas.DataFrame(records) pyarrow_version_patch = mock.patch( - "google.cloud.bigquery.client._PYARROW_VERSION", + "google.cloud.bigquery._versions_helpers.PYARROW_VERSIONS._installed_version", packaging.version.parse("2.0.0"), # A known bad version of pyarrow. ) get_table_patch = mock.patch( @@ -8628,22 +8625,13 @@ def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): ) with load_patch, get_table_patch, pyarrow_version_patch: - with warnings.catch_warnings(record=True) as warned: + with pytest.raises(exceptions.LegacyPyarrowError): client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION, ) - expected_warnings = [ - warning for warning in warned if "pyarrow" in str(warning).lower() - ] - assert len(expected_warnings) == 1 - assert issubclass(expected_warnings[0].category, RuntimeWarning) - msg = str(expected_warnings[0].message) - assert "pyarrow 2.0.0" in msg - assert "data corruption" in msg - @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 70bfc4d0c..0cab943f7 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -25,6 +25,7 @@ from test_utils.imports import maybe_fail_import from google.cloud import bigquery +from google.cloud.bigquery import exceptions as bq_exceptions from google.cloud.bigquery import job from google.cloud.bigquery import table from google.cloud.bigquery.retry import DEFAULT_TIMEOUT @@ -357,8 +358,6 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) def test__make_bqstorage_client_true_obsolete_dependency(): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -368,7 +367,7 @@ def test__make_bqstorage_client_true_obsolete_dependency(): patcher = mock.patch( "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + side_effect=bq_exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: got = magics._make_bqstorage_client(test_client, True, {}) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index a2c82c0a8..65eb659bf 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -28,6 +28,8 @@ import google.api_core.exceptions from test_utils.imports import maybe_fail_import +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import exceptions from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference @@ -40,17 +42,12 @@ bigquery_storage = None big_query_read_grpc_transport = None -from google.cloud.bigquery import _helpers -pyarrow = _helpers.PYARROW_VERSIONS.try_import() -PYARROW_VERSION = pkg_resources.parse_version("0.0.1") +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() if pyarrow: - import pyarrow import pyarrow.types - PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) - try: import pandas except (ImportError, AttributeError): # pragma: NO COVER @@ -73,8 +70,6 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None -PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") - if pandas is not None: PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version else: @@ -2262,13 +2257,11 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - iterator = self._make_one(first_page_response=None) # not cached patcher = mock.patch( "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: result = iterator._validate_bqstorage( @@ -2874,7 +2867,7 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): row_iterator = self._make_one(mock_client, api_request, path, schema) def mock_verify_version(): - raise _helpers.LegacyBigQueryStorageError("no bqstorage") + raise exceptions.LegacyBigQueryStorageError("no bqstorage") with mock.patch( "google.cloud.bigquery._helpers.BQ_STORAGE_VERSIONS.verify_version", From 8f187e62b256882c31b84c1d2373d56ad28cabb4 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 19 Oct 2023 11:48:47 -0700 Subject: [PATCH 170/536] chore: rename rst files to avoid conflict with service names (#1689) Source-Link: https://github.com/googleapis/synthtool/commit/d52e638b37b091054c869bfa6f5a9fedaba9e0dd Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:4f9b3b106ad0beafc2c8a415e3f62c1a0cc23cabea115dbe841b848f581cfe99 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/requirements.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index dd98abbde..7f291dbd5 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:08e34975760f002746b1d8c86fdc90660be45945ee6d9db914d1508acdf9a547 -# created: 2023-10-09T14:06:13.397766266Z + digest: sha256:4f9b3b106ad0beafc2c8a415e3f62c1a0cc23cabea115dbe841b848f581cfe99 +# created: 2023-10-18T20:26:37.410353675Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 0332d3267..16170d0ca 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -467,9 +467,9 @@ typing-extensions==4.4.0 \ --hash=sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa \ --hash=sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e # via -r requirements.in -urllib3==1.26.17 \ - --hash=sha256:24d6a242c28d29af46c3fae832c36db3bbebcc533dd1bb549172cd739c82df21 \ - --hash=sha256:94a757d178c9be92ef5539b8840d48dc9cf1b2709c9d6b588232a055c524458b +urllib3==1.26.18 \ + --hash=sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07 \ + --hash=sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0 # via # requests # twine From a40d7ae03149708fc34c962b43a6ac198780b6aa Mon Sep 17 00:00:00 2001 From: Jonathan Ostrander Date: Thu, 19 Oct 2023 16:07:37 -0400 Subject: [PATCH 171/536] fix: AccessEntry API representation parsing (#1682) * fix: AccessEntry API representation parsing Overriding the `AccessEntry#_properties` with a deep copy of the API resource overwrites the `role` property set in `AccessEntry.__init__` which isn't present in the resource if the `role` is set to `None`. This causes `AccessEntry`s generated from API representations to no longer evaluate to equal with equivalent `AccessEntry` resources instantiated through `AccessEntry.__init__`. The added unit test fails without the change and passes with the change. * build: formatting --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/dataset.py | 4 +--- tests/unit/test_dataset.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index b7fed61c7..0f1a0f3cc 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -501,9 +501,7 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry": if len(entry) != 0: raise ValueError("Entry has unexpected keys remaining.", entry) - config = cls(role, entity_type, entity_id) - config._properties = copy.deepcopy(resource) - return config + return cls(role, entity_type, entity_id) class Dataset(object): diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 7d7091092..0a709ab43 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -152,6 +152,22 @@ def test_from_api_repr_w_unknown_entity_type(self): exp_resource = entry.to_api_repr() self.assertEqual(resource, exp_resource) + def test_from_api_repr_wo_role(self): + resource = { + "view": { + "projectId": "my-project", + "datasetId": "my_dataset", + "tableId": "my_table", + } + } + entry = self._get_target_class().from_api_repr(resource) + exp_entry = self._make_one( + role=None, + entity_type="view", + entity_id=resource["view"], + ) + self.assertEqual(entry, exp_entry) + def test_to_api_repr_w_extra_properties(self): resource = { "role": "READER", From 2dded33626b3de6c4ab5e1229eb4c85786b2ff53 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 19 Oct 2023 16:56:00 -0500 Subject: [PATCH 172/536] docs: remove redundant `bigquery_update_table_expiration` code sample (#1673) New version of this sample added in https://github.com/googleapis/python-bigquery/pull/1457 and migrated to in the docs in internal change 570781706. Co-authored-by: Lingqing Gan --- docs/snippets.py | 46 ---------------------------------------------- 1 file changed, 46 deletions(-) diff --git a/docs/snippets.py b/docs/snippets.py index 62b0b6fd6..64f5361cd 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -203,52 +203,6 @@ def test_update_table_description(client, to_delete): # [END bigquery_update_table_description] -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_update_table_expiration(client, to_delete): - """Update a table's expiration time.""" - dataset_id = "update_table_expiration_dataset_{}".format(_millis()) - table_id = "update_table_expiration_table_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table = client.create_table(table) - - # TODO(thejaredchapman): After code sample has been updated from cloud.google.com delete this. - - # [START bigquery_update_table_expiration] - import datetime - - # from google.cloud import bigquery - # client = bigquery.Client() - # project = client.project - # dataset_ref = bigquery.DatasetReference(project, dataset_id) - # table_ref = dataset_ref.table('my_table') - # table = client.get_table(table_ref) # API request - - assert table.expires is None - - # set table to expire 5 days from now - expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( - days=5 - ) - table.expires = expiration - table = client.update_table(table, ["expires"]) # API request - - # expiration is stored in milliseconds - margin = datetime.timedelta(microseconds=1000) - assert expiration - margin <= table.expires <= expiration + margin - # [END bigquery_update_table_expiration] - - @pytest.mark.skip( reason=( "update_table() is flaky " From 345e06efa0866093607364f70245a95f88e5c9b1 Mon Sep 17 00:00:00 2001 From: nayaknishant Date: Mon, 23 Oct 2023 12:15:18 -0400 Subject: [PATCH 173/536] chore: increasing Shapely dependency upper bound (#1696) * adding ASCII support for external config * adding tests for preserveAscii... * adding tests for preserveAscii... * changing 'False' to False * linting * bumping up Shapely dependency upper bound --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 08106f694..4e87b3b84 100644 --- a/setup.py +++ b/setup.py @@ -71,7 +71,7 @@ "ipywidgets>=7.7.0", "ipykernel>=6.0.0", ], - "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <2.0dev"], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <3.0.0dev"], "ipython": [ "ipython>=7.23.1,!=8.1.0", "ipykernel>=6.0.0", From 76d0e5abc307b82b634c37d7d02dc4dee874f67b Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 23 Oct 2023 13:35:07 -0400 Subject: [PATCH 174/536] chore: benchmark revamp (#1694) * chore: benchmark revamp * foramt and add more documentation --- benchmark/README.md | 128 +++++++++++++++- benchmark/benchmark.py | 339 +++++++++++++++++++++++++++++++++++++---- benchmark/queries.json | 26 ++-- noxfile.py | 2 + 4 files changed, 450 insertions(+), 45 deletions(-) diff --git a/benchmark/README.md b/benchmark/README.md index 435926acb..33065807e 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -1,8 +1,128 @@ # BigQuery Benchmark -This directory contains benchmarks for BigQuery client. +This directory contains benchmark scripts for BigQuery client. It is created primarily for project +maintainers to measure library performance. ## Usage -`python benchmark.py queries.json` +`python benchmark.py` -BigQuery service caches requests so the benchmark should be run -at least twice, disregarding the first result. + +### Flags +Run `python benchmark.py -h` for detailed information on available flags. + +`--reruns` can be used to override the default number of times a query is rerun. Must be a positive +integer. Default value is 3. + +`--projectid` can be used to run benchmarks in a different project. If unset, the GOOGLE_CLOUD_PROJECT + environment variable is used. + +`--queryfile` can be used to override the default file which contains queries to be instrumented. + +`--table` can be used to specify a table to which benchmarking results should be streamed. The format +for this string is in BigQuery standard SQL notation without escapes, e.g. `projectid.datasetid.tableid` + +`--create_table` can be used to have the benchmarking tool create the destination table prior to streaming. + +`--tag` allows arbitrary key:value pairs to be set. This flag can be specified multiple times. + +When `--create_table` flag is set, must also specify the name of the new table using `--table`. + +### Example invocations + +Setting all the flags +``` +python benchmark.py \ + --reruns 5 \ + --projectid test_project_id \ + --table logging_project_id.querybenchmarks.measurements \ + --create_table \ + --tag source:myhostname \ + --tag somekeywithnovalue \ + --tag experiment:special_environment_thing +``` + +Or, a more realistic invocation using shell substitions: +``` +python benchmark.py \ + --reruns 5 \ + --table $BENCHMARK_TABLE \ + --tag origin:$(hostname) \ + --tag branch:$(git branch --show-current) \ + --tag latestcommit:$(git log --pretty=format:'%H' -n 1) +``` + +## Stream Results To A BigQuery Table + +When streaming benchmarking results to a BigQuery table, the table schema is as follows: +``` +[ + { + "name": "groupname", + "type": "STRING" + }, + { + "name": "name", + "type": "STRING" + }, + { + "name": "tags", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + { + "name": "key", + "type": "STRING" + }, + { + "name": "value", + "type": "STRING" + } + ] + }, + { + "name": "SQL", + "type": "STRING" + }, + { + "name": "runs", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + { + "name": "errorstring", + "type": "STRING" + }, + { + "name": "start_time", + "type": "TIMESTAMP" + }, + { + "name": "query_end_time", + "type": "TIMESTAMP" + }, + { + "name": "first_row_returned_time", + "type": "TIMESTAMP" + }, + { + "name": "all_rows_returned_time", + "type": "TIMESTAMP" + }, + { + "name": "total_rows", + "type": "INTEGER" + } + ] + }, + { + "name": "event_time", + "type": "TIMESTAMP" + } +] +``` + +The table schema is the same as the [benchmark in go](https://github.com/googleapis/google-cloud-go/tree/main/bigquery/benchmarks), +so results from both languages can be streamed to the same table. + +## BigQuery Benchmarks In Other Languages +* Go: https://github.com/googleapis/google-cloud-go/tree/main/bigquery/benchmarks +* JAVA: https://github.com/googleapis/java-bigquery/tree/main/benchmark diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 2917f169a..30e294baa 100644 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,35 +12,312 @@ # See the License for the specific language governing permissions and # limitations under the License. -from google.cloud import bigquery +"""Scripts for benchmarking BigQuery queries performance.""" + +import argparse from datetime import datetime import json -import sys - -if len(sys.argv) < 2: - raise Exception('need query file, usage: python {0} '.format(sys.argv[0])) - -with open(sys.argv[1], 'r') as f: - queries = json.loads(f.read()) - -client = bigquery.Client() - -for query in queries: - start_time = datetime.now() - job = client.query(query) - rows = job.result() - - num_rows = 0 - num_cols = None - first_byte_time = None - - for row in rows: - if num_rows == 0: - num_cols = len(row) - first_byte_time = datetime.now() - start_time - elif num_cols != len(row): - raise Exception('found {0} columsn, expected {1}'.format(len(row), num_cols)) - num_rows += 1 - total_time = datetime.now() - start_time - print("query {0}: {1} rows, {2} cols, first byte {3} sec, total {4} sec" - .format(query, num_rows, num_cols, first_byte_time.total_seconds(), total_time.total_seconds())) +import os + +from google.api_core import exceptions + +from google.cloud import bigquery + +_run_schema = [ + bigquery.SchemaField("groupname", "STRING", mode="NULLABLE"), + bigquery.SchemaField("name", "STRING", mode="NULLABLE"), + bigquery.SchemaField( + "tags", + "RECORD", + mode="REPEATED", + fields=[ + bigquery.SchemaField("key", "STRING", mode="NULLABLE"), + bigquery.SchemaField("value", "STRING", mode="NULLABLE"), + ], + ), + bigquery.SchemaField("SQL", "STRING", mode="NULLABLE"), + bigquery.SchemaField( + "runs", + "RECORD", + mode="REPEATED", + fields=[ + bigquery.SchemaField("errorstring", "STRING", mode="NULLABLE"), + bigquery.SchemaField("start_time", "TIMESTAMP", mode="NULLABLE"), + bigquery.SchemaField("query_end_time", "TIMESTAMP", mode="NULLABLE"), + bigquery.SchemaField( + "first_row_returned_time", "TIMESTAMP", mode="NULLABLE" + ), + bigquery.SchemaField( + "all_rows_returned_time", "TIMESTAMP", mode="NULLABLE" + ), + bigquery.SchemaField("total_rows", "INTEGER", mode="NULLABLE"), + ], + ), + bigquery.SchemaField("event_time", "TIMESTAMP", mode="NULLABLE"), +] + + +def _check_pos_int(value): + """Verifies the value is a positive integer.""" + ivalue = int(value) + if ivalue <= 0: + raise argparse.ArgumentTypeError( + f"Argument rerun should be positive int. Actual value: {value}" + ) + return ivalue + + +def _parse_tag(tag): + """Parses input tag into key value pair as a dict.""" + tagstring = str(tag) + key, value = tagstring.split(":") + if not key or not value: + raise argparse.ArgumentTypeError( + "key and value in tag need to be non-empty. Actual value: " + + f"key={key}, value={value}" + ) + return {"key": key, "value": value} + + +def _parse_args() -> dict: + """Parses input flags.""" + parser = argparse.ArgumentParser(description="Benchmark for BigQuery.") + + parser.add_argument( + "--reruns", + action="store", + type=_check_pos_int, + default=3, + metavar="", + help="how many times each query is run. Must be a positive integer." + + "Default 3 times", + ) + + parser.add_argument( + "--projectid", + action="store", + type=str, + metavar="", + help="run benchmarks in a different project. If unset, the " + + "GOOGLE_CLOUD_PROJECT environment variable is used", + ) + + parser.add_argument( + "--queryfile", + action="store", + type=str, + metavar="", + default="queries.json", + help="override the default file which contains queries to be instrumented", + ) + + parser.add_argument( + "--table", + action="store", + type=str, + metavar="", + help="specify a table to which benchmarking results should be " + + "streamed. The format for this string is in BigQuery standard SQL " + + "notation without escapes, e.g. projectid.datasetid.tableid", + ) + + parser.add_argument( + "--create_table", + action="store_true", + help="let the benchmarking tool create the destination table prior to" + + " streaming; if set, also need to set --table to specify table name", + ) + + parser.add_argument( + "--tag", + action="append", + type=_parse_tag, + metavar="", + help="set arbitrary key:value pairs, can be set multiple times", + ) + + args = parser.parse_args() + args_dict = vars(args) + + # Verifies that project id is set. + if not args_dict.get("projectid"): + if projectid_env := os.environ["GOOGLE_CLOUD_PROJECT"]: + args_dict["projectid"] = projectid_env + else: + raise ValueError( + "Must provide --projectid or set " + "GOOGLE_CLOUD_PROJECT environment variable" + ) + + # Verifies that table name is specified when `create_table == True`. + if args_dict.get("create_table") and not args_dict.get("table"): + raise ValueError( + "When --create_table is present, must specify table name with --table" + ) + + return args_dict + + +def _prepare_table(client, create_table: bool, table_name: str) -> str: + """Ensures a table exists, and optionally creates it if directed.""" + + # Verifies that table destination is of valid format. + parts = table_name.split(".") + if len(parts) != 3: + raise ValueError(f"Expected table in p.d.t format, got: {table_name}") + + table = bigquery.Table(table_name, schema=_run_schema) + + # Create table if create_table == True. + if create_table: + table = client.create_table(table) + print(f"Created table {table.project}.{table.dataset_id}." f"{table.table_id}") + + # Verifies that table exists. + client.get_table(table_name) + return table_name + + +def _run_query(client, query: str, rerun: int) -> list: + """Runs individual query for `rerun` times, and returns run results.""" + runs = [] + + for _ in range(rerun): + print(".", end="", flush=True) + run = {} + num_rows = 0 + num_cols = 0 + start_time = datetime.now() + first_row_time = datetime.min + end_time = datetime.min + + job = client.query(query) + query_end_time = datetime.now() + + try: + rows = job.result() + for row in rows: + if num_rows == 0: + num_cols = len(row) + first_row_time = datetime.now() + elif num_cols != len(row): + raise RuntimeError(f"found {len(row)} columns, expected {num_cols}") + num_rows += 1 + end_time = datetime.now() + except exceptions.BadRequest as exc: + run["errorstring"] = repr(exc) + + run["start_time"] = start_time.isoformat() + run["query_end_time"] = query_end_time.isoformat() + run["first_row_returned_time"] = first_row_time.isoformat() + run["all_rows_returned_time"] = end_time.isoformat() + run["total_rows"] = num_rows + runs.append(run) + + print("") + return runs + + +def _get_delta(time_str_1: str, time_str_2: str) -> str: + """Calculates delta of two ISO format time string, and return as a string.""" + time_1 = datetime.fromisoformat(time_str_1) + time_2 = datetime.fromisoformat(time_str_2) + delta = time_1 - time_2 + return str(delta) + + +def _is_datetime_min(time_str: str) -> bool: + return datetime.fromisoformat(time_str) == datetime.min + + +def _summary(run: dict) -> str: + """Coverts run dict to run summary string.""" + no_val = "NODATA" + output = ["QUERYTIME "] + + if not _is_datetime_min(run.get("query_end_time")): + output.append(f"{_get_delta(run.get('query_end_time'), run.get('start_time'))}") + else: + output.append(no_val) + output.append(" FIRSTROW ") + + if not _is_datetime_min(run.get("first_row_returned_time")): + output.append( + f"{_get_delta(run.get('first_row_returned_time'), run.get('start_time'))}" + ) + else: + output.append(no_val) + output += " ALLROWS " + + if not _is_datetime_min(run.get("all_rows_returned_time")): + output.append( + f"{_get_delta(run.get('all_rows_returned_time'), run.get('start_time'))}" + ) + else: + output.append(no_val) + + if run.get("total_rows"): + output.append(f" ROWS {run.get('total_rows')}") + if run.get("errorstring"): + output.append(f" ERRORED {run.get('errorstring')}") + + return "".join(output) + + +def _print_results(profiles: list): + for i, prof in enumerate(profiles): + print(f"{i+1}: ({prof['groupname']}:{prof['name']})") + print(f"SQL: {prof['SQL']}") + print("MEASUREMENTS") + for j, run in enumerate(prof["runs"]): + print(f"\t\t({j}) {_summary(run)}") + + +def _run_benchmarks(args: dict) -> list: + client = bigquery.Client() + + # If we're going to stream results, let's make sure we can do that + # before running all the tests. + table_id = "" + if args.get("create_table") or args.get("table"): + table_id = _prepare_table(client, args.get("create_table"), args.get("table")) + + queries_file = args.get("queryfile") + with open(queries_file, "r") as f: + groups = json.loads(f.read()) + + measure_start = datetime.now() + profiles = [] + for group_name, group in groups.items(): + for name, query in group.items(): + print(f"Measuring {group_name} : {name}", end="", flush=True) + event_time = datetime.now() + runs = _run_query(client, query, args.get("reruns")) + + profile = {} + profile["groupname"] = group_name + profile["name"] = name + profile["tags"] = args.get("tag") or [] + profile["SQL"] = query + profile["runs"] = runs + profile["event_time"] = event_time.isoformat() + profiles.append(profile) + + measure_end = datetime.now() + print(f"Measurement time: {str(measure_end-measure_start)}") + + # Stream benchmarking results to table, if required. + if table_id: + print(f"Streaming test results to table {table_id}...") + errors = client.insert_rows_json(table_id, profiles) + if errors: + raise RuntimeError(f"Cannot upload queries profiles: {errors}") + print("Streaming complete.") + + return profiles + + +if __name__ == "__main__": + args = _parse_args() + profiles = _run_benchmarks(args) + _print_results(profiles) diff --git a/benchmark/queries.json b/benchmark/queries.json index 13fed38b5..464395619 100644 --- a/benchmark/queries.json +++ b/benchmark/queries.json @@ -1,10 +1,16 @@ -[ - "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 10000", - "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 100000", - "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 1000000", - "SELECT title FROM `bigquery-public-data.samples.wikipedia` ORDER BY title LIMIT 1000", - "SELECT title, id, timestamp, contributor_ip FROM `bigquery-public-data.samples.wikipedia` WHERE title like 'Blo%' ORDER BY id", - "SELECT * FROM `bigquery-public-data.baseball.games_post_wide` ORDER BY gameId", - "SELECT * FROM `bigquery-public-data.samples.github_nested` WHERE repository.has_downloads ORDER BY repository.created_at LIMIT 10000", - "SELECT repo_name, path FROM `bigquery-public-data.github_repos.files` WHERE path LIKE '%.java' ORDER BY id LIMIT 1000000" -] +{ + "simple-cacheable": { + "nycyellow-limit1k":"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 1000", + "nycyellow-limit10k":"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 10000", + "nycyellow-limit100k":"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 100000", + "wikisamples-ordered-limit1k":"SELECT title FROM `bigquery-public-data.samples.wikipedia` ORDER BY title LIMIT 1000" + }, + "simple-nondeterministic": { + "current-timestamp":"SELECT CURRENT_TIMESTAMP() as ts", + "session-user": "SELECT SESSION_USER() as ts", + "literals": "SELECT 1 as i, 3.14 as pi" + }, + "simple-invalid": { + "invalid-query": "invalid sql here" + } +} diff --git a/noxfile.py b/noxfile.py index ba06f925d..4ddd4eaaf 100644 --- a/noxfile.py +++ b/noxfile.py @@ -26,6 +26,7 @@ PYTYPE_VERSION = "pytype==2021.4.9" BLACK_VERSION = "black==23.7.0" BLACK_PATHS = ( + "benchmark", "docs", "google", "samples", @@ -381,6 +382,7 @@ def lint(session): session.run("flake8", "tests") session.run("flake8", os.path.join("docs", "samples")) session.run("flake8", os.path.join("docs", "snippets.py")) + session.run("flake8", "benchmark") session.run("black", "--check", *BLACK_PATHS) From e8da97895e6ab1df0ddcfe4316cfe0a7d3027c06 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 26 Oct 2023 15:46:06 -0400 Subject: [PATCH 175/536] chore: refactor BigQueryStorageVersions (#1699) * chore: refactor BigQueryStorageVersions * address comments in #1680 * add unit test --- google/cloud/bigquery/_helpers.py | 65 ----------- google/cloud/bigquery/_pandas_helpers.py | 3 +- google/cloud/bigquery/_versions_helpers.py | 81 +++++++++++++- google/cloud/bigquery/client.py | 42 +++---- google/cloud/bigquery/exceptions.py | 6 + google/cloud/bigquery/magics/magics.py | 27 ++++- google/cloud/bigquery/table.py | 20 ++-- tests/unit/test__helpers.py | 73 ------------- tests/unit/test__pandas_helpers.py | 18 ++- tests/unit/test__versions_helpers.py | 121 ++++++++++++++++++++- tests/unit/test_client.py | 4 +- tests/unit/test_magics.py | 16 ++- tests/unit/test_table.py | 6 +- 13 files changed, 286 insertions(+), 196 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 488766853..684cbfc12 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -30,10 +30,6 @@ from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes -import packaging.version - -from google.cloud.bigquery import exceptions - _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" _TIMEONLY_W_MICROS = "%H:%M:%S.%f" @@ -52,10 +48,6 @@ r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" ) -_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") - -_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") - BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST" """Environment variable defining host for emulator.""" @@ -67,63 +59,6 @@ def _get_bigquery_host(): return os.environ.get(BIGQUERY_EMULATOR_HOST, _DEFAULT_HOST) -class BQStorageVersions: - """Version comparisons for google-cloud-bigqueyr-storage package.""" - - def __init__(self): - self._installed_version = None - - @property - def installed_version(self) -> packaging.version.Version: - """Return the parsed version of google-cloud-bigquery-storage.""" - if self._installed_version is None: - from google.cloud import bigquery_storage - - self._installed_version = packaging.version.parse( - # Use 0.0.0, since it is earlier than any released version. - # Legacy versions also have the same property, but - # creating a LegacyVersion has been deprecated. - # https://github.com/pypa/packaging/issues/321 - getattr(bigquery_storage, "__version__", "0.0.0") - ) - - return self._installed_version # type: ignore - - @property - def is_read_session_optional(self) -> bool: - """True if read_session is optional to rows(). - - See: https://github.com/googleapis/python-bigquery-storage/pull/228 - """ - return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION - - def verify_version(self): - """Verify that a recent enough version of BigQuery Storage extra is - installed. - - The function assumes that google-cloud-bigquery-storage extra is - installed, and should thus be used in places where this assumption - holds. - - Because `pip` can install an outdated version of this extra despite the - constraints in `setup.py`, the calling code can use this helper to - verify the version compatibility at runtime. - - Raises: - exceptions.LegacyBigQueryStorageError: - If the google-cloud-bigquery-storage package is outdated. - """ - if self.installed_version < _MIN_BQ_STORAGE_VERSION: - msg = ( - "Dependency google-cloud-bigquery-storage is outdated, please upgrade " - f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})." - ) - raise exceptions.LegacyBigQueryStorageError(msg) - - -BQ_STORAGE_VERSIONS = BQStorageVersions() - - def _not_null(value, field): """Check whether 'value' should be coerced to 'field' type.""" return value is not None or (field is not None and field.mode != "NULLABLE") diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index ea790d6c9..53db9511c 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -23,7 +23,6 @@ import warnings from typing import Any, Union -from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema @@ -745,7 +744,7 @@ def _download_table_bqstorage_stream( # Avoid deprecation warnings for passing in unnecessary read session. # https://github.com/googleapis/python-bigquery-storage/issues/229 - if _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: + if _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: rowstream = reader.rows() else: rowstream = reader.rows(session) diff --git a/google/cloud/bigquery/_versions_helpers.py b/google/cloud/bigquery/_versions_helpers.py index 1f04c74e0..ce529b76e 100644 --- a/google/cloud/bigquery/_versions_helpers.py +++ b/google/cloud/bigquery/_versions_helpers.py @@ -22,6 +22,8 @@ _MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") +_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") +_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") class PyarrowVersions: @@ -51,7 +53,7 @@ def use_compliant_nested_type(self) -> bool: return self.installed_version.major >= 4 def try_import(self, raise_if_error: bool = False) -> Any: - """Verify that a recent enough version of pyarrow extra is installed. + """Verifies that a recent enough version of pyarrow extra is installed. The function assumes that pyarrow extra is installed, and should thus be used in places where this assumption holds. @@ -92,3 +94,80 @@ def try_import(self, raise_if_error: bool = False) -> Any: PYARROW_VERSIONS = PyarrowVersions() + + +class BQStorageVersions: + """Version comparisons for google-cloud-bigqueyr-storage package.""" + + def __init__(self): + self._installed_version = None + + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of google-cloud-bigquery-storage.""" + if self._installed_version is None: + from google.cloud import bigquery_storage + + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(bigquery_storage, "__version__", "0.0.0") + ) + + return self._installed_version # type: ignore + + @property + def is_read_session_optional(self) -> bool: + """True if read_session is optional to rows(). + + See: https://github.com/googleapis/python-bigquery-storage/pull/228 + """ + return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION + + def try_import(self, raise_if_error: bool = False) -> Any: + """Tries to import the bigquery_storage module, and returns results + accordingly. It also verifies the module version is recent enough. + + If the import succeeds, returns the ``bigquery_storage`` module. + + If the import fails, + returns ``None`` when ``raise_if_error == False``, + raises Error when ``raise_if_error == True``. + + Returns: + The ``bigquery_storage`` module or ``None``. + + Raises: + exceptions.BigQueryStorageNotFoundError: + If google-cloud-bigquery-storage is not installed + exceptions.LegacyBigQueryStorageError: + If google-cloud-bigquery-storage package is outdated + """ + try: + from google.cloud import bigquery_storage # type: ignore + except ImportError: + if raise_if_error: + msg = ( + "Package google-cloud-bigquery-storage not found. " + "Install google-cloud-bigquery-storage version >= " + f"{_MIN_BQ_STORAGE_VERSION}." + ) + raise exceptions.BigQueryStorageNotFoundError(msg) + return None + + if self.installed_version < _MIN_BQ_STORAGE_VERSION: + if raise_if_error: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, " + f"please upgrade it to version >= {_MIN_BQ_STORAGE_VERSION} " + f"(version found: {self.installed_version})." + ) + raise exceptions.LegacyBigQueryStorageError(msg) + return None + + return bigquery_storage + + +BQ_STORAGE_VERSIONS = BQStorageVersions() diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index ed75215b6..e17d6b8da 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -65,26 +65,25 @@ DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore +from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _job_helpers -from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id +from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import enums +from google.cloud.bigquery import exceptions as bq_exceptions +from google.cloud.bigquery import job from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._helpers import _get_bigquery_host -from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _DEFAULT_HOST -from google.cloud.bigquery._http import Connection -from google.cloud.bigquery import _pandas_helpers -from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery import enums from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery import exceptions as bq_exceptions -from google.cloud.bigquery.opentelemetry_tracing import create_span -from google.cloud.bigquery import job +from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import ( CopyJob, CopyJobConfig, @@ -98,6 +97,7 @@ from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref +from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery.query import _QueryResults from google.cloud.bigquery.retry import ( DEFAULT_JOB_RETRY, @@ -113,7 +113,6 @@ from google.cloud.bigquery.table import TableListItem from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import RowIterator -from google.cloud.bigquery.format_options import ParquetOptions pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() @@ -545,29 +544,32 @@ def _ensure_bqstorage_client( An existing BigQuery Storage client instance. If ``None``, a new instance is created and returned. client_options: - Custom options used with a new BigQuery Storage client instance if one - is created. + Custom options used with a new BigQuery Storage client instance + if one is created. client_info: - The client info used with a new BigQuery Storage client instance if one - is created. + The client info used with a new BigQuery Storage client + instance if one is created. Returns: A BigQuery Storage API client. """ + try: - from google.cloud import bigquery_storage # type: ignore - except ImportError: + bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import( + raise_if_error=True + ) + except bq_exceptions.BigQueryStorageNotFoundError: warnings.warn( "Cannot create BigQuery Storage client, the dependency " "google-cloud-bigquery-storage is not installed." ) return None - - try: - BQ_STORAGE_VERSIONS.verify_version() except bq_exceptions.LegacyBigQueryStorageError as exc: - warnings.warn(str(exc)) + warnings.warn( + "Dependency google-cloud-bigquery-storage is outdated: " + str(exc) + ) return None + if bqstorage_client is None: bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=self._credentials, diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py index 2bab97fea..e94a6c832 100644 --- a/google/cloud/bigquery/exceptions.py +++ b/google/cloud/bigquery/exceptions.py @@ -23,3 +23,9 @@ class LegacyBigQueryStorageError(BigQueryError): class LegacyPyarrowError(BigQueryError): """Raised when too old a version of pyarrow package is detected at runtime.""" + + +class BigQueryStorageNotFoundError(BigQueryError): + """Raised when BigQuery Storage extra is not installed when trying to + import it. + """ diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index f92f77541..2a3583c66 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -104,6 +104,8 @@ import google.auth # type: ignore from google.cloud import bigquery import google.cloud.bigquery.dataset +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import exceptions from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.magics import line_arg_parser as lap @@ -744,12 +746,31 @@ def _split_args_line(line): def _make_bqstorage_client(client, use_bqstorage_api, client_options): + """Creates a BigQuery Storage client. + + Args: + client (:class:`~google.cloud.bigquery.client.Client`): BigQuery client. + use_bqstorage_api (bool): whether BigQuery Storage API is used or not. + client_options (:class:`google.api_core.client_options.ClientOptions`): + Custom options used with a new BigQuery Storage client instance + if one is created. + + Raises: + ImportError: if google-cloud-bigquery-storage is not installed, or + grpcio package is not installed. + + + Returns: + None: if ``use_bqstorage_api == False``, or google-cloud-bigquery-storage + is outdated. + BigQuery Storage Client: + """ if not use_bqstorage_api: return None try: - from google.cloud import bigquery_storage # type: ignore # noqa: F401 - except ImportError as err: + _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True) + except exceptions.BigQueryStorageNotFoundError as err: customized_error = ImportError( "The default BigQuery Storage API client cannot be used, install " "the missing google-cloud-bigquery-storage and pyarrow packages " @@ -757,6 +778,8 @@ def _make_bqstorage_client(client, use_bqstorage_api, client_options): "the --use_rest_api magic option." ) raise customized_error from err + except exceptions.LegacyBigQueryStorageError: + pass try: from google.api_core.gapic_v1 import client_info as gapic_client_info diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index a967a1795..633043322 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -60,14 +60,15 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import exceptions as bq_exceptions +from google.cloud.bigquery._tqdm_helpers import get_progress_bar +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.enums import DefaultPandasDTypes -from google.cloud.bigquery import exceptions +from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields -from google.cloud.bigquery._tqdm_helpers import get_progress_bar -from google.cloud.bigquery.external_config import ExternalConfig -from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration if typing.TYPE_CHECKING: # pragma: NO COVER # Unconditionally import optional dependencies again to tell pytype that @@ -1593,7 +1594,7 @@ def _is_completely_cached(self): return self._first_page_response.get(self._next_token) is None def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): - """Returns if the BigQuery Storage API can be used. + """Returns True if the BigQuery Storage API can be used. Returns: bool @@ -1610,13 +1611,10 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): return False try: - from google.cloud import bigquery_storage # noqa: F401 - except ImportError: + _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True) + except bq_exceptions.BigQueryStorageNotFoundError: return False - - try: - _helpers.BQ_STORAGE_VERSIONS.verify_version() - except exceptions.LegacyBigQueryStorageError as exc: + except bq_exceptions.LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return False diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 40223f041..e2e2da3c8 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -19,79 +19,6 @@ import mock -from google.cloud.bigquery import exceptions - -try: - from google.cloud import bigquery_storage # type: ignore -except ImportError: # pragma: NO COVER - bigquery_storage = None - - -@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") -class TestBQStorageVersions(unittest.TestCase): - def tearDown(self): - from google.cloud.bigquery import _helpers - - # Reset any cached versions since it may not match reality. - _helpers.BQ_STORAGE_VERSIONS._installed_version = None - - def _object_under_test(self): - from google.cloud.bigquery import _helpers - - return _helpers.BQStorageVersions() - - def _call_fut(self): - from google.cloud.bigquery import _helpers - - _helpers.BQ_STORAGE_VERSIONS._installed_version = None - return _helpers.BQ_STORAGE_VERSIONS.verify_version() - - def test_raises_no_error_w_recent_bqstorage(self): - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): - try: - self._call_fut() - except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER - self.fail("Legacy error raised with a non-legacy dependency version.") - - def test_raises_error_w_legacy_bqstorage(self): - with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): - with self.assertRaises(exceptions.LegacyBigQueryStorageError): - self._call_fut() - - def test_raises_error_w_unknown_bqstorage_version(self): - with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: - del fake_module.__version__ - error_pattern = r"version found: 0.0.0" - with self.assertRaisesRegex( - exceptions.LegacyBigQueryStorageError, error_pattern - ): - self._call_fut() - - def test_installed_version_returns_cached(self): - versions = self._object_under_test() - versions._installed_version = object() - assert versions.installed_version is versions._installed_version - - def test_installed_version_returns_parsed_version(self): - versions = self._object_under_test() - - with mock.patch("google.cloud.bigquery_storage.__version__", new="1.2.3"): - version = versions.installed_version - - assert version.major == 1 - assert version.minor == 2 - assert version.micro == 3 - - def test_is_read_session_optional_true(self): - versions = self._object_under_test() - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.6.0"): - assert versions.is_read_session_optional - - def test_is_read_session_optional_false(self): - versions = self._object_under_test() - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"): - assert not versions.is_read_session_optional - class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 7724f308b..212a6f1dd 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -40,7 +40,6 @@ from google import api_core from google.cloud.bigquery import exceptions -from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema @@ -56,12 +55,7 @@ # used in test parameterization. pyarrow = mock.Mock() -try: - from google.cloud import bigquery_storage - - _helpers.BQ_STORAGE_VERSIONS.verify_version() -except ImportError: # pragma: NO COVER - bigquery_storage = None +bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import() PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -1616,7 +1610,9 @@ def test__download_table_bqstorage_stream_includes_read_session( import google.cloud.bigquery_storage_v1.reader import google.cloud.bigquery_storage_v1.types - monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None) + monkeypatch.setattr( + _versions_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None + ) monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0") bqstorage_client = mock.create_autospec( bigquery_storage.BigQueryReadClient, instance=True @@ -1641,7 +1637,7 @@ def test__download_table_bqstorage_stream_includes_read_session( @pytest.mark.skipif( bigquery_storage is None - or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + or not _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", ) def test__download_table_bqstorage_stream_omits_read_session( @@ -1650,7 +1646,9 @@ def test__download_table_bqstorage_stream_omits_read_session( import google.cloud.bigquery_storage_v1.reader import google.cloud.bigquery_storage_v1.types - monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None) + monkeypatch.setattr( + _versions_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None + ) monkeypatch.setattr(bigquery_storage, "__version__", "2.6.0") bqstorage_client = mock.create_autospec( bigquery_storage.BigQueryReadClient, instance=True diff --git a/tests/unit/test__versions_helpers.py b/tests/unit/test__versions_helpers.py index 21386610b..144f14b7c 100644 --- a/tests/unit/test__versions_helpers.py +++ b/tests/unit/test__versions_helpers.py @@ -16,12 +16,21 @@ import mock +try: + import pyarrow # type: ignore +except ImportError: # pragma: NO COVER + pyarrow = None + +try: + from google.cloud import bigquery_storage # type: ignore +except ImportError: # pragma: NO COVER + bigquery_storage = None + from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions -pyarrow = pytest.importorskip("pyarrow") - +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") def test_try_import_raises_no_error_w_recent_pyarrow(): versions = _versions_helpers.PyarrowVersions() with mock.patch("pyarrow.__version__", new="5.0.0"): @@ -32,6 +41,7 @@ def test_try_import_raises_no_error_w_recent_pyarrow(): raise ("Legacy error raised with a non-legacy dependency version.") +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") def test_try_import_returns_none_w_legacy_pyarrow(): versions = _versions_helpers.PyarrowVersions() with mock.patch("pyarrow.__version__", new="2.0.0"): @@ -39,6 +49,7 @@ def test_try_import_returns_none_w_legacy_pyarrow(): assert pyarrow is None +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") def test_try_import_raises_error_w_legacy_pyarrow(): versions = _versions_helpers.PyarrowVersions() with mock.patch("pyarrow.__version__", new="2.0.0"): @@ -46,13 +57,15 @@ def test_try_import_raises_error_w_legacy_pyarrow(): versions.try_import(raise_if_error=True) -def test_installed_version_returns_cached(): +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") +def test_installed_pyarrow_version_returns_cached(): versions = _versions_helpers.PyarrowVersions() versions._installed_version = object() assert versions.installed_version is versions._installed_version -def test_installed_version_returns_parsed_version(): +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") +def test_installed_pyarrow_version_returns_parsed_version(): versions = _versions_helpers.PyarrowVersions() with mock.patch("pyarrow.__version__", new="1.2.3"): version = versions.installed_version @@ -60,3 +73,103 @@ def test_installed_version_returns_parsed_version(): assert version.major == 1 assert version.minor == 2 assert version.micro == 3 + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_raises_no_error_w_recent_bqstorage(): + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): + try: + bqstorage_versions = _versions_helpers.BQStorageVersions() + bqstorage_versions.try_import(raise_if_error=True) + except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER + raise ("Legacy error raised with a non-legacy dependency version.") + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_raises_error_w_legacy_bqstorage(): + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): + with pytest.raises(exceptions.LegacyBigQueryStorageError): + bqstorage_versions = _versions_helpers.BQStorageVersions() + bqstorage_versions.try_import(raise_if_error=True) + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_returns_none_with_legacy_bqstorage(): + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): + try: + bqstorage_versions = _versions_helpers.BQStorageVersions() + bq_storage = bqstorage_versions.try_import() + except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER + raise ("Legacy error raised when raise_if_error == False.") + assert bq_storage is None + + +@pytest.mark.skipif( + bigquery_storage is not None, + reason="Tests behavior when `google-cloud-bigquery-storage` isn't installed", +) +def test_returns_none_with_bqstorage_uninstalled(): + try: + bqstorage_versions = _versions_helpers.BQStorageVersions() + bq_storage = bqstorage_versions.try_import() + except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER + raise ("NotFound error raised when raise_if_error == False.") + assert bq_storage is None + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_raises_error_w_unknown_bqstorage_version(): + with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: + del fake_module.__version__ + error_pattern = r"version found: 0.0.0" + with pytest.raises(exceptions.LegacyBigQueryStorageError, match=error_pattern): + bqstorage_versions = _versions_helpers.BQStorageVersions() + bqstorage_versions.try_import(raise_if_error=True) + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_installed_bqstorage_version_returns_cached(): + bqstorage_versions = _versions_helpers.BQStorageVersions() + bqstorage_versions._installed_version = object() + assert bqstorage_versions.installed_version is bqstorage_versions._installed_version + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_installed_bqstorage_version_returns_parsed_version(): + bqstorage_versions = _versions_helpers.BQStorageVersions() + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.2.3"): + bqstorage_versions = bqstorage_versions.installed_version + + assert bqstorage_versions.major == 1 + assert bqstorage_versions.minor == 2 + assert bqstorage_versions.micro == 3 + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_bqstorage_is_read_session_optional_true(): + bqstorage_versions = _versions_helpers.BQStorageVersions() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.6.0"): + assert bqstorage_versions.is_read_session_optional + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_bqstorage_is_read_session_optional_false(): + bqstorage_versions = _versions_helpers.BQStorageVersions() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"): + assert not bqstorage_versions.is_read_session_optional diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 3143f2123..d470bd9fd 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -826,7 +826,7 @@ def test_ensure_bqstorage_client_obsolete_dependency(self): client = self._make_one(project=self.PROJECT, credentials=creds) patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", + "google.cloud.bigquery.client._versions_helpers.BQ_STORAGE_VERSIONS.try_import", side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: @@ -861,7 +861,7 @@ def test_ensure_bqstorage_client_existing_client_check_fails(self): mock_storage_client = mock.sentinel.mock_storage_client patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", + "google.cloud.bigquery.client._versions_helpers.BQ_STORAGE_VERSIONS.try_import", side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 0cab943f7..b03894095 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -339,6 +339,9 @@ def test__make_bqstorage_client_true(): def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): + """When package `google-cloud-bigquery-storage` is not installed, reports + ImportError. + """ credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -358,6 +361,9 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) def test__make_bqstorage_client_true_obsolete_dependency(): + """When package `google-cloud-bigquery-storage` is installed but has outdated + version, returns None, and raises a warning. + """ credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -366,8 +372,10 @@ def test__make_bqstorage_client_true_obsolete_dependency(): ) patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=bq_exceptions.LegacyBigQueryStorageError("BQ Storage too old"), + "google.cloud.bigquery._versions_helpers.BQ_STORAGE_VERSIONS.try_import", + side_effect=bq_exceptions.LegacyBigQueryStorageError( + "google-cloud-bigquery-storage is outdated" + ), ) with patcher, warnings.catch_warnings(record=True) as warned: got = magics._make_bqstorage_client(test_client, True, {}) @@ -375,7 +383,9 @@ def test__make_bqstorage_client_true_obsolete_dependency(): assert got is None matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) + warning + for warning in warned + if "google-cloud-bigquery-storage is outdated" in str(warning) ] assert matching_warnings, "Obsolete dependency warning not raised." diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 65eb659bf..fa2f30cea 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2260,7 +2260,7 @@ def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): iterator = self._make_one(first_page_response=None) # not cached patcher = mock.patch( - "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", + "google.cloud.bigquery.table._versions_helpers.BQ_STORAGE_VERSIONS.try_import", side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: @@ -2866,11 +2866,11 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): mock_client = _mock_client() row_iterator = self._make_one(mock_client, api_request, path, schema) - def mock_verify_version(): + def mock_verify_version(raise_if_error: bool = False): raise exceptions.LegacyBigQueryStorageError("no bqstorage") with mock.patch( - "google.cloud.bigquery._helpers.BQ_STORAGE_VERSIONS.verify_version", + "google.cloud.bigquery._versions_helpers.BQ_STORAGE_VERSIONS.try_import", mock_verify_version, ): tbl = row_iterator.to_arrow(create_bqstorage_client=True) From dfe18867e8f1c5d5571ba842168d723aa9c886e9 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Fri, 27 Oct 2023 18:40:04 -0400 Subject: [PATCH 176/536] chore: upgrade mypy (#1705) --- google/cloud/bigquery/client.py | 18 +++++++++--------- google/cloud/bigquery/job/query.py | 4 ++-- google/cloud/bigquery/table.py | 6 +++--- noxfile.py | 4 ++-- .../authenticate_service_account_test.py | 2 +- .../snippets/authorized_view_tutorial_test.py | 2 +- .../snippets/create_partitioned_table_test.py | 2 +- samples/snippets/create_table_cmek_test.py | 2 +- ...e_table_external_data_configuration_test.py | 2 +- ...ate_table_external_hive_partitioned_test.py | 2 +- .../create_table_schema_from_json_test.py | 2 +- samples/snippets/create_table_snapshot_test.py | 2 +- samples/snippets/dataset_access_test.py | 4 ++-- samples/snippets/delete_job_test.py | 2 +- samples/snippets/delete_label_table_test.py | 2 +- samples/snippets/get_table_labels_test.py | 2 +- samples/snippets/get_table_make_schema_test.py | 2 +- samples/snippets/label_table_test.py | 2 +- .../load_table_schema_from_json_test.py | 2 +- .../snippets/load_table_uri_firestore_test.py | 2 +- samples/snippets/manage_job_test.py | 4 ++-- samples/snippets/materialized_view_test.py | 2 +- samples/snippets/natality_tutorial_test.py | 2 +- .../snippets/nested_repeated_schema_test.py | 2 +- samples/snippets/quickstart_test.py | 2 +- samples/snippets/relax_column_test.py | 2 +- samples/snippets/simple_app_test.py | 2 +- samples/snippets/test_update_with_dml.py | 4 ++-- .../snippets/update_table_expiration_test.py | 2 +- samples/snippets/user_credentials_test.py | 2 +- samples/snippets/view_test.py | 2 +- 31 files changed, 46 insertions(+), 46 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index e17d6b8da..496015b21 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2182,12 +2182,12 @@ def list_jobs( parent_job: Optional[Union[QueryJob, str]] = None, max_results: Optional[int] = None, page_token: Optional[str] = None, - all_users: bool = None, + all_users: Optional[bool] = None, state_filter: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - min_creation_time: datetime.datetime = None, - max_creation_time: datetime.datetime = None, + min_creation_time: Optional[datetime.datetime] = None, + max_creation_time: Optional[datetime.datetime] = None, page_size: Optional[int] = None, ) -> page_iterator.Iterator: """List jobs for the project associated with this client. @@ -3407,7 +3407,7 @@ def insert_rows( self, table: Union[Table, TableReference, str], rows: Union[Iterable[Tuple], Iterable[Mapping[str, Any]]], - selected_fields: Sequence[SchemaField] = None, + selected_fields: Optional[Sequence[SchemaField]] = None, **kwargs, ) -> Sequence[Dict[str, Any]]: """Insert rows into a table via the streaming API. @@ -3483,7 +3483,7 @@ def insert_rows_from_dataframe( self, table: Union[Table, TableReference, str], dataframe, - selected_fields: Sequence[SchemaField] = None, + selected_fields: Optional[Sequence[SchemaField]] = None, chunk_size: int = 500, **kwargs: Dict, ) -> Sequence[Sequence[dict]]: @@ -3546,8 +3546,8 @@ def insert_rows_json( row_ids: Union[ Iterable[Optional[str]], AutoRowIDs, None ] = AutoRowIDs.GENERATE_UUID, - skip_invalid_rows: bool = None, - ignore_unknown_values: bool = None, + skip_invalid_rows: Optional[bool] = None, + ignore_unknown_values: Optional[bool] = None, template_suffix: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, @@ -3738,7 +3738,7 @@ def list_partitions( def list_rows( self, table: Union[Table, TableListItem, TableReference, str], - selected_fields: Sequence[SchemaField] = None, + selected_fields: Optional[Sequence[SchemaField]] = None, max_results: Optional[int] = None, page_token: Optional[str] = None, start_index: Optional[int] = None, @@ -3851,7 +3851,7 @@ def _list_rows_from_query_results( project: str, schema: SchemaField, total_rows: Optional[int] = None, - destination: Union[Table, TableReference, TableListItem, str] = None, + destination: Optional[Union[Table, TableReference, TableListItem, str]] = None, max_results: Optional[int] = None, start_index: Optional[int] = None, page_size: Optional[int] = None, diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 7de209b8d..57186acbc 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1693,7 +1693,7 @@ def to_arrow( def to_dataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, @@ -1879,7 +1879,7 @@ def to_dataframe( def to_geodataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 633043322..dcba10428 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1853,7 +1853,7 @@ def to_arrow( def to_dataframe_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -1929,7 +1929,7 @@ def to_dataframe_iterable( def to_dataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_as_object: bool = False, @@ -2227,7 +2227,7 @@ def __can_cast_timestamp_ns(column): def to_geodataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_column: Optional[str] = None, diff --git a/noxfile.py b/noxfile.py index 4ddd4eaaf..a2b7a6843 100644 --- a/noxfile.py +++ b/noxfile.py @@ -22,7 +22,7 @@ import nox -MYPY_VERSION = "mypy==0.910" +MYPY_VERSION = "mypy==1.6.1" PYTYPE_VERSION = "pytype==2021.4.9" BLACK_VERSION = "black==23.7.0" BLACK_PATHS = ( @@ -137,7 +137,7 @@ def mypy(session): "types-requests", "types-setuptools", ) - session.run("mypy", "google/cloud") + session.run("mypy", "google/cloud", "--show-traceback") @nox.session(python=DEFAULT_PYTHON_VERSION) diff --git a/samples/snippets/authenticate_service_account_test.py b/samples/snippets/authenticate_service_account_test.py index 4b5711f80..fbdd2d064 100644 --- a/samples/snippets/authenticate_service_account_test.py +++ b/samples/snippets/authenticate_service_account_test.py @@ -17,7 +17,7 @@ import google.auth -import authenticate_service_account +import authenticate_service_account # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/authorized_view_tutorial_test.py b/samples/snippets/authorized_view_tutorial_test.py index cae870486..e2220fb54 100644 --- a/samples/snippets/authorized_view_tutorial_test.py +++ b/samples/snippets/authorized_view_tutorial_test.py @@ -18,7 +18,7 @@ from google.cloud import bigquery import pytest -import authorized_view_tutorial +import authorized_view_tutorial # type: ignore @pytest.fixture(scope="module") diff --git a/samples/snippets/create_partitioned_table_test.py b/samples/snippets/create_partitioned_table_test.py index 0f684fcb0..e4d7ec20e 100644 --- a/samples/snippets/create_partitioned_table_test.py +++ b/samples/snippets/create_partitioned_table_test.py @@ -14,7 +14,7 @@ import typing -import create_partitioned_table +import create_partitioned_table # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/create_table_cmek_test.py b/samples/snippets/create_table_cmek_test.py index 2b15fb350..e8626b84c 100644 --- a/samples/snippets/create_table_cmek_test.py +++ b/samples/snippets/create_table_cmek_test.py @@ -14,7 +14,7 @@ import typing -import create_table_cmek +import create_table_cmek # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/create_table_external_data_configuration_test.py b/samples/snippets/create_table_external_data_configuration_test.py index e97d7170d..bf81a75f9 100644 --- a/samples/snippets/create_table_external_data_configuration_test.py +++ b/samples/snippets/create_table_external_data_configuration_test.py @@ -14,7 +14,7 @@ import typing -import create_table_external_data_configuration +import create_table_external_data_configuration # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/create_table_external_hive_partitioned_test.py b/samples/snippets/create_table_external_hive_partitioned_test.py index 37deb8b12..5b8cbe1c3 100644 --- a/samples/snippets/create_table_external_hive_partitioned_test.py +++ b/samples/snippets/create_table_external_hive_partitioned_test.py @@ -14,7 +14,7 @@ import typing -import create_table_external_hive_partitioned +import create_table_external_hive_partitioned # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/create_table_schema_from_json_test.py b/samples/snippets/create_table_schema_from_json_test.py index 39b00cea0..e725d3ccf 100644 --- a/samples/snippets/create_table_schema_from_json_test.py +++ b/samples/snippets/create_table_schema_from_json_test.py @@ -14,7 +14,7 @@ import typing -import create_table_schema_from_json +import create_table_schema_from_json # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/create_table_snapshot_test.py b/samples/snippets/create_table_snapshot_test.py index 784dc3ddd..17ef24d26 100644 --- a/samples/snippets/create_table_snapshot_test.py +++ b/samples/snippets/create_table_snapshot_test.py @@ -14,7 +14,7 @@ import typing -import create_table_snapshot +import create_table_snapshot # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/dataset_access_test.py b/samples/snippets/dataset_access_test.py index cc6a9af61..e3a53b084 100644 --- a/samples/snippets/dataset_access_test.py +++ b/samples/snippets/dataset_access_test.py @@ -14,8 +14,8 @@ import typing -import revoke_dataset_access -import update_dataset_access +import revoke_dataset_access # type: ignore +import update_dataset_access # type: ignore if typing.TYPE_CHECKING: from google.cloud import bigquery diff --git a/samples/snippets/delete_job_test.py b/samples/snippets/delete_job_test.py index ac9d52dcf..88eeae1ed 100644 --- a/samples/snippets/delete_job_test.py +++ b/samples/snippets/delete_job_test.py @@ -16,7 +16,7 @@ from google.cloud import bigquery -import delete_job +import delete_job # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/delete_label_table_test.py b/samples/snippets/delete_label_table_test.py index 80fcbb695..01e538ae3 100644 --- a/samples/snippets/delete_label_table_test.py +++ b/samples/snippets/delete_label_table_test.py @@ -14,7 +14,7 @@ import typing -import delete_label_table +import delete_label_table # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/get_table_labels_test.py b/samples/snippets/get_table_labels_test.py index 95a95b60f..e910d6a65 100644 --- a/samples/snippets/get_table_labels_test.py +++ b/samples/snippets/get_table_labels_test.py @@ -16,7 +16,7 @@ from google.cloud import bigquery -import get_table_labels +import get_table_labels # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/get_table_make_schema_test.py b/samples/snippets/get_table_make_schema_test.py index 424f16e39..b1a1623bb 100644 --- a/samples/snippets/get_table_make_schema_test.py +++ b/samples/snippets/get_table_make_schema_test.py @@ -14,7 +14,7 @@ import typing -import get_table_make_schema +import get_table_make_schema # type: ignore if typing.TYPE_CHECKING: import pathlib diff --git a/samples/snippets/label_table_test.py b/samples/snippets/label_table_test.py index 98f3b3cc7..49f5406ab 100644 --- a/samples/snippets/label_table_test.py +++ b/samples/snippets/label_table_test.py @@ -14,7 +14,7 @@ import typing -import label_table +import label_table # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/load_table_schema_from_json_test.py b/samples/snippets/load_table_schema_from_json_test.py index c28875b0e..745793cd7 100644 --- a/samples/snippets/load_table_schema_from_json_test.py +++ b/samples/snippets/load_table_schema_from_json_test.py @@ -14,7 +14,7 @@ import typing -import load_table_schema_from_json +import load_table_schema_from_json # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/load_table_uri_firestore_test.py b/samples/snippets/load_table_uri_firestore_test.py index 552fa2e35..e19378a04 100644 --- a/samples/snippets/load_table_uri_firestore_test.py +++ b/samples/snippets/load_table_uri_firestore_test.py @@ -14,7 +14,7 @@ import typing -import load_table_uri_firestore +import load_table_uri_firestore # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/manage_job_test.py b/samples/snippets/manage_job_test.py index 630be365b..2ef4be2e0 100644 --- a/samples/snippets/manage_job_test.py +++ b/samples/snippets/manage_job_test.py @@ -15,8 +15,8 @@ from google.cloud import bigquery import pytest -import manage_job_cancel -import manage_job_get +import manage_job_cancel # type: ignore +import manage_job_get # type: ignore def test_manage_job(capsys: pytest.CaptureFixture[str]) -> None: diff --git a/samples/snippets/materialized_view_test.py b/samples/snippets/materialized_view_test.py index 70869346f..59e08131e 100644 --- a/samples/snippets/materialized_view_test.py +++ b/samples/snippets/materialized_view_test.py @@ -20,7 +20,7 @@ from google.cloud import bigquery import pytest -import materialized_view +import materialized_view # type: ignore def temp_suffix() -> str: diff --git a/samples/snippets/natality_tutorial_test.py b/samples/snippets/natality_tutorial_test.py index f56738528..7f24ca5cb 100644 --- a/samples/snippets/natality_tutorial_test.py +++ b/samples/snippets/natality_tutorial_test.py @@ -18,7 +18,7 @@ from google.cloud import bigquery import pytest -import natality_tutorial +import natality_tutorial # type: ignore @pytest.fixture(scope="module") diff --git a/samples/snippets/nested_repeated_schema_test.py b/samples/snippets/nested_repeated_schema_test.py index 8bb8bda6a..67815dcf6 100644 --- a/samples/snippets/nested_repeated_schema_test.py +++ b/samples/snippets/nested_repeated_schema_test.py @@ -14,7 +14,7 @@ import typing -import nested_repeated_schema +import nested_repeated_schema # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/quickstart_test.py b/samples/snippets/quickstart_test.py index 98a5fdd4e..88a24618d 100644 --- a/samples/snippets/quickstart_test.py +++ b/samples/snippets/quickstart_test.py @@ -18,7 +18,7 @@ from google.cloud import bigquery import pytest -import quickstart +import quickstart # type: ignore # Must match the dataset listed in quickstart.py (there's no easy way to # extract this). diff --git a/samples/snippets/relax_column_test.py b/samples/snippets/relax_column_test.py index b40b13fa1..ede1c3ab7 100644 --- a/samples/snippets/relax_column_test.py +++ b/samples/snippets/relax_column_test.py @@ -16,7 +16,7 @@ from google.cloud import bigquery -import relax_column +import relax_column # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/simple_app_test.py b/samples/snippets/simple_app_test.py index de4e1ce34..4bf0bb49c 100644 --- a/samples/snippets/simple_app_test.py +++ b/samples/snippets/simple_app_test.py @@ -14,7 +14,7 @@ import typing -import simple_app +import simple_app # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/test_update_with_dml.py b/samples/snippets/test_update_with_dml.py index ef5ec196a..d03114a36 100644 --- a/samples/snippets/test_update_with_dml.py +++ b/samples/snippets/test_update_with_dml.py @@ -17,8 +17,8 @@ from google.cloud import bigquery import pytest -from conftest import prefixer -import update_with_dml +from conftest import prefixer # type: ignore +import update_with_dml # type: ignore @pytest.fixture diff --git a/samples/snippets/update_table_expiration_test.py b/samples/snippets/update_table_expiration_test.py index 1566c7763..ed68a8c2c 100644 --- a/samples/snippets/update_table_expiration_test.py +++ b/samples/snippets/update_table_expiration_test.py @@ -15,7 +15,7 @@ import datetime import typing -import update_table_expiration +import update_table_expiration # type: ignore if typing.TYPE_CHECKING: import pathlib diff --git a/samples/snippets/user_credentials_test.py b/samples/snippets/user_credentials_test.py index df8a6354d..8448187de 100644 --- a/samples/snippets/user_credentials_test.py +++ b/samples/snippets/user_credentials_test.py @@ -19,7 +19,7 @@ import mock import pytest -from user_credentials import main +from user_credentials import main # type: ignore PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] diff --git a/samples/snippets/view_test.py b/samples/snippets/view_test.py index 4d0d43b77..1e615db47 100644 --- a/samples/snippets/view_test.py +++ b/samples/snippets/view_test.py @@ -19,7 +19,7 @@ from google.cloud import bigquery import pytest -import view +import view # type: ignore def temp_suffix() -> str: From 5734cbf78cb2eeb58775cf81a3b27219d9737aea Mon Sep 17 00:00:00 2001 From: meredithslota Date: Fri, 27 Oct 2023 16:11:15 -0700 Subject: [PATCH 177/536] chore(samples): Update snippets.py to dedupe region tags (#1701) Wait until cl/576311555 is submitted and snippetbot updates. --- docs/snippets.py | 96 ------------------------------------------------ 1 file changed, 96 deletions(-) diff --git a/docs/snippets.py b/docs/snippets.py index 64f5361cd..72ac2a000 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -118,54 +118,6 @@ def test_create_client_default_credentials(): assert client is not None -def test_create_partitioned_table(client, to_delete): - dataset_id = "create_table_partitioned_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = client.create_dataset(dataset_ref) - to_delete.append(dataset) - - # TODO(tswast): remove this snippet once cloud.google.com is updated to use - # samples/snippets/create_partitioned_table.py - # [START bigquery_create_table_partitioned] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = client.project - # dataset_ref = bigquery.DatasetReference(project, 'my_dataset') - - table_ref = dataset_ref.table("my_partitioned_table") - schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - bigquery.SchemaField("date", "DATE"), - ] - table = bigquery.Table(table_ref, schema=schema) - table.time_partitioning = bigquery.TimePartitioning( - type_=bigquery.TimePartitioningType.DAY, - field="date", # name of column to use for partitioning - expiration_ms=7776000000, - ) # 90 days - - table = client.create_table(table) - - print( - "Created table {}, partitioned on column {}".format( - table.table_id, table.time_partitioning.field - ) - ) - # [END bigquery_create_table_partitioned] - - assert table.time_partitioning.type_ == "DAY" - assert table.time_partitioning.field == "date" - assert table.time_partitioning.expiration_ms == 7776000000 - - -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) @pytest.mark.skip( reason=( "update_table() is flaky " @@ -203,54 +155,6 @@ def test_update_table_description(client, to_delete): # [END bigquery_update_table_description] -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_relax_column(client, to_delete): - """Updates a schema field from required to nullable.""" - dataset_id = "relax_column_dataset_{}".format(_millis()) - table_id = "relax_column_table_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - # TODO(tswast): remove code sample once references to it on - # cloud.google.com are updated to samples/snippets/relax_column.py - # [START bigquery_relax_column] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - # table_id = 'my_table' - - original_schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - - dataset_ref = bigquery.DatasetReference(project, dataset_id) - table_ref = dataset_ref.table(table_id) - table = bigquery.Table(table_ref, schema=original_schema) - table = client.create_table(table) - assert all(field.mode == "REQUIRED" for field in table.schema) - - # SchemaField properties cannot be edited after initialization. - # To make changes, construct new SchemaField objects. - relaxed_schema = [ - bigquery.SchemaField("full_name", "STRING", mode="NULLABLE"), - bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"), - ] - table.schema = relaxed_schema - table = client.update_table(table, ["schema"]) - - assert all(field.mode == "NULLABLE" for field in table.schema) - # [END bigquery_relax_column] - - @pytest.mark.skip( reason=( "update_table() is flaky " From 83bc768b90a852d258a4805603020a296e02d2f9 Mon Sep 17 00:00:00 2001 From: Gaurang Shah Date: Sat, 28 Oct 2023 02:29:08 -0400 Subject: [PATCH 178/536] feat: add support for dataset.default_rounding_mode (#1688) Co-authored-by: Lingqing Gan --- google/cloud/bigquery/dataset.py | 38 +++++++++++ tests/system/test_client.py | 16 ++++- tests/unit/test_create_dataset.py | 103 +++++++++++++++++++++++++++++- 3 files changed, 153 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 0f1a0f3cc..af94784a4 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -525,6 +525,7 @@ class Dataset(object): "friendly_name": "friendlyName", "default_encryption_configuration": "defaultEncryptionConfiguration", "storage_billing_model": "storageBillingModel", + "default_rounding_mode": "defaultRoundingMode", } def __init__(self, dataset_ref) -> None: @@ -532,6 +533,43 @@ def __init__(self, dataset_ref) -> None: dataset_ref = DatasetReference.from_string(dataset_ref) self._properties = {"datasetReference": dataset_ref.to_api_repr(), "labels": {}} + @property + def default_rounding_mode(self): + """Union[str, None]: defaultRoundingMode of the dataset as set by the user + (defaults to :data:`None`). + + Set the value to one of ``'ROUND_HALF_AWAY_FROM_ZERO'``, ``'ROUND_HALF_EVEN'``, or + ``'ROUNDING_MODE_UNSPECIFIED'``. + + See `default rounding mode + `_ + in REST API docs and `updating the default rounding model + `_ + guide. + + Raises: + ValueError: for invalid value types. + """ + return self._properties.get("defaultRoundingMode") + + @default_rounding_mode.setter + def default_rounding_mode(self, value): + possible_values = [ + "ROUNDING_MODE_UNSPECIFIED", + "ROUND_HALF_AWAY_FROM_ZERO", + "ROUND_HALF_EVEN", + ] + if not isinstance(value, str) and value is not None: + raise ValueError("Pass a string, or None") + if value is None: + self._properties["defaultRoundingMode"] = "ROUNDING_MODE_UNSPECIFIED" + if value not in possible_values and value is not None: + raise ValueError( + f'rounding mode needs to be one of {",".join(possible_values)}' + ) + if value: + self._properties["defaultRoundingMode"] = value + @property def project(self): """str: Project ID of the project bound to the dataset.""" diff --git a/tests/system/test_client.py b/tests/system/test_client.py index d3b95ec49..09606590e 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -265,6 +265,13 @@ def test_get_dataset(self): self.assertEqual(got.friendly_name, "Friendly") self.assertEqual(got.description, "Description") + def test_create_dataset_with_default_rounding_mode(self): + DATASET_ID = _make_dataset_id("create_dataset_rounding_mode") + dataset = self.temp_dataset(DATASET_ID, default_rounding_mode="ROUND_HALF_EVEN") + + self.assertTrue(_dataset_exists(dataset)) + self.assertEqual(dataset.default_rounding_mode, "ROUND_HALF_EVEN") + def test_update_dataset(self): dataset = self.temp_dataset(_make_dataset_id("update_dataset")) self.assertTrue(_dataset_exists(dataset)) @@ -2286,12 +2293,15 @@ def test_nested_table_to_arrow(self): self.assertTrue(pyarrow.types.is_list(record_col[1].type)) self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type)) - def temp_dataset(self, dataset_id, location=None): + def temp_dataset(self, dataset_id, *args, **kwargs): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) dataset = Dataset(dataset_ref) - if location: - dataset.location = location + if kwargs.get("location"): + dataset.location = kwargs.get("location") + if kwargs.get("default_rounding_mode"): + dataset.default_rounding_mode = kwargs.get("default_rounding_mode") + dataset = helpers.retry_403(Config.CLIENT.create_dataset)(dataset) self.to_delete.append(dataset) return dataset diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index 81af52261..3b2e644d9 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -63,6 +63,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "datasetId": "starry-skies", "tableId": "northern-hemisphere", } + DEFAULT_ROUNDING_MODE = "ROUND_HALF_EVEN" RESOURCE = { "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, "etag": "etag", @@ -73,6 +74,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "defaultTableExpirationMs": "3600", "labels": LABELS, "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + "defaultRoundingMode": DEFAULT_ROUNDING_MODE, } conn = client._connection = make_connection(RESOURCE) entries = [ @@ -88,8 +90,8 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): before.default_table_expiration_ms = 3600 before.location = LOCATION before.labels = LABELS + before.default_rounding_mode = DEFAULT_ROUNDING_MODE after = client.create_dataset(before) - assert after.dataset_id == DS_ID assert after.project == PROJECT assert after.etag == RESOURCE["etag"] @@ -99,6 +101,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): assert after.location == LOCATION assert after.default_table_expiration_ms == 3600 assert after.labels == LABELS + assert after.default_rounding_mode == DEFAULT_ROUNDING_MODE conn.api_request.assert_called_once_with( method="POST", @@ -109,6 +112,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "friendlyName": FRIENDLY_NAME, "location": LOCATION, "defaultTableExpirationMs": "3600", + "defaultRoundingMode": DEFAULT_ROUNDING_MODE, "access": [ {"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW, "role": None}, @@ -365,3 +369,100 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) + + +def test_create_dataset_with_default_rounding_mode_if_value_is_none( + PROJECT, DS_ID, LOCATION +): + default_rounding_mode = None + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "{}:{}".format(PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.default_rounding_mode = default_rounding_mode + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.default_rounding_mode is None + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + "defaultRoundingMode": "ROUNDING_MODE_UNSPECIFIED", + }, + timeout=DEFAULT_TIMEOUT, + ) + + +def test_create_dataset_with_default_rounding_mode_if_value_is_not_string( + PROJECT, DS_ID, LOCATION +): + default_rounding_mode = 10 + ds_ref = DatasetReference(PROJECT, DS_ID) + dataset = Dataset(ds_ref) + with pytest.raises(ValueError) as e: + dataset.default_rounding_mode = default_rounding_mode + assert str(e.value) == "Pass a string, or None" + + +def test_create_dataset_with_default_rounding_mode_if_value_is_not_in_possible_values( + PROJECT, DS_ID +): + default_rounding_mode = "ROUND_HALF_AWAY_FROM_ZEROS" + ds_ref = DatasetReference(PROJECT, DS_ID) + dataset = Dataset(ds_ref) + with pytest.raises(ValueError) as e: + dataset.default_rounding_mode = default_rounding_mode + assert ( + str(e.value) + == "rounding mode needs to be one of ROUNDING_MODE_UNSPECIFIED,ROUND_HALF_AWAY_FROM_ZERO,ROUND_HALF_EVEN" + ) + + +def test_create_dataset_with_default_rounding_mode_if_value_is_in_possible_values( + PROJECT, DS_ID, LOCATION +): + default_rounding_mode = "ROUND_HALF_AWAY_FROM_ZERO" + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "{}:{}".format(PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.default_rounding_mode = default_rounding_mode + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.default_rounding_mode is None + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + "defaultRoundingMode": default_rounding_mode, + }, + timeout=DEFAULT_TIMEOUT, + ) From c2d95e3654f8fe8496d6cf963ef47e6cfdfe0843 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 Oct 2023 11:49:31 -0700 Subject: [PATCH 179/536] chore(deps): bump urllib3 from 1.26.15 to 1.26.18 in /samples/geography (#1684) Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.26.15 to 1.26.18. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.26.15...1.26.18) --- updated-dependencies: - dependency-name: urllib3 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 714e032ad..9bc6ee32c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -42,4 +42,4 @@ Shapely==2.0.1 six==1.16.0 typing-extensions==4.7.1 typing-inspect==0.9.0 -urllib3==1.26.15 +urllib3==1.26.18 From 49bfd124cc76a719acf9257610181612d1452e56 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:47:00 -0400 Subject: [PATCH 180/536] chore(main): release 3.13.0 (#1676) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 20 ++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a93bde9eb..41206fd78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,26 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.13.0](https://github.com/googleapis/python-bigquery/compare/v3.12.0...v3.13.0) (2023-10-30) + + +### Features + +* Add `Model.transform_columns` property ([#1661](https://github.com/googleapis/python-bigquery/issues/1661)) ([5ceed05](https://github.com/googleapis/python-bigquery/commit/5ceed056482f6d1f2fc45e7e6b84382de45c85ed)) +* Add support for dataset.default_rounding_mode ([#1688](https://github.com/googleapis/python-bigquery/issues/1688)) ([83bc768](https://github.com/googleapis/python-bigquery/commit/83bc768b90a852d258a4805603020a296e02d2f9)) + + +### Bug Fixes + +* AccessEntry API representation parsing ([#1682](https://github.com/googleapis/python-bigquery/issues/1682)) ([a40d7ae](https://github.com/googleapis/python-bigquery/commit/a40d7ae03149708fc34c962b43a6ac198780b6aa)) + + +### Documentation + +* Remove redundant `bigquery_update_table_expiration` code sample ([#1673](https://github.com/googleapis/python-bigquery/issues/1673)) ([2dded33](https://github.com/googleapis/python-bigquery/commit/2dded33626b3de6c4ab5e1229eb4c85786b2ff53)) +* Revised `create_partitioned_table` sample ([#1447](https://github.com/googleapis/python-bigquery/issues/1447)) ([40ba859](https://github.com/googleapis/python-bigquery/commit/40ba859059c3e463e17ea7781bc5a9aff8244c5d)) +* Revised relax column mode sample ([#1467](https://github.com/googleapis/python-bigquery/issues/1467)) ([b8c9276](https://github.com/googleapis/python-bigquery/commit/b8c9276be011d971b941b583fd3d4417d438067f)) + ## [3.12.0](https://github.com/googleapis/python-bigquery/compare/v3.11.4...v3.12.0) (2023-10-02) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index ea71d198b..ee029aced 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.12.0" +__version__ = "3.13.0" From f22eff25f116f1c4973ac2b8b03bc8a4ae1f3f42 Mon Sep 17 00:00:00 2001 From: Gaurang Shah Date: Tue, 31 Oct 2023 19:04:41 -0400 Subject: [PATCH 181/536] feat: add support dataset.max_time_travel_hours (#1683) * feat: add support dataset.max_time_travel_hours * Update tests/unit/test_create_dataset.py * Update tests/unit/test_create_dataset.py * Update google/cloud/bigquery/dataset.py * update test_create_dataset_with_max_time_travel_hours --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/dataset.py | 23 +++++++++ tests/system/test_client.py | 7 +++ tests/unit/test_create_dataset.py | 79 +++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index af94784a4..726a2a17a 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -525,6 +525,7 @@ class Dataset(object): "friendly_name": "friendlyName", "default_encryption_configuration": "defaultEncryptionConfiguration", "storage_billing_model": "storageBillingModel", + "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", } @@ -533,6 +534,28 @@ def __init__(self, dataset_ref) -> None: dataset_ref = DatasetReference.from_string(dataset_ref) self._properties = {"datasetReference": dataset_ref.to_api_repr(), "labels": {}} + @property + def max_time_travel_hours(self): + """ + Optional[int]: Defines the time travel window in hours. The value can + be from 48 to 168 hours (2 to 7 days), and in multiple of 24 hours + (48, 72, 96, 120, 144, 168). + The default value is 168 hours if this is not set. + """ + return self._properties.get("maxTimeTravelHours") + + @max_time_travel_hours.setter + def max_time_travel_hours(self, hours): + if not isinstance(hours, int): + raise ValueError(f"max_time_travel_hours must be an integer. Got {hours}") + if hours < 2 * 24 or hours > 7 * 24: + raise ValueError( + "Time Travel Window should be from 48 to 168 hours (2 to 7 days)" + ) + if hours % 24 != 0: + raise ValueError("Time Travel Window should be multiple of 24") + self._properties["maxTimeTravelHours"] = hours + @property def default_rounding_mode(self): """Union[str, None]: defaultRoundingMode of the dataset as set by the user diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 09606590e..9660d5fa7 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -238,6 +238,11 @@ def test_create_dataset(self): self.assertEqual(dataset.dataset_id, DATASET_ID) self.assertEqual(dataset.project, Config.CLIENT.project) + def test_create_dataset_max_time_travel_hours(self): + DATASET_ID = _make_dataset_id("create_ci_dataset") + dataset = self.temp_dataset(DATASET_ID, max_time_travel_hours=24 * 2) + self.assertEqual(int(dataset.max_time_travel_hours), 24 * 2) + def test_get_dataset(self): dataset_id = _make_dataset_id("get_dataset") client = Config.CLIENT @@ -2299,6 +2304,8 @@ def temp_dataset(self, dataset_id, *args, **kwargs): dataset = Dataset(dataset_ref) if kwargs.get("location"): dataset.location = kwargs.get("location") + if kwargs.get("max_time_travel_hours"): + dataset.max_time_travel_hours = kwargs.get("max_time_travel_hours") if kwargs.get("default_rounding_mode"): dataset.default_rounding_mode = kwargs.get("default_rounding_mode") diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index 3b2e644d9..8374e6e0a 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -466,3 +466,82 @@ def test_create_dataset_with_default_rounding_mode_if_value_is_in_possible_value }, timeout=DEFAULT_TIMEOUT, ) + + +def test_create_dataset_with_max_time_travel_hours(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + max_time_travel_hours = 24 * 3 + + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "{}:{}".format(PROJECT, DS_ID), + "location": LOCATION, + "maxTimeTravelHours": max_time_travel_hours, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.max_time_travel_hours = max_time_travel_hours + after = client.create_dataset(before) + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.max_time_travel_hours == max_time_travel_hours + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + "maxTimeTravelHours": max_time_travel_hours, + }, + timeout=DEFAULT_TIMEOUT, + ) + + +def test_create_dataset_with_max_time_travel_hours_not_multiple_of_24( + PROJECT, DS_ID, LOCATION +): + ds_ref = DatasetReference(PROJECT, DS_ID) + dataset = Dataset(ds_ref) + with pytest.raises(ValueError) as e: + dataset.max_time_travel_hours = 50 + assert str(e.value) == "Time Travel Window should be multiple of 24" + + +def test_create_dataset_with_max_time_travel_hours_is_less_than_2_days( + PROJECT, DS_ID, LOCATION +): + ds_ref = DatasetReference(PROJECT, DS_ID) + dataset = Dataset(ds_ref) + with pytest.raises(ValueError) as e: + dataset.max_time_travel_hours = 24 + assert ( + str(e.value) + == "Time Travel Window should be from 48 to 168 hours (2 to 7 days)" + ) + + +def test_create_dataset_with_max_time_travel_hours_is_greater_than_7_days( + PROJECT, DS_ID, LOCATION +): + ds_ref = DatasetReference(PROJECT, DS_ID) + dataset = Dataset(ds_ref) + with pytest.raises(ValueError) as e: + dataset.max_time_travel_hours = 192 + assert ( + str(e.value) + == "Time Travel Window should be from 48 to 168 hours (2 to 7 days)" + ) + + +def test_create_dataset_with_max_time_travel_hours_is_not_int(PROJECT, DS_ID, LOCATION): + ds_ref = DatasetReference(PROJECT, DS_ID) + dataset = Dataset(ds_ref) + with pytest.raises(ValueError) as e: + dataset.max_time_travel_hours = "50" + assert str(e.value) == "max_time_travel_hours must be an integer. Got 50" From 386fa86c89b8cff69fc02213254a1c53c02fee42 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Riano Date: Wed, 1 Nov 2023 20:15:32 +0100 Subject: [PATCH 182/536] feat: add support for Dataset.isCaseInsensitive (#1671) * feat: add support for Dataset.isCaseInsensitive This commit creates a property named is_case_insensitive (in dataset.py) that allows the usage of the isCaseSensitive field in the Dataset REST API. Fixes: https://github.com/googleapis/python-bigquery/issues/1670 * tests: add unit tests for dataset.is_case_insensitive * docs: improve comments for dataset.is_case_sensitive (code and tests) * docs: improve docstring of is_case_insensitive Co-authored-by: Lingqing Gan * Update tests/system/test_client.py --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/dataset.py | 20 +++++++++++ tests/system/test_client.py | 61 ++++++++++++++++++++++++++++++-- tests/unit/test_dataset.py | 25 +++++++++++++ 3 files changed, 104 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 726a2a17a..c313045ce 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -524,6 +524,7 @@ class Dataset(object): "default_table_expiration_ms": "defaultTableExpirationMs", "friendly_name": "friendlyName", "default_encryption_configuration": "defaultEncryptionConfiguration", + "is_case_insensitive": "isCaseInsensitive", "storage_billing_model": "storageBillingModel", "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", @@ -822,6 +823,25 @@ def default_encryption_configuration(self, value): api_repr = value.to_api_repr() self._properties["defaultEncryptionConfiguration"] = api_repr + @property + def is_case_insensitive(self): + """Optional[bool]: True if the dataset and its table names are case-insensitive, otherwise False. + By default, this is False, which means the dataset and its table names are case-sensitive. + This field does not affect routine references. + + Raises: + ValueError: for invalid value types. + """ + return self._properties.get("isCaseInsensitive") or False + + @is_case_insensitive.setter + def is_case_insensitive(self, value): + if not isinstance(value, bool) and value is not None: + raise ValueError("Pass a boolean value, or None") + if value is None: + value = False + self._properties["isCaseInsensitive"] = value + @property def storage_billing_model(self): """Union[str, None]: StorageBillingModel of the dataset as set by the user diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 9660d5fa7..c8ff551ce 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -237,6 +237,17 @@ def test_create_dataset(self): self.assertTrue(_dataset_exists(dataset)) self.assertEqual(dataset.dataset_id, DATASET_ID) self.assertEqual(dataset.project, Config.CLIENT.project) + self.assertIs(dataset.is_case_insensitive, False) + + def test_create_dataset_case_sensitive(self): + DATASET_ID = _make_dataset_id("create_cs_dataset") + dataset = self.temp_dataset(DATASET_ID, is_case_insensitive=False) + self.assertIs(dataset.is_case_insensitive, False) + + def test_create_dataset_case_insensitive(self): + DATASET_ID = _make_dataset_id("create_ci_dataset") + dataset = self.temp_dataset(DATASET_ID, is_case_insensitive=True) + self.assertIs(dataset.is_case_insensitive, True) def test_create_dataset_max_time_travel_hours(self): DATASET_ID = _make_dataset_id("create_ci_dataset") @@ -283,16 +294,19 @@ def test_update_dataset(self): self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) self.assertEqual(dataset.labels, {}) + self.assertIs(dataset.is_case_insensitive, False) dataset.friendly_name = "Friendly" dataset.description = "Description" dataset.labels = {"priority": "high", "color": "blue"} + dataset.is_case_insensitive = True ds2 = Config.CLIENT.update_dataset( - dataset, ("friendly_name", "description", "labels") + dataset, ("friendly_name", "description", "labels", "is_case_insensitive") ) self.assertEqual(ds2.friendly_name, "Friendly") self.assertEqual(ds2.description, "Description") self.assertEqual(ds2.labels, {"priority": "high", "color": "blue"}) + self.assertIs(ds2.is_case_insensitive, True) ds2.labels = { "color": "green", # change @@ -347,6 +361,48 @@ def test_create_table(self): self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_id) + def test_create_tables_in_case_insensitive_dataset(self): + ci_dataset = self.temp_dataset( + _make_dataset_id("create_table"), is_case_insensitive=True + ) + table_arg = Table(ci_dataset.table("test_table2"), schema=SCHEMA) + tablemc_arg = Table(ci_dataset.table("Test_taBLe2")) # same name, in Mixed Case + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table_arg)) + self.assertTrue(_table_exists(tablemc_arg)) + self.assertIs(ci_dataset.is_case_insensitive, True) + + def test_create_tables_in_case_sensitive_dataset(self): + ci_dataset = self.temp_dataset( + _make_dataset_id("create_table"), is_case_insensitive=False + ) + table_arg = Table(ci_dataset.table("test_table3"), schema=SCHEMA) + tablemc_arg = Table(ci_dataset.table("Test_taBLe3")) # same name, in Mixed Case + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table_arg)) + self.assertFalse(_table_exists(tablemc_arg)) + self.assertIs(ci_dataset.is_case_insensitive, False) + + def test_create_tables_in_default_sensitivity_dataset(self): + dataset = self.temp_dataset(_make_dataset_id("create_table")) + table_arg = Table(dataset.table("test_table4"), schema=SCHEMA) + tablemc_arg = Table( + dataset.table("Test_taBLe4") + ) # same name, in MC (Mixed Case) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table_arg)) + self.assertFalse(_table_exists(tablemc_arg)) + self.assertIs(dataset.is_case_insensitive, False) + def test_create_table_with_real_custom_policy(self): from google.cloud.bigquery.schema import PolicyTagList @@ -2308,7 +2364,8 @@ def temp_dataset(self, dataset_id, *args, **kwargs): dataset.max_time_travel_hours = kwargs.get("max_time_travel_hours") if kwargs.get("default_rounding_mode"): dataset.default_rounding_mode = kwargs.get("default_rounding_mode") - + if kwargs.get("is_case_insensitive"): + dataset.is_case_insensitive = kwargs.get("is_case_insensitive") dataset = helpers.retry_403(Config.CLIENT.create_dataset)(dataset) self.to_delete.append(dataset) return dataset diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 0a709ab43..423349a51 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -744,6 +744,9 @@ def _verify_resource_properties(self, dataset, resource): self.assertEqual(dataset.description, resource.get("description")) self.assertEqual(dataset.friendly_name, resource.get("friendlyName")) self.assertEqual(dataset.location, resource.get("location")) + self.assertEqual( + dataset.is_case_insensitive, resource.get("isCaseInsensitive") or False + ) if "defaultEncryptionConfiguration" in resource: self.assertEqual( dataset.default_encryption_configuration.kms_key_name, @@ -781,6 +784,7 @@ def test_ctor_defaults(self): self.assertIsNone(dataset.description) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.location) + self.assertEqual(dataset.is_case_insensitive, False) def test_ctor_string(self): dataset = self._make_one("some-project.some_dset") @@ -818,6 +822,7 @@ def test_ctor_explicit(self): self.assertIsNone(dataset.description) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.location) + self.assertEqual(dataset.is_case_insensitive, False) def test_access_entries_setter_non_list(self): dataset = self._make_one(self.DS_REF) @@ -910,6 +915,26 @@ def test_labels_getter_missing_value(self): dataset = self._make_one(self.DS_REF) self.assertEqual(dataset.labels, {}) + def test_is_case_insensitive_setter_bad_value(self): + dataset = self._make_one(self.DS_REF) + with self.assertRaises(ValueError): + dataset.is_case_insensitive = 0 + + def test_is_case_insensitive_setter_true(self): + dataset = self._make_one(self.DS_REF) + dataset.is_case_insensitive = True + self.assertEqual(dataset.is_case_insensitive, True) + + def test_is_case_insensitive_setter_none(self): + dataset = self._make_one(self.DS_REF) + dataset.is_case_insensitive = None + self.assertEqual(dataset.is_case_insensitive, False) + + def test_is_case_insensitive_setter_false(self): + dataset = self._make_one(self.DS_REF) + dataset.is_case_insensitive = False + self.assertEqual(dataset.is_case_insensitive, False) + def test_from_api_repr_missing_identity(self): self._setUpConstants() RESOURCE = {} From eff365dc17755d0855338e2f273428ffe2056f67 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 1 Nov 2023 19:49:09 -0400 Subject: [PATCH 183/536] feat: support data_governance_type (#1708) * feat: support data_governance_type * remove value validation, add sys test --- google/cloud/bigquery/routine/routine.py | 24 +++++++++++- tests/system/test_client.py | 36 ++++++++++++++++++ tests/unit/routine/test_routine.py | 47 ++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index ef33d507e..83cb6362d 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -68,6 +68,7 @@ class Routine(object): "description": "description", "determinism_level": "determinismLevel", "remote_function_options": "remoteFunctionOptions", + "data_governance_type": "dataGovernanceType", } def __init__(self, routine_ref, **kwargs) -> None: @@ -300,8 +301,8 @@ def determinism_level(self, value): @property def remote_function_options(self): - """Optional[google.cloud.bigquery.routine.RemoteFunctionOptions]: Configures remote function - options for a routine. + """Optional[google.cloud.bigquery.routine.RemoteFunctionOptions]: + Configures remote function options for a routine. Raises: ValueError: @@ -329,6 +330,25 @@ def remote_function_options(self, value): self._PROPERTY_TO_API_FIELD["remote_function_options"] ] = api_repr + @property + def data_governance_type(self): + """Optional[str]: If set to ``DATA_MASKING``, the function is validated + and made available as a masking function. + + Raises: + ValueError: + If the value is not :data:`string` or :data:`None`. + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["data_governance_type"]) + + @data_governance_type.setter + def data_governance_type(self, value): + if value is not None and not isinstance(value, str): + raise ValueError( + "invalid data_governance_type, must be a string or `None`." + ) + self._properties[self._PROPERTY_TO_API_FIELD["data_governance_type"]] = value + @classmethod def from_api_repr(cls, resource: dict) -> "Routine": """Factory: construct a routine given its API representation. diff --git a/tests/system/test_client.py b/tests/system/test_client.py index c8ff551ce..7cea8cfa4 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -13,6 +13,7 @@ # limitations under the License. import base64 +import copy import csv import datetime import decimal @@ -2236,6 +2237,41 @@ def test_create_tvf_routine(self): ] assert result_rows == expected + def test_create_routine_w_data_governance(self): + routine_name = "routine_with_data_governance" + dataset = self.temp_dataset(_make_dataset_id("create_routine")) + + routine = bigquery.Routine( + dataset.routine(routine_name), + type_="SCALAR_FUNCTION", + language="SQL", + body="x", + arguments=[ + bigquery.RoutineArgument( + name="x", + data_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 + ), + ) + ], + data_governance_type="DATA_MASKING", + return_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 + ), + ) + routine_original = copy.deepcopy(routine) + + client = Config.CLIENT + routine_new = client.create_routine(routine) + + assert routine_new.reference == routine_original.reference + assert routine_new.type_ == routine_original.type_ + assert routine_new.language == routine_original.language + assert routine_new.body == routine_original.body + assert routine_new.arguments == routine_original.arguments + assert routine_new.return_type == routine_original.return_type + assert routine_new.data_governance_type == routine_original.data_governance_type + def test_create_table_rows_fetch_nested_schema(self): table_name = "test_table" dataset = self.temp_dataset(_make_dataset_id("create_table_nested_schema")) diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py index 87767200c..acd3bc40e 100644 --- a/tests/unit/routine/test_routine.py +++ b/tests/unit/routine/test_routine.py @@ -154,6 +154,7 @@ def test_from_api_repr(target_class): "foo": "bar", }, }, + "dataGovernanceType": "DATA_MASKING", } actual_routine = target_class.from_api_repr(resource) @@ -192,6 +193,7 @@ def test_from_api_repr(target_class): assert actual_routine.remote_function_options.connection == "connection_string" assert actual_routine.remote_function_options.max_batching_rows == 50 assert actual_routine.remote_function_options.user_defined_context == {"foo": "bar"} + assert actual_routine.data_governance_type == "DATA_MASKING" def test_from_api_repr_tvf_function(target_class): @@ -294,6 +296,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert actual_routine.description is None assert actual_routine.determinism_level is None assert actual_routine.remote_function_options is None + assert actual_routine.data_governance_type is None def test_from_api_repr_w_unknown_fields(target_class): @@ -428,6 +431,20 @@ def test_from_api_repr_w_unknown_fields(target_class): "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED }, ), + ( + { + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "definitionBody": "x * 3", + "language": "SQL", + "returnType": {"typeKind": "INT64"}, + "routineType": "SCALAR_FUNCTION", + "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, + "dataGovernanceType": "DATA_MASKING", + }, + ["data_governance_type"], + {"dataGovernanceType": "DATA_MASKING"}, + ), ( {}, [ @@ -554,6 +571,36 @@ def test_set_remote_function_options_w_none(object_under_test): assert object_under_test._properties["remoteFunctionOptions"] is None +def test_set_data_governance_type_w_none(object_under_test): + object_under_test.data_governance_type = None + assert object_under_test.data_governance_type is None + assert object_under_test._properties["dataGovernanceType"] is None + + +def test_set_data_governance_type_valid(object_under_test): + object_under_test.data_governance_type = "DATA_MASKING" + assert object_under_test.data_governance_type == "DATA_MASKING" + assert object_under_test._properties["dataGovernanceType"] == "DATA_MASKING" + + +def test_set_data_governance_type_wrong_type(object_under_test): + with pytest.raises(ValueError) as exp: + object_under_test.data_governance_type = 1 + assert "invalid data_governance_type" in str(exp) + assert object_under_test.data_governance_type is None + assert object_under_test._properties.get("dataGovernanceType") is None + + +def test_set_data_governance_type_wrong_str(object_under_test): + """Client does not verify the content of data_governance_type string to be + compatible with future upgrades. If the value is not supported, BigQuery + itself will report an error. + """ + object_under_test.data_governance_type = "RANDOM_STRING" + assert object_under_test.data_governance_type == "RANDOM_STRING" + assert object_under_test._properties["dataGovernanceType"] == "RANDOM_STRING" + + def test_repr(target_class): model = target_class("my-proj.my_dset.my_routine") actual_routine = repr(model) From 40bc24479305a052c3d4a68aba4bdcedaba29d1c Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:22:44 -0400 Subject: [PATCH 184/536] chore: update docfx minimum Python version (#1712) --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index a2b7a6843..703e36cbb 100644 --- a/noxfile.py +++ b/noxfile.py @@ -427,7 +427,7 @@ def docs(session): ) -@nox.session(python="3.9") +@nox.session(python="3.10") def docfx(session): """Build the docfx yaml files for this library.""" From afa9752dba1cd575565cd79859fa74399ec12eb1 Mon Sep 17 00:00:00 2001 From: Kira Date: Tue, 7 Nov 2023 13:08:23 -0800 Subject: [PATCH 185/536] chore: refactor version checks for pandas library (#1711) * chore: refactor version checks for pandas library * readded removed importing of pandas * revert bad commit * merged from main, added type:ignore tag to get around mypy error * Added ignore statement for mypy error, removed checking max version of Pandas * updated docstring error * Added parameterize to test to test multiple supported versons --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/__init__.py | 1 + google/cloud/bigquery/_versions_helpers.py | 63 ++++++++++++++++++++++ google/cloud/bigquery/client.py | 6 ++- google/cloud/bigquery/exceptions.py | 4 ++ tests/unit/test__versions_helpers.py | 51 ++++++++++++++++++ 5 files changed, 123 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 40e3a1578..72576e608 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -202,6 +202,7 @@ # Custom exceptions "LegacyBigQueryStorageError", "LegacyPyarrowError", + "LegacyPandasError", ] diff --git a/google/cloud/bigquery/_versions_helpers.py b/google/cloud/bigquery/_versions_helpers.py index ce529b76e..4ff4b9700 100644 --- a/google/cloud/bigquery/_versions_helpers.py +++ b/google/cloud/bigquery/_versions_helpers.py @@ -24,6 +24,7 @@ _MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") _MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") +_MIN_PANDAS_VERSION = packaging.version.Version("1.1.0") class PyarrowVersions: @@ -171,3 +172,65 @@ def try_import(self, raise_if_error: bool = False) -> Any: BQ_STORAGE_VERSIONS = BQStorageVersions() + + +class PandasVersions: + """Version comparisons for pandas package.""" + + def __init__(self): + self._installed_version = None + + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of pandas""" + if self._installed_version is None: + import pandas # type: ignore + + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(pandas, "__version__", "0.0.0") + ) + + return self._installed_version + + def try_import(self, raise_if_error: bool = False) -> Any: + """Verify that a recent enough version of pandas extra is installed. + The function assumes that pandas extra is installed, and should thus + be used in places where this assumption holds. + Because `pip` can install an outdated version of this extra despite + the constraints in `setup.py`, the calling code can use this helper + to verify the version compatibility at runtime. + Returns: + The ``pandas`` module or ``None``. + Raises: + exceptions.LegacyPandasError: + If the pandas package is outdated and ``raise_if_error`` is + ``True``. + """ + try: + import pandas + except ImportError as exc: # pragma: NO COVER + if raise_if_error: + raise exceptions.LegacyPandasError( + "pandas package not found. Install pandas version >=" + f" {_MIN_PANDAS_VERSION}" + ) from exc + return None + + if self.installed_version < _MIN_PANDAS_VERSION: + if raise_if_error: + msg = ( + "Dependency pandas is outdated, please upgrade" + f" it to version >= {_MIN_PANDAS_VERSION}" + f" (version found: {self.installed_version})." + ) + raise exceptions.LegacyPandasError(msg) + return None + + return pandas + + +PANDAS_VERSIONS = PandasVersions() diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 496015b21..d4a759ba4 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -115,6 +115,9 @@ from google.cloud.bigquery.table import RowIterator pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() +pandas = ( + _versions_helpers.PANDAS_VERSIONS.try_import() +) # mypy check fails because pandas import is outside module, there are type: ignore comments related to this TimeoutType = Union[float, None] ResumableTimeoutType = Union[ @@ -124,7 +127,6 @@ if typing.TYPE_CHECKING: # pragma: NO COVER # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] - import pandas # type: ignore import requests # required by api-core _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB @@ -2488,7 +2490,7 @@ def load_table_from_file( def load_table_from_dataframe( self, - dataframe: "pandas.DataFrame", + dataframe: "pandas.DataFrame", # type: ignore destination: Union[Table, TableReference, str], num_retries: int = _DEFAULT_NUM_RETRIES, job_id: Optional[str] = None, diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py index e94a6c832..62e0d540c 100644 --- a/google/cloud/bigquery/exceptions.py +++ b/google/cloud/bigquery/exceptions.py @@ -29,3 +29,7 @@ class BigQueryStorageNotFoundError(BigQueryError): """Raised when BigQuery Storage extra is not installed when trying to import it. """ + + +class LegacyPandasError(BigQueryError): + """Raised when too old a version of pandas package is detected at runtime.""" diff --git a/tests/unit/test__versions_helpers.py b/tests/unit/test__versions_helpers.py index 144f14b7c..afe170e7a 100644 --- a/tests/unit/test__versions_helpers.py +++ b/tests/unit/test__versions_helpers.py @@ -26,6 +26,11 @@ except ImportError: # pragma: NO COVER bigquery_storage = None +try: + import pandas # type: ignore +except ImportError: # pragma: NO COVER + pandas = None + from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions @@ -173,3 +178,49 @@ def test_bqstorage_is_read_session_optional_false(): bqstorage_versions = _versions_helpers.BQStorageVersions() with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"): assert not bqstorage_versions.is_read_session_optional + + +@pytest.mark.skipif(pandas is None, reason="pandas is not installed") +@pytest.mark.parametrize("version", ["1.5.0", "2.0.0", "2.1.0"]) +def test_try_import_raises_no_error_w_recent_pandas(version): + versions = _versions_helpers.PandasVersions() + with mock.patch("pandas.__version__", new=version): + try: + pandas = versions.try_import(raise_if_error=True) + assert pandas is not None + except exceptions.LegacyPandasError: # pragma: NO COVER + raise ("Legacy error raised with a non-legacy dependency version.") + + +@pytest.mark.skipif(pandas is None, reason="pandas is not installed") +def test_try_import_returns_none_w_legacy_pandas(): + versions = _versions_helpers.PandasVersions() + with mock.patch("pandas.__version__", new="1.0.0"): + pandas = versions.try_import() + assert pandas is None + + +@pytest.mark.skipif(pandas is None, reason="pandas is not installed") +def test_try_import_raises_error_w_legacy_pandas(): + versions = _versions_helpers.PandasVersions() + with mock.patch("pandas.__version__", new="1.0.0"): + with pytest.raises(exceptions.LegacyPandasError): + versions.try_import(raise_if_error=True) + + +@pytest.mark.skipif(pandas is None, reason="pandas is not installed") +def test_installed_pandas_version_returns_cached(): + versions = _versions_helpers.PandasVersions() + versions._installed_version = object() + assert versions.installed_version is versions._installed_version + + +@pytest.mark.skipif(pandas is None, reason="pandas is not installed") +def test_installed_pandas_version_returns_parsed_version(): + versions = _versions_helpers.PandasVersions() + with mock.patch("pandas.__version__", new="1.1.0"): + version = versions.installed_version + + assert version.major == 1 + assert version.minor == 1 + assert version.micro == 0 From 2fb7260c014767924dec94639fdcb2f739e433b4 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 8 Nov 2023 13:00:24 -0800 Subject: [PATCH 186/536] chore: bump urllib3 from 1.26.12 to 1.26.18 (#1714) Source-Link: https://github.com/googleapis/synthtool/commit/febacccc98d6d224aff9d0bd0373bb5a4cd5969c Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:caffe0a9277daeccc4d1de5c9b55ebba0901b57c2f713ec9c876b0d4ec064f61 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 +- .kokoro/requirements.txt | 532 ++++++++++++++++++++------------------ 2 files changed, 277 insertions(+), 259 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 7f291dbd5..453b540c1 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:4f9b3b106ad0beafc2c8a415e3f62c1a0cc23cabea115dbe841b848f581cfe99 -# created: 2023-10-18T20:26:37.410353675Z + digest: sha256:caffe0a9277daeccc4d1de5c9b55ebba0901b57c2f713ec9c876b0d4ec064f61 +# created: 2023-11-08T19:46:45.022803742Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 16170d0ca..8957e2110 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -4,91 +4,75 @@ # # pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==2.0.0 \ - --hash=sha256:6372ad78c89d662035101418ae253668445b391755cfe94ea52f1b9d22425b20 \ - --hash=sha256:cffa11ea77999bb0dd27bb25ff6dc142a6796142f68d45b1a26b11f58724561e +argcomplete==3.1.4 \ + --hash=sha256:72558ba729e4c468572609817226fb0a6e7e9a0a7d477b882be168c0b4a62b94 \ + --hash=sha256:fbe56f8cda08aa9a04b307d8482ea703e96a6a801611acb4be9bf3942017989f # via nox -attrs==22.1.0 \ - --hash=sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6 \ - --hash=sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c +attrs==23.1.0 \ + --hash=sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04 \ + --hash=sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015 # via gcp-releasetool -bleach==5.0.1 \ - --hash=sha256:085f7f33c15bd408dd9b17a4ad77c577db66d76203e5984b1bd59baeee948b2a \ - --hash=sha256:0d03255c47eb9bd2f26aa9bb7f2107732e7e8fe195ca2f64709fcf3b0a4a085c - # via readme-renderer -cachetools==5.2.0 \ - --hash=sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757 \ - --hash=sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db +cachetools==5.3.2 \ + --hash=sha256:086ee420196f7b2ab9ca2db2520aca326318b68fe5ba8bc4d49cca91add450f2 \ + --hash=sha256:861f35a13a451f94e301ce2bec7cac63e881232ccce7ed67fab9b5df4d3beaa1 # via google-auth certifi==2023.7.22 \ --hash=sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082 \ --hash=sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9 # via requests -cffi==1.15.1 \ - --hash=sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5 \ - --hash=sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef \ - --hash=sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104 \ - --hash=sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426 \ - --hash=sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405 \ - --hash=sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375 \ - --hash=sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a \ - --hash=sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e \ - --hash=sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc \ - --hash=sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf \ - --hash=sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185 \ - --hash=sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497 \ - --hash=sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3 \ - --hash=sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35 \ - --hash=sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c \ - --hash=sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83 \ - --hash=sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21 \ - --hash=sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca \ - --hash=sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984 \ - --hash=sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac \ - --hash=sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd \ - --hash=sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee \ - --hash=sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a \ - --hash=sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2 \ - --hash=sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192 \ - --hash=sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7 \ - --hash=sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585 \ - --hash=sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f \ - --hash=sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e \ - --hash=sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27 \ - --hash=sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b \ - --hash=sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e \ - --hash=sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e \ - --hash=sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d \ - --hash=sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c \ - --hash=sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415 \ - --hash=sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82 \ - --hash=sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02 \ - --hash=sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314 \ - --hash=sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325 \ - --hash=sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c \ - --hash=sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3 \ - --hash=sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914 \ - --hash=sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045 \ - --hash=sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d \ - --hash=sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9 \ - --hash=sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5 \ - --hash=sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2 \ - --hash=sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c \ - --hash=sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3 \ - --hash=sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2 \ - --hash=sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8 \ - --hash=sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d \ - --hash=sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d \ - --hash=sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9 \ - --hash=sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162 \ - --hash=sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76 \ - --hash=sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4 \ - --hash=sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e \ - --hash=sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9 \ - --hash=sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6 \ - --hash=sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b \ - --hash=sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01 \ - --hash=sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0 +cffi==1.16.0 \ + --hash=sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc \ + --hash=sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a \ + --hash=sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417 \ + --hash=sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab \ + --hash=sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520 \ + --hash=sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36 \ + --hash=sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743 \ + --hash=sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8 \ + --hash=sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed \ + --hash=sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684 \ + --hash=sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56 \ + --hash=sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324 \ + --hash=sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d \ + --hash=sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235 \ + --hash=sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e \ + --hash=sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088 \ + --hash=sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000 \ + --hash=sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7 \ + --hash=sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e \ + --hash=sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673 \ + --hash=sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c \ + --hash=sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe \ + --hash=sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2 \ + --hash=sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098 \ + --hash=sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8 \ + --hash=sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a \ + --hash=sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0 \ + --hash=sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b \ + --hash=sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896 \ + --hash=sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e \ + --hash=sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9 \ + --hash=sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2 \ + --hash=sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b \ + --hash=sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6 \ + --hash=sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404 \ + --hash=sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f \ + --hash=sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0 \ + --hash=sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4 \ + --hash=sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc \ + --hash=sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936 \ + --hash=sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba \ + --hash=sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872 \ + --hash=sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb \ + --hash=sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614 \ + --hash=sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1 \ + --hash=sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d \ + --hash=sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969 \ + --hash=sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b \ + --hash=sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4 \ + --hash=sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627 \ + --hash=sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956 \ + --hash=sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357 # via cryptography charset-normalizer==2.1.1 \ --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 \ @@ -109,78 +93,74 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -commonmark==0.9.1 \ - --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ - --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 - # via rich -cryptography==41.0.4 \ - --hash=sha256:004b6ccc95943f6a9ad3142cfabcc769d7ee38a3f60fb0dddbfb431f818c3a67 \ - --hash=sha256:047c4603aeb4bbd8db2756e38f5b8bd7e94318c047cfe4efeb5d715e08b49311 \ - --hash=sha256:0d9409894f495d465fe6fda92cb70e8323e9648af912d5b9141d616df40a87b8 \ - --hash=sha256:23a25c09dfd0d9f28da2352503b23e086f8e78096b9fd585d1d14eca01613e13 \ - --hash=sha256:2ed09183922d66c4ec5fdaa59b4d14e105c084dd0febd27452de8f6f74704143 \ - --hash=sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f \ - --hash=sha256:37480760ae08065437e6573d14be973112c9e6dcaf5f11d00147ee74f37a3829 \ - --hash=sha256:3b224890962a2d7b57cf5eeb16ccaafba6083f7b811829f00476309bce2fe0fd \ - --hash=sha256:5a0f09cefded00e648a127048119f77bc2b2ec61e736660b5789e638f43cc397 \ - --hash=sha256:5b72205a360f3b6176485a333256b9bcd48700fc755fef51c8e7e67c4b63e3ac \ - --hash=sha256:7e53db173370dea832190870e975a1e09c86a879b613948f09eb49324218c14d \ - --hash=sha256:7febc3094125fc126a7f6fb1f420d0da639f3f32cb15c8ff0dc3997c4549f51a \ - --hash=sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839 \ - --hash=sha256:86defa8d248c3fa029da68ce61fe735432b047e32179883bdb1e79ed9bb8195e \ - --hash=sha256:8ac4f9ead4bbd0bc8ab2d318f97d85147167a488be0e08814a37eb2f439d5cf6 \ - --hash=sha256:93530900d14c37a46ce3d6c9e6fd35dbe5f5601bf6b3a5c325c7bffc030344d9 \ - --hash=sha256:9eeb77214afae972a00dee47382d2591abe77bdae166bda672fb1e24702a3860 \ - --hash=sha256:b5f4dfe950ff0479f1f00eda09c18798d4f49b98f4e2006d644b3301682ebdca \ - --hash=sha256:c3391bd8e6de35f6f1140e50aaeb3e2b3d6a9012536ca23ab0d9c35ec18c8a91 \ - --hash=sha256:c880eba5175f4307129784eca96f4e70b88e57aa3f680aeba3bab0e980b0f37d \ - --hash=sha256:cecfefa17042941f94ab54f769c8ce0fe14beff2694e9ac684176a2535bf9714 \ - --hash=sha256:e40211b4923ba5a6dc9769eab704bdb3fbb58d56c5b336d30996c24fcf12aadb \ - --hash=sha256:efc8ad4e6fc4f1752ebfb58aefece8b4e3c4cae940b0994d43649bdfce8d0d4f +cryptography==41.0.5 \ + --hash=sha256:0c327cac00f082013c7c9fb6c46b7cc9fa3c288ca702c74773968173bda421bf \ + --hash=sha256:0d2a6a598847c46e3e321a7aef8af1436f11c27f1254933746304ff014664d84 \ + --hash=sha256:227ec057cd32a41c6651701abc0328135e472ed450f47c2766f23267b792a88e \ + --hash=sha256:22892cc830d8b2c89ea60148227631bb96a7da0c1b722f2aac8824b1b7c0b6b8 \ + --hash=sha256:392cb88b597247177172e02da6b7a63deeff1937fa6fec3bbf902ebd75d97ec7 \ + --hash=sha256:3be3ca726e1572517d2bef99a818378bbcf7d7799d5372a46c79c29eb8d166c1 \ + --hash=sha256:573eb7128cbca75f9157dcde974781209463ce56b5804983e11a1c462f0f4e88 \ + --hash=sha256:580afc7b7216deeb87a098ef0674d6ee34ab55993140838b14c9b83312b37b86 \ + --hash=sha256:5a70187954ba7292c7876734183e810b728b4f3965fbe571421cb2434d279179 \ + --hash=sha256:73801ac9736741f220e20435f84ecec75ed70eda90f781a148f1bad546963d81 \ + --hash=sha256:7d208c21e47940369accfc9e85f0de7693d9a5d843c2509b3846b2db170dfd20 \ + --hash=sha256:8254962e6ba1f4d2090c44daf50a547cd5f0bf446dc658a8e5f8156cae0d8548 \ + --hash=sha256:88417bff20162f635f24f849ab182b092697922088b477a7abd6664ddd82291d \ + --hash=sha256:a48e74dad1fb349f3dc1d449ed88e0017d792997a7ad2ec9587ed17405667e6d \ + --hash=sha256:b948e09fe5fb18517d99994184854ebd50b57248736fd4c720ad540560174ec5 \ + --hash=sha256:c707f7afd813478e2019ae32a7c49cd932dd60ab2d2a93e796f68236b7e1fbf1 \ + --hash=sha256:d38e6031e113b7421db1de0c1b1f7739564a88f1684c6b89234fbf6c11b75147 \ + --hash=sha256:d3977f0e276f6f5bf245c403156673db103283266601405376f075c849a0b936 \ + --hash=sha256:da6a0ff8f1016ccc7477e6339e1d50ce5f59b88905585f77193ebd5068f1e797 \ + --hash=sha256:e270c04f4d9b5671ebcc792b3ba5d4488bf7c42c3c241a3748e2599776f29696 \ + --hash=sha256:e886098619d3815e0ad5790c973afeee2c0e6e04b4da90b88e6bd06e2a0b1b72 \ + --hash=sha256:ec3b055ff8f1dce8e6ef28f626e0972981475173d7973d63f271b29c8a2897da \ + --hash=sha256:fba1e91467c65fe64a82c689dc6cf58151158993b13eb7a7f3f4b7f395636723 # via # gcp-releasetool # secretstorage -distlib==0.3.6 \ - --hash=sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46 \ - --hash=sha256:f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e +distlib==0.3.7 \ + --hash=sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057 \ + --hash=sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8 # via virtualenv -docutils==0.19 \ - --hash=sha256:33995a6753c30b7f577febfc2c50411fec6aac7f7ffeb7c4cfe5991072dcf9e6 \ - --hash=sha256:5e1de4d849fee02c63b040a4a3fd567f4ab104defd8a5511fbbc24a8a017efbc +docutils==0.20.1 \ + --hash=sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6 \ + --hash=sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b # via readme-renderer -filelock==3.8.0 \ - --hash=sha256:55447caa666f2198c5b6b13a26d2084d26fa5b115c00d065664b2124680c4edc \ - --hash=sha256:617eb4e5eedc82fc5f47b6d61e4d11cb837c56cb4544e39081099fa17ad109d4 +filelock==3.13.1 \ + --hash=sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e \ + --hash=sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c # via virtualenv -gcp-docuploader==0.6.4 \ - --hash=sha256:01486419e24633af78fd0167db74a2763974765ee8078ca6eb6964d0ebd388af \ - --hash=sha256:70861190c123d907b3b067da896265ead2eeb9263969d6955c9e0bb091b5ccbf +gcp-docuploader==0.6.5 \ + --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ + --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea # via -r requirements.in -gcp-releasetool==1.10.5 \ - --hash=sha256:174b7b102d704b254f2a26a3eda2c684fd3543320ec239baf771542a2e58e109 \ - --hash=sha256:e29d29927fe2ca493105a82958c6873bb2b90d503acac56be2c229e74de0eec9 +gcp-releasetool==1.16.0 \ + --hash=sha256:27bf19d2e87aaa884096ff941aa3c592c482be3d6a2bfe6f06afafa6af2353e3 \ + --hash=sha256:a316b197a543fd036209d0caba7a8eb4d236d8e65381c80cbc6d7efaa7606d63 # via -r requirements.in -google-api-core==2.10.2 \ - --hash=sha256:10c06f7739fe57781f87523375e8e1a3a4674bf6392cd6131a3222182b971320 \ - --hash=sha256:34f24bd1d5f72a8c4519773d99ca6bf080a6c4e041b4e9f024fe230191dda62e +google-api-core==2.12.0 \ + --hash=sha256:c22e01b1e3c4dcd90998494879612c38d0a3411d1f7b679eb89e2abe3ce1f553 \ + --hash=sha256:ec6054f7d64ad13b41e43d96f735acbd763b0f3b695dabaa2d579673f6a6e160 # via # google-cloud-core # google-cloud-storage -google-auth==2.14.1 \ - --hash=sha256:ccaa901f31ad5cbb562615eb8b664b3dd0bf5404a67618e642307f00613eda4d \ - --hash=sha256:f5d8701633bebc12e0deea4df8abd8aff31c28b355360597f7f2ee60f2e4d016 +google-auth==2.23.4 \ + --hash=sha256:79905d6b1652187def79d491d6e23d0cbb3a21d3c7ba0dbaa9c8a01906b13ff3 \ + --hash=sha256:d4bbc92fe4b8bfd2f3e8d88e5ba7085935da208ee38a134fc280e7ce682a05f2 # via # gcp-releasetool # google-api-core # google-cloud-core # google-cloud-storage -google-cloud-core==2.3.2 \ - --hash=sha256:8417acf6466be2fa85123441696c4badda48db314c607cf1e5d543fa8bdc22fe \ - --hash=sha256:b9529ee7047fd8d4bf4a2182de619154240df17fbe60ead399078c1ae152af9a +google-cloud-core==2.3.3 \ + --hash=sha256:37b80273c8d7eee1ae816b3a20ae43585ea50506cb0e60f3cf5be5f87f1373cb \ + --hash=sha256:fbd11cad3e98a7e5b0343dc07cb1039a5ffd7a5bb96e1f1e27cee4bda4a90863 # via google-cloud-storage -google-cloud-storage==2.6.0 \ - --hash=sha256:104ca28ae61243b637f2f01455cc8a05e8f15a2a18ced96cb587241cdd3820f5 \ - --hash=sha256:4ad0415ff61abdd8bb2ae81c1f8f7ec7d91a1011613f2db87c614c550f97bfe9 +google-cloud-storage==2.13.0 \ + --hash=sha256:ab0bf2e1780a1b74cf17fccb13788070b729f50c252f0c94ada2aae0ca95437d \ + --hash=sha256:f62dc4c7b6cd4360d072e3deb28035fbdad491ac3d9b0b1815a12daea10f37c7 # via gcp-docuploader google-crc32c==1.5.0 \ --hash=sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a \ @@ -251,29 +231,31 @@ google-crc32c==1.5.0 \ --hash=sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183 \ --hash=sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556 \ --hash=sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4 - # via google-resumable-media -google-resumable-media==2.4.0 \ - --hash=sha256:2aa004c16d295c8f6c33b2b4788ba59d366677c0a25ae7382436cb30f776deaa \ - --hash=sha256:8d5518502f92b9ecc84ac46779bd4f09694ecb3ba38a3e7ca737a86d15cbca1f + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.6.0 \ + --hash=sha256:972852f6c65f933e15a4a210c2b96930763b47197cdf4aa5f5bea435efb626e7 \ + --hash=sha256:fc03d344381970f79eebb632a3c18bb1828593a2dc5572b5f90115ef7d11e81b # via google-cloud-storage -googleapis-common-protos==1.57.0 \ - --hash=sha256:27a849d6205838fb6cc3c1c21cb9800707a661bb21c6ce7fb13e99eb1f8a0c46 \ - --hash=sha256:a9f4a1d7f6d9809657b7f1316a1aa527f6664891531bcfcc13b6696e685f443c +googleapis-common-protos==1.61.0 \ + --hash=sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0 \ + --hash=sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b # via google-api-core idna==3.4 \ --hash=sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4 \ --hash=sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 # via requests -importlib-metadata==5.0.0 \ - --hash=sha256:da31db32b304314d044d3c12c79bd59e307889b287ad12ff387b3500835fc2ab \ - --hash=sha256:ddb0e35065e8938f867ed4928d0ae5bf2a53b7773871bfe6bcc7e4fcdc7dea43 +importlib-metadata==6.8.0 \ + --hash=sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb \ + --hash=sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743 # via # -r requirements.in # keyring # twine -jaraco-classes==3.2.3 \ - --hash=sha256:2353de3288bc6b82120752201c6b1c1a14b058267fa424ed5ce5984e3b922158 \ - --hash=sha256:89559fa5c1d3c34eff6f631ad80bb21f378dbcbb35dd161fd2c6b93f5be2f98a +jaraco-classes==3.3.0 \ + --hash=sha256:10afa92b6743f25c0cf5f37c6bb6e18e2c5bb84a16527ccfc0040ea377e7aaeb \ + --hash=sha256:c063dd08e89217cee02c8d5e5ec560f2c8ce6cdc2fcdc2e68f7b2e5547ed3621 # via keyring jeepney==0.8.0 \ --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ @@ -285,75 +267,121 @@ jinja2==3.1.2 \ --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \ --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 # via gcp-releasetool -keyring==23.11.0 \ - --hash=sha256:3dd30011d555f1345dec2c262f0153f2f0ca6bca041fb1dc4588349bb4c0ac1e \ - --hash=sha256:ad192263e2cdd5f12875dedc2da13534359a7e760e77f8d04b50968a821c2361 +keyring==24.2.0 \ + --hash=sha256:4901caaf597bfd3bbd78c9a0c7c4c29fcd8310dab2cffefe749e916b6527acd6 \ + --hash=sha256:ca0746a19ec421219f4d713f848fa297a661a8a8c1504867e55bfb5e09091509 # via # gcp-releasetool # twine -markupsafe==2.1.1 \ - --hash=sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003 \ - --hash=sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88 \ - --hash=sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5 \ - --hash=sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7 \ - --hash=sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a \ - --hash=sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603 \ - --hash=sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1 \ - --hash=sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135 \ - --hash=sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247 \ - --hash=sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6 \ - --hash=sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601 \ - --hash=sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77 \ - --hash=sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02 \ - --hash=sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e \ - --hash=sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63 \ - --hash=sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f \ - --hash=sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980 \ - --hash=sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b \ - --hash=sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812 \ - --hash=sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff \ - --hash=sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96 \ - --hash=sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1 \ - --hash=sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925 \ - --hash=sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a \ - --hash=sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6 \ - --hash=sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e \ - --hash=sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f \ - --hash=sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4 \ - --hash=sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f \ - --hash=sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3 \ - --hash=sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c \ - --hash=sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a \ - --hash=sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417 \ - --hash=sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a \ - --hash=sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a \ - --hash=sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37 \ - --hash=sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452 \ - --hash=sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933 \ - --hash=sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a \ - --hash=sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7 +markdown-it-py==3.0.0 \ + --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ + --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb + # via rich +markupsafe==2.1.3 \ + --hash=sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e \ + --hash=sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e \ + --hash=sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431 \ + --hash=sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686 \ + --hash=sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c \ + --hash=sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559 \ + --hash=sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc \ + --hash=sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb \ + --hash=sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939 \ + --hash=sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c \ + --hash=sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0 \ + --hash=sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4 \ + --hash=sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9 \ + --hash=sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575 \ + --hash=sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba \ + --hash=sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d \ + --hash=sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd \ + --hash=sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3 \ + --hash=sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00 \ + --hash=sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155 \ + --hash=sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac \ + --hash=sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52 \ + --hash=sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f \ + --hash=sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8 \ + --hash=sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b \ + --hash=sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007 \ + --hash=sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24 \ + --hash=sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea \ + --hash=sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198 \ + --hash=sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0 \ + --hash=sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee \ + --hash=sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be \ + --hash=sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2 \ + --hash=sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1 \ + --hash=sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707 \ + --hash=sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6 \ + --hash=sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c \ + --hash=sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58 \ + --hash=sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823 \ + --hash=sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779 \ + --hash=sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636 \ + --hash=sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c \ + --hash=sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad \ + --hash=sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee \ + --hash=sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc \ + --hash=sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2 \ + --hash=sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48 \ + --hash=sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7 \ + --hash=sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e \ + --hash=sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b \ + --hash=sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa \ + --hash=sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5 \ + --hash=sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e \ + --hash=sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb \ + --hash=sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9 \ + --hash=sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57 \ + --hash=sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc \ + --hash=sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc \ + --hash=sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2 \ + --hash=sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11 # via jinja2 -more-itertools==9.0.0 \ - --hash=sha256:250e83d7e81d0c87ca6bd942e6aeab8cc9daa6096d12c5308f3f92fa5e5c1f41 \ - --hash=sha256:5a6257e40878ef0520b1803990e3e22303a41b5714006c32a3fd8304b26ea1ab +mdurl==0.1.2 \ + --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ + --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba + # via markdown-it-py +more-itertools==10.1.0 \ + --hash=sha256:626c369fa0eb37bac0291bce8259b332fd59ac792fa5497b59837309cd5b114a \ + --hash=sha256:64e0735fcfdc6f3464ea133afe8ea4483b1c5fe3a3d69852e6503b43a0b222e6 # via jaraco-classes -nox==2022.11.21 \ - --hash=sha256:0e41a990e290e274cb205a976c4c97ee3c5234441a8132c8c3fd9ea3c22149eb \ - --hash=sha256:e21c31de0711d1274ca585a2c5fde36b1aa962005ba8e9322bf5eeed16dcd684 +nh3==0.2.14 \ + --hash=sha256:116c9515937f94f0057ef50ebcbcc10600860065953ba56f14473ff706371873 \ + --hash=sha256:18415df36db9b001f71a42a3a5395db79cf23d556996090d293764436e98e8ad \ + --hash=sha256:203cac86e313cf6486704d0ec620a992c8bc164c86d3a4fd3d761dd552d839b5 \ + --hash=sha256:2b0be5c792bd43d0abef8ca39dd8acb3c0611052ce466d0401d51ea0d9aa7525 \ + --hash=sha256:377aaf6a9e7c63962f367158d808c6a1344e2b4f83d071c43fbd631b75c4f0b2 \ + --hash=sha256:525846c56c2bcd376f5eaee76063ebf33cf1e620c1498b2a40107f60cfc6054e \ + --hash=sha256:5529a3bf99402c34056576d80ae5547123f1078da76aa99e8ed79e44fa67282d \ + --hash=sha256:7771d43222b639a4cd9e341f870cee336b9d886de1ad9bec8dddab22fe1de450 \ + --hash=sha256:88c753efbcdfc2644a5012938c6b9753f1c64a5723a67f0301ca43e7b85dcf0e \ + --hash=sha256:93a943cfd3e33bd03f77b97baa11990148687877b74193bf777956b67054dcc6 \ + --hash=sha256:9be2f68fb9a40d8440cbf34cbf40758aa7f6093160bfc7fb018cce8e424f0c3a \ + --hash=sha256:a0c509894fd4dccdff557068e5074999ae3b75f4c5a2d6fb5415e782e25679c4 \ + --hash=sha256:ac8056e937f264995a82bf0053ca898a1cb1c9efc7cd68fa07fe0060734df7e4 \ + --hash=sha256:aed56a86daa43966dd790ba86d4b810b219f75b4bb737461b6886ce2bde38fd6 \ + --hash=sha256:e8986f1dd3221d1e741fda0a12eaa4a273f1d80a35e31a1ffe579e7c621d069e \ + --hash=sha256:f99212a81c62b5f22f9e7c3e347aa00491114a5647e1f13bbebd79c3e5f08d75 + # via readme-renderer +nox==2023.4.22 \ + --hash=sha256:0b1adc619c58ab4fa57d6ab2e7823fe47a32e70202f287d78474adcc7bda1891 \ + --hash=sha256:46c0560b0dc609d7d967dc99e22cb463d3c4caf54a5fda735d6c11b5177e3a9f # via -r requirements.in -packaging==21.3 \ - --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ - --hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 +packaging==23.2 \ + --hash=sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5 \ + --hash=sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7 # via # gcp-releasetool # nox -pkginfo==1.8.3 \ - --hash=sha256:848865108ec99d4901b2f7e84058b6e7660aae8ae10164e015a6dcf5b242a594 \ - --hash=sha256:a84da4318dd86f870a9447a8c98340aa06216bfc6f2b7bdc4b8766984ae1867c +pkginfo==1.9.6 \ + --hash=sha256:4b7a555a6d5a22169fcc9cf7bfd78d296b0361adad412a346c1226849af5e546 \ + --hash=sha256:8fd5896e8718a4372f0ea9cc9d96f6417c9b986e23a4d116dda26b62cc29d046 # via twine -platformdirs==2.5.4 \ - --hash=sha256:1006647646d80f16130f052404c6b901e80ee4ed6bef6792e1f238a8969106f7 \ - --hash=sha256:af0276409f9a02373d540bf8480021a048711d572745aef4b7842dad245eba10 +platformdirs==3.11.0 \ + --hash=sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3 \ + --hash=sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e # via virtualenv protobuf==3.20.3 \ --hash=sha256:03038ac1cfbc41aa21f6afcbcd357281d7521b4157926f30ebecc8d4ea59dcb7 \ @@ -383,34 +411,30 @@ protobuf==3.20.3 \ # gcp-releasetool # google-api-core # googleapis-common-protos -pyasn1==0.4.8 \ - --hash=sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d \ - --hash=sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba +pyasn1==0.5.0 \ + --hash=sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57 \ + --hash=sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde # via # pyasn1-modules # rsa -pyasn1-modules==0.2.8 \ - --hash=sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e \ - --hash=sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74 +pyasn1-modules==0.3.0 \ + --hash=sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c \ + --hash=sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d # via google-auth pycparser==2.21 \ --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 \ --hash=sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206 # via cffi -pygments==2.15.0 \ - --hash=sha256:77a3299119af881904cd5ecd1ac6a66214b6e9bed1f2db16993b54adede64094 \ - --hash=sha256:f7e36cffc4c517fbc252861b9a6e4644ca0e5abadf9a113c72d1358ad09b9500 +pygments==2.16.1 \ + --hash=sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692 \ + --hash=sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29 # via # readme-renderer # rich -pyjwt==2.6.0 \ - --hash=sha256:69285c7e31fc44f68a1feb309e948e0df53259d579295e6cfe2b1792329f05fd \ - --hash=sha256:d83c3d892a77bbb74d3e1a2cfa90afaadb60945205d1095d9221f04466f64c14 +pyjwt==2.8.0 \ + --hash=sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de \ + --hash=sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320 # via gcp-releasetool -pyparsing==3.0.9 \ - --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ - --hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc - # via packaging pyperclip==1.8.2 \ --hash=sha256:105254a8b04934f0bc84e9c24eb360a591aaf6535c9def5f29d92af107a9bf57 # via gcp-releasetool @@ -418,9 +442,9 @@ python-dateutil==2.8.2 \ --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 # via gcp-releasetool -readme-renderer==37.3 \ - --hash=sha256:cd653186dfc73055656f090f227f5cb22a046d7f71a841dfa305f55c9a513273 \ - --hash=sha256:f67a16caedfa71eef48a31b39708637a6f4664c4394801a7b0d6432d13907343 +readme-renderer==42.0 \ + --hash=sha256:13d039515c1f24de668e2c93f2e877b9dbe6c6c32328b90a40a49d8b2b85f36d \ + --hash=sha256:2d55489f83be4992fe4454939d1a051c33edbab778e82761d060c9fc6b308cd1 # via twine requests==2.31.0 \ --hash=sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f \ @@ -431,17 +455,17 @@ requests==2.31.0 \ # google-cloud-storage # requests-toolbelt # twine -requests-toolbelt==0.10.1 \ - --hash=sha256:18565aa58116d9951ac39baa288d3adb5b3ff975c4f25eee78555d89e8f247f7 \ - --hash=sha256:62e09f7ff5ccbda92772a29f394a49c3ad6cb181d568b1337626b2abb628a63d +requests-toolbelt==1.0.0 \ + --hash=sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6 \ + --hash=sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06 # via twine rfc3986==2.0.0 \ --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c # via twine -rich==12.6.0 \ - --hash=sha256:a4eb26484f2c82589bd9a17c73d32a010b1e29d89f1604cd9bf3a2097b81bb5e \ - --hash=sha256:ba3a3775974105c221d31141f2c116f4fd65c5ceb0698657a11e9f295ec93fd0 +rich==13.6.0 \ + --hash=sha256:2b38e2fe9ca72c9a00170a1a2d20c63c790d0e10ef1fe35eba76e1e7b1d7d245 \ + --hash=sha256:5c14d22737e6d5084ef4771b62d5d4363165b403455a30a1c8ca39dc7b644bef # via twine rsa==4.9 \ --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ @@ -455,43 +479,37 @@ six==1.16.0 \ --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 # via - # bleach # gcp-docuploader - # google-auth # python-dateutil -twine==4.0.1 \ - --hash=sha256:42026c18e394eac3e06693ee52010baa5313e4811d5a11050e7d48436cf41b9e \ - --hash=sha256:96b1cf12f7ae611a4a40b6ae8e9570215daff0611828f5fe1f37a16255ab24a0 +twine==4.0.2 \ + --hash=sha256:929bc3c280033347a00f847236564d1c52a3e61b1ac2516c97c48f3ceab756d8 \ + --hash=sha256:9e102ef5fdd5a20661eb88fad46338806c3bd32cf1db729603fe3697b1bc83c8 # via -r requirements.in -typing-extensions==4.4.0 \ - --hash=sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa \ - --hash=sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e +typing-extensions==4.8.0 \ + --hash=sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0 \ + --hash=sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef # via -r requirements.in -urllib3==1.26.18 \ - --hash=sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07 \ - --hash=sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0 +urllib3==2.0.7 \ + --hash=sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84 \ + --hash=sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e # via # requests # twine -virtualenv==20.16.7 \ - --hash=sha256:8691e3ff9387f743e00f6bb20f70121f5e4f596cae754531f2b3b3a1b1ac696e \ - --hash=sha256:efd66b00386fdb7dbe4822d172303f40cd05e50e01740b19ea42425cbe653e29 +virtualenv==20.24.6 \ + --hash=sha256:02ece4f56fbf939dbbc33c0715159951d6bf14aaf5457b092e4548e1382455af \ + --hash=sha256:520d056652454c5098a00c0f073611ccbea4c79089331f60bf9d7ba247bb7381 # via nox -webencodings==0.5.1 \ - --hash=sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 \ - --hash=sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923 - # via bleach -wheel==0.38.4 \ - --hash=sha256:965f5259b566725405b05e7cf774052044b1ed30119b5d586b2703aafe8719ac \ - --hash=sha256:b60533f3f5d530e971d6737ca6d58681ee434818fab630c83a734bb10c083ce8 +wheel==0.41.3 \ + --hash=sha256:488609bc63a29322326e05560731bf7bfea8e48ad646e1f5e40d366607de0942 \ + --hash=sha256:4d4987ce51a49370ea65c0bfd2234e8ce80a12780820d9dc462597a6e60d0841 # via -r requirements.in -zipp==3.10.0 \ - --hash=sha256:4fcb6f278987a6605757302a6e40e896257570d11c51628968ccb2a47e80c6c1 \ - --hash=sha256:7a7262fd930bd3e36c50b9a64897aec3fafff3dfdeec9623ae22b40e93f99bb8 +zipp==3.17.0 \ + --hash=sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31 \ + --hash=sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==65.5.1 \ - --hash=sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31 \ - --hash=sha256:e197a19aa8ec9722928f2206f8de752def0e4c9fc6953527360d1c36d94ddb2f +setuptools==68.2.2 \ + --hash=sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87 \ + --hash=sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a # via -r requirements.in From 58b3152ffc6ca473c35d2a6f3af80f60b56f027e Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 10 Nov 2023 02:18:22 +0100 Subject: [PATCH 187/536] chore(deps): update dependency pyarrow to v14 [security] (#1718) * chore(deps): update dependency pyarrow to v14 [security] * pin pyarrow to 12.0.1 for python 3.7 * pin pyarrow to 12.0.1 for python 3.7 * pin pyarrow to 12.0.1 for python 3.7 --------- Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 3 ++- samples/magics/requirements.txt | 3 ++- samples/snippets/requirements.txt | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 9bc6ee32c..867f44b35 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -28,7 +28,8 @@ packaging==23.1 pandas===1.3.5; python_version == '3.7' pandas==2.0.3; python_version >= '3.8' proto-plus==1.22.3 -pyarrow==12.0.1 +pyarrow==12.0.1; python_version == '3.7' +pyarrow==14.0.1; python_version >= '3.8' pyasn1==0.5.0 pyasn1-modules==0.3.0 pycparser==2.21 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index c3300ae20..8feb236fc 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -10,6 +10,7 @@ matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==2.0.3; python_version >= '3.8' -pyarrow==12.0.1 +pyarrow==12.0.1; python_version == '3.7' +pyarrow==14.0.1; python_version >= '3.8' pytz==2023.3 typing-extensions==4.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index da99249d2..1eeffe32c 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -11,6 +11,7 @@ matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==2.0.3; python_version >= '3.8' -pyarrow==12.0.1 +pyarrow==12.0.1; python_version == '3.7' +pyarrow==14.0.1; python_version >= '3.8' pytz==2023.3 typing-extensions==4.7.1 From 84d64cdd157afef4a7bf7807e557d59452133434 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 16 Nov 2023 13:29:19 -0500 Subject: [PATCH 188/536] feat: add `job_timeout_ms` to job configuration classes (#1675) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: adds new property and tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates docs to correct a sphinx failure * Updates formatting * Update tests/system/test_query.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update google/cloud/bigquery/job/base.py * updates one test and uses int_or_none * Update tests/system/test_query.py testing something. * Update tests/system/test_query.py * testing coverage feature * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * minor edits * tweaks to noxfile for testing purposes * add new test to base as experiment * adds a test, updates import statements * add another test * edit to tests * formatting fixes * update noxfile to correct debug code * removes unneeded comments. --------- Co-authored-by: Owl Bot --- google/cloud/bigquery/job/base.py | 32 +++++++++++++++++++++++++++++++ noxfile.py | 7 ++++++- tests/unit/job/test_base.py | 15 +++++++++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index a6267be41..78df9142f 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -26,6 +26,7 @@ from google.cloud.bigquery import _helpers from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery._helpers import _int_or_none if typing.TYPE_CHECKING: # pragma: NO COVER from google.api_core import retry as retries @@ -171,6 +172,37 @@ def __setattr__(self, name, value): ) super(_JobConfig, self).__setattr__(name, value) + @property + def job_timeout_ms(self): + """Optional parameter. Job timeout in milliseconds. If this time limit is exceeded, BigQuery might attempt to stop the job. + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.job_timeout_ms + e.g. + + job_config = bigquery.QueryJobConfig( job_timeout_ms = 5000 ) + or + job_config.job_timeout_ms = 5000 + + Raises: + ValueError: If ``value`` type is invalid. + """ + + # None as this is an optional parameter. + if self._properties.get("jobTimeoutMs"): + return self._properties["jobTimeoutMs"] + return None + + @job_timeout_ms.setter + def job_timeout_ms(self, value): + try: + value = _int_or_none(value) + except ValueError as err: + raise ValueError("Pass an int for jobTimeoutMs, e.g. 5000").with_traceback( + err.__traceback__ + ) + + """ Docs indicate a string is expected by the API """ + self._properties["jobTimeoutMs"] = str(value) + @property def labels(self): """Dict[str, str]: Labels for the job. diff --git a/noxfile.py b/noxfile.py index 703e36cbb..7cf5f6021 100644 --- a/noxfile.py +++ b/noxfile.py @@ -193,7 +193,12 @@ def system(session): session.install("-e", f".{extras}", "-c", constraints_path) # Run py.test against the system tests. - session.run("py.test", "--quiet", os.path.join("tests", "system"), *session.posargs) + session.run( + "py.test", + "--quiet", + os.path.join("tests", "system"), + *session.posargs, + ) @nox.session(python=DEFAULT_PYTHON_VERSION) diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index a662e92d4..5635d0e32 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -1228,3 +1228,18 @@ def test_labels_setter(self): job_config = self._make_one() job_config.labels = labels self.assertEqual(job_config._properties["labels"], labels) + + def test_job_timeout_ms_raises_valueerror(self): + # Confirm that attempting to set a non-integer values will raise an Error. + with pytest.raises(ValueError): + job_config = self._make_one() + job_config.job_timeout_ms = "WillRaiseError" + + def test_job_timeout_ms(self): + # Confirm that default status is None. + job_config = self._make_one() + assert job_config.job_timeout_ms is None + + # Confirm that integers get converted to strings. + job_config.job_timeout_ms = 5000 + assert job_config.job_timeout_ms == "5000" # int is converted to string From f804d639fe95bef5d083afe1246d756321128b05 Mon Sep 17 00:00:00 2001 From: Kira Date: Thu, 16 Nov 2023 16:16:49 -0800 Subject: [PATCH 189/536] chore: standardize samples directory (#1727) * Removed all dependencies from samples/snippets thats not google-cloud-bigquery * chore: standardizing extra-dependency samples * readded original dependencies to sample/snippets requirements --- noxfile.py | 2 + samples/desktopapp/__init__.py | 13 + samples/desktopapp/mypy.ini | 8 + samples/desktopapp/noxfile.py | 293 ++++++++++++++++++++ samples/desktopapp/noxfile_config.py | 40 +++ samples/desktopapp/requirements-test.txt | 3 + samples/desktopapp/requirements.txt | 17 ++ samples/desktopapp/user_credentials.py | 78 ++++++ samples/desktopapp/user_credentials_test.py | 45 +++ samples/notebooks/__init__.py | 13 + samples/notebooks/jupyter_tutorial_test.py | 175 ++++++++++++ samples/notebooks/mypy.ini | 8 + samples/notebooks/noxfile.py | 293 ++++++++++++++++++++ samples/notebooks/noxfile_config.py | 40 +++ samples/notebooks/requirements-test.txt | 3 + samples/notebooks/requirements.txt | 17 ++ samples/snippets/requirements.txt | 2 +- 17 files changed, 1049 insertions(+), 1 deletion(-) create mode 100644 samples/desktopapp/__init__.py create mode 100644 samples/desktopapp/mypy.ini create mode 100644 samples/desktopapp/noxfile.py create mode 100644 samples/desktopapp/noxfile_config.py create mode 100644 samples/desktopapp/requirements-test.txt create mode 100644 samples/desktopapp/requirements.txt create mode 100644 samples/desktopapp/user_credentials.py create mode 100644 samples/desktopapp/user_credentials_test.py create mode 100644 samples/notebooks/__init__.py create mode 100644 samples/notebooks/jupyter_tutorial_test.py create mode 100644 samples/notebooks/mypy.ini create mode 100644 samples/notebooks/noxfile.py create mode 100644 samples/notebooks/noxfile_config.py create mode 100644 samples/notebooks/requirements-test.txt create mode 100644 samples/notebooks/requirements.txt diff --git a/noxfile.py b/noxfile.py index 7cf5f6021..9ccbdd30c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -263,8 +263,10 @@ def snippets(session): session.run( "py.test", "samples", + "--ignore=samples/desktopapp", "--ignore=samples/magics", "--ignore=samples/geography", + "--ignore=samples/notebooks", "--ignore=samples/snippets", *session.posargs, ) diff --git a/samples/desktopapp/__init__.py b/samples/desktopapp/__init__.py new file mode 100644 index 000000000..4fbd93bb2 --- /dev/null +++ b/samples/desktopapp/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/samples/desktopapp/mypy.ini b/samples/desktopapp/mypy.ini new file mode 100644 index 000000000..d27b6b599 --- /dev/null +++ b/samples/desktopapp/mypy.ini @@ -0,0 +1,8 @@ +[mypy] +; We require type annotations in all samples. +strict = True +exclude = noxfile\.py +warn_unused_configs = True + +[mypy-google.auth,google.oauth2,geojson,google_auth_oauthlib,IPython.*] +ignore_missing_imports = True diff --git a/samples/desktopapp/noxfile.py b/samples/desktopapp/noxfile.py new file mode 100644 index 000000000..1224cbe21 --- /dev/null +++ b/samples/desktopapp/noxfile.py @@ -0,0 +1,293 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import glob +import os +from pathlib import Path +import sys +from typing import Callable, Dict, Optional + +import nox + + +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING +# DO NOT EDIT THIS FILE EVER! +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING + +BLACK_VERSION = "black==22.3.0" +ISORT_VERSION = "isort==5.10.1" + +# Copy `noxfile_config.py` to your directory and modify it instead. + +# `TEST_CONFIG` dict is a configuration hook that allows users to +# modify the test configurations. The values here should be in sync +# with `noxfile_config.py`. Users will copy `noxfile_config.py` into +# their directory and modify it. + +TEST_CONFIG = { + # You can opt out from the test for specific Python versions. + "ignored_versions": [], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": False, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} + + +try: + # Ensure we can import noxfile_config in the project's directory. + sys.path.append(".") + from noxfile_config import TEST_CONFIG_OVERRIDE +except ImportError as e: + print("No user noxfile_config found: detail: {}".format(e)) + TEST_CONFIG_OVERRIDE = {} + +# Update the TEST_CONFIG with the user supplied values. +TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) + + +def get_pytest_env_vars() -> Dict[str, str]: + """Returns a dict for pytest invocation.""" + ret = {} + + # Override the GCLOUD_PROJECT and the alias. + env_key = TEST_CONFIG["gcloud_project_env"] + # This should error out if not set. + ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key] + + # Apply user supplied envs. + ret.update(TEST_CONFIG["envs"]) + return ret + + +# DO NOT EDIT - automatically generated. +# All versions used to test samples. +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] + +# Any default versions that should be ignored. +IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] + +TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) + +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) + +# Error if a python version is missing +nox.options.error_on_missing_interpreters = True + +# +# Style Checks +# + + +# Linting with flake8. +# +# We ignore the following rules: +# E203: whitespace before ‘:’ +# E266: too many leading ‘#’ for block comment +# E501: line too long +# I202: Additional newline in a section of imports +# +# We also need to specify the rules which are ignored by default: +# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121'] +FLAKE8_COMMON_ARGS = [ + "--show-source", + "--builtin=gettext", + "--max-complexity=20", + "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", + "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", + "--max-line-length=88", +] + + +@nox.session +def lint(session: nox.sessions.Session) -> None: + if not TEST_CONFIG["enforce_type_hints"]: + session.install("flake8") + else: + session.install("flake8", "flake8-annotations") + + args = FLAKE8_COMMON_ARGS + [ + ".", + ] + session.run("flake8", *args) + + +# +# Black +# + + +@nox.session +def blacken(session: nox.sessions.Session) -> None: + """Run black. Format code to uniform standard.""" + session.install(BLACK_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) + + +# +# format = isort + black +# + + +@nox.session +def format(session: nox.sessions.Session) -> None: + """ + Run isort to sort imports. Then run black + to format code to uniform standard. + """ + session.install(BLACK_VERSION, ISORT_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + # Use the --fss option to sort imports using strict alphabetical order. + # See https://pycqa.github.io/isort/docs/configuration/options.html#force-sort-within-sections + session.run("isort", "--fss", *python_files) + session.run("black", *python_files) + + +# +# Sample Tests +# + + +PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] + + +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: + # check for presence of tests + test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob( + "**/test_*.py", recursive=True + ) + test_list.extend(glob.glob("**/tests", recursive=True)) + + if len(test_list) == 0: + print("No tests found, skipping directory.") + return + + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + concurrent_args = [] + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + with open("requirements.txt") as rfile: + packages = rfile.read() + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") + with open("requirements-test.txt") as rtfile: + packages += rtfile.read() + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + if "pytest-parallel" in packages: + concurrent_args.extend(["--workers", "auto", "--tests-per-worker", "auto"]) + elif "pytest-xdist" in packages: + concurrent_args.extend(["-n", "auto"]) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs + concurrent_args), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) + + +@nox.session(python=ALL_VERSIONS) +def py(session: nox.sessions.Session) -> None: + """Runs py.test for a sample using the specified version of Python.""" + if session.python in TESTED_VERSIONS: + _session_tests(session) + else: + session.skip( + "SKIPPED: {} tests are disabled for this sample.".format(session.python) + ) + + +# +# Readmegen +# + + +def _get_repo_root() -> Optional[str]: + """Returns the root folder of the project.""" + # Get root of this repository. Assume we don't have directories nested deeper than 10 items. + p = Path(os.getcwd()) + for i in range(10): + if p is None: + break + if Path(p / ".git").exists(): + return str(p) + # .git is not available in repos cloned via Cloud Build + # setup.py is always in the library's root, so use that instead + # https://github.com/googleapis/synthtool/issues/792 + if Path(p / "setup.py").exists(): + return str(p) + p = p.parent + raise Exception("Unable to detect repository root.") + + +GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")]) + + +@nox.session +@nox.parametrize("path", GENERATED_READMES) +def readmegen(session: nox.sessions.Session, path: str) -> None: + """(Re-)generates the readme for a sample.""" + session.install("jinja2", "pyyaml") + dir_ = os.path.dirname(path) + + if os.path.exists(os.path.join(dir_, "requirements.txt")): + session.install("-r", os.path.join(dir_, "requirements.txt")) + + in_file = os.path.join(dir_, "README.rst.in") + session.run( + "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file + ) diff --git a/samples/desktopapp/noxfile_config.py b/samples/desktopapp/noxfile_config.py new file mode 100644 index 000000000..315bd5be8 --- /dev/null +++ b/samples/desktopapp/noxfile_config.py @@ -0,0 +1,40 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be inported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": [ + "2.7", + # TODO: Enable 3.10 once there is a geopandas/fiona release. + # https://github.com/Toblerity/Fiona/issues/1043 + "3.10", + ], + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT", + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt new file mode 100644 index 000000000..514f09705 --- /dev/null +++ b/samples/desktopapp/requirements-test.txt @@ -0,0 +1,3 @@ +google-cloud-testutils==1.3.3 +pytest==7.4.0 +mock==5.1.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt new file mode 100644 index 000000000..1eeffe32c --- /dev/null +++ b/samples/desktopapp/requirements.txt @@ -0,0 +1,17 @@ +db-dtypes==1.1.1 +google-cloud-bigquery==3.11.4 +google-cloud-bigquery-storage==2.22.0 +google-auth-oauthlib==1.0.0 +grpcio==1.57.0 +ipywidgets==8.1.0 +ipython===7.31.1; python_version == '3.7' +ipython===8.0.1; python_version == '3.8' +ipython==8.14.0; python_version >= '3.9' +matplotlib===3.5.3; python_version == '3.7' +matplotlib==3.7.2; python_version >= '3.8' +pandas===1.3.5; python_version == '3.7' +pandas==2.0.3; python_version >= '3.8' +pyarrow==12.0.1; python_version == '3.7' +pyarrow==14.0.1; python_version >= '3.8' +pytz==2023.3 +typing-extensions==4.7.1 diff --git a/samples/desktopapp/user_credentials.py b/samples/desktopapp/user_credentials.py new file mode 100644 index 000000000..487a56c5f --- /dev/null +++ b/samples/desktopapp/user_credentials.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python + +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Command-line application to run a query using user credentials. + +You must supply a client secrets file, which would normally be bundled with +your application. +""" + +import argparse + + +def main(project: str) -> None: + # [START bigquery_auth_user_flow] + from google_auth_oauthlib import flow + + # A local server is used as the callback URL in the auth flow. + appflow = flow.InstalledAppFlow.from_client_secrets_file( + "client_secrets.json", scopes=["https://www.googleapis.com/auth/bigquery"] + ) + + # This launches a local server to be used as the callback URL in the desktop + # app auth flow. If you are accessing the application remotely, such as over + # SSH or a remote Jupyter notebook, this flow will not work. Use the + # `gcloud auth application-default login --no-browser` command or workload + # identity federation to get authentication tokens, instead. + # + appflow.run_local_server() + + credentials = appflow.credentials + # [END bigquery_auth_user_flow] + + # [START bigquery_auth_user_query] + from google.cloud import bigquery + + # TODO: Uncomment the line below to set the `project` variable. + # project = 'user-project-id' + # + # The `project` variable defines the project to be billed for query + # processing. The user must have the bigquery.jobs.create permission on + # this project to run a query. See: + # https://cloud.google.com/bigquery/docs/access-control#permissions + + client = bigquery.Client(project=project, credentials=credentials) + + query_string = """SELECT name, SUM(number) as total + FROM `bigquery-public-data.usa_names.usa_1910_current` + WHERE name = 'William' + GROUP BY name; + """ + query_job = client.query(query_string) + + # Print the results. + for row in query_job.result(): # Wait for the job to complete. + print("{}: {}".format(row["name"], row["total"])) + # [END bigquery_auth_user_query] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("project", help="Project to use for BigQuery billing.") + args = parser.parse_args() + main(args.project) diff --git a/samples/desktopapp/user_credentials_test.py b/samples/desktopapp/user_credentials_test.py new file mode 100644 index 000000000..baa9e33f1 --- /dev/null +++ b/samples/desktopapp/user_credentials_test.py @@ -0,0 +1,45 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Iterator, Union + +import google.auth +import mock +import pytest + +from .user_credentials import main # type: ignore + +PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] + +MockType = Union[mock.mock.MagicMock, mock.mock.AsyncMock] + + +@pytest.fixture +def mock_flow() -> Iterator[MockType]: + flow_patch = mock.patch("google_auth_oauthlib.flow.InstalledAppFlow", autospec=True) + + with flow_patch as flow_mock: + flow_mock.from_client_secrets_file.return_value = flow_mock + flow_mock.credentials = google.auth.default()[0] + yield flow_mock + + +def test_auth_query_console( + mock_flow: MockType, capsys: pytest.CaptureFixture[str] +) -> None: + main(PROJECT) + out, _ = capsys.readouterr() + # Fun fact: William P. Wood was the 1st director of the US Secret Service. + assert "William" in out diff --git a/samples/notebooks/__init__.py b/samples/notebooks/__init__.py new file mode 100644 index 000000000..4fbd93bb2 --- /dev/null +++ b/samples/notebooks/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/samples/notebooks/jupyter_tutorial_test.py b/samples/notebooks/jupyter_tutorial_test.py new file mode 100644 index 000000000..9d42a4eda --- /dev/null +++ b/samples/notebooks/jupyter_tutorial_test.py @@ -0,0 +1,175 @@ +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing +from typing import Iterator + +import pytest + +if typing.TYPE_CHECKING: + from IPython.terminal.interactiveshell import TerminalInteractiveShell + +IPython = pytest.importorskip("IPython") +interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") +tools = pytest.importorskip("IPython.testing.tools") +matplotlib = pytest.importorskip("matplotlib") + +# Ignore semicolon lint warning because semicolons are used in notebooks +# flake8: noqa E703 + + +@pytest.fixture(scope="session") +def ipython() -> "TerminalInteractiveShell": + config = tools.default_config() + config.TerminalInteractiveShell.simple_prompt = True + shell = interactiveshell.TerminalInteractiveShell.instance(config=config) + return shell + + +@pytest.fixture() +def ipython_interactive( + request: pytest.FixtureRequest, ipython: "TerminalInteractiveShell" +) -> Iterator["TerminalInteractiveShell"]: + """Activate IPython's builtin hooks + + for the duration of the test scope. + """ + with ipython.builtin_trap: + yield ipython + + +def _strip_region_tags(sample_text: str) -> str: + """Remove blank lines and region tags from sample text""" + magic_lines = [ + line for line in sample_text.split("\n") if len(line) > 0 and "# [" not in line + ] + return "\n".join(magic_lines) + + +def test_jupyter_tutorial(ipython: "TerminalInteractiveShell") -> None: + matplotlib.use("agg") + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + + sample = """ + # [START bigquery_jupyter_magic_gender_by_year] + %%bigquery + SELECT + source_year AS year, + COUNT(is_male) AS birth_count + FROM `bigquery-public-data.samples.natality` + GROUP BY year + ORDER BY year DESC + LIMIT 15 + # [END bigquery_jupyter_magic_gender_by_year] + """ + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + + sample = """ + # [START bigquery_jupyter_magic_gender_by_year_var] + %%bigquery total_births + SELECT + source_year AS year, + COUNT(is_male) AS birth_count + FROM `bigquery-public-data.samples.natality` + GROUP BY year + ORDER BY year DESC + LIMIT 15 + # [END bigquery_jupyter_magic_gender_by_year_var] + """ + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + + assert "total_births" in ip.user_ns # verify that variable exists + total_births = ip.user_ns["total_births"] + # [START bigquery_jupyter_plot_births_by_year] + total_births.plot(kind="bar", x="year", y="birth_count") + # [END bigquery_jupyter_plot_births_by_year] + + sample = """ + # [START bigquery_jupyter_magic_gender_by_weekday] + %%bigquery births_by_weekday + SELECT + wday, + SUM(CASE WHEN is_male THEN 1 ELSE 0 END) AS male_births, + SUM(CASE WHEN is_male THEN 0 ELSE 1 END) AS female_births + FROM `bigquery-public-data.samples.natality` + WHERE wday IS NOT NULL + GROUP BY wday + ORDER BY wday ASC + # [END bigquery_jupyter_magic_gender_by_weekday] + """ + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + + assert "births_by_weekday" in ip.user_ns # verify that variable exists + births_by_weekday = ip.user_ns["births_by_weekday"] + # [START bigquery_jupyter_plot_births_by_weekday] + births_by_weekday.plot(x="wday") + # [END bigquery_jupyter_plot_births_by_weekday] + + # [START bigquery_jupyter_import_and_client] + from google.cloud import bigquery + + client = bigquery.Client() + # [END bigquery_jupyter_import_and_client] + + # [START bigquery_jupyter_query_plurality_by_year] + sql = """ + SELECT + plurality, + COUNT(1) AS count, + year + FROM + `bigquery-public-data.samples.natality` + WHERE + NOT IS_NAN(plurality) AND plurality > 1 + GROUP BY + plurality, year + ORDER BY + count DESC + """ + df = client.query(sql).to_dataframe() + df.head() + # [END bigquery_jupyter_query_plurality_by_year] + + # [START bigquery_jupyter_plot_plurality_by_year] + pivot_table = df.pivot(index="year", columns="plurality", values="count") + pivot_table.plot(kind="bar", stacked=True, figsize=(15, 7)) + # [END bigquery_jupyter_plot_plurality_by_year] + + # [START bigquery_jupyter_query_births_by_gestation] + sql = """ + SELECT + gestation_weeks, + COUNT(1) AS count + FROM + `bigquery-public-data.samples.natality` + WHERE + NOT IS_NAN(gestation_weeks) AND gestation_weeks <> 99 + GROUP BY + gestation_weeks + ORDER BY + gestation_weeks + """ + df = client.query(sql).to_dataframe() + # [END bigquery_jupyter_query_births_by_gestation] + + # [START bigquery_jupyter_plot_births_by_gestation] + ax = df.plot(kind="bar", x="gestation_weeks", y="count", figsize=(15, 7)) + ax.set_title("Count of Births by Gestation Weeks") + ax.set_xlabel("Gestation Weeks") + ax.set_ylabel("Count") + # [END bigquery_jupyter_plot_births_by_gestation] diff --git a/samples/notebooks/mypy.ini b/samples/notebooks/mypy.ini new file mode 100644 index 000000000..dea60237b --- /dev/null +++ b/samples/notebooks/mypy.ini @@ -0,0 +1,8 @@ +[mypy] +; We require type annotations in all samples. +strict = True +exclude = noxfile\.py +warn_unused_configs = True + +[mypy-IPython.*,nox,noxfile_config,pandas] +ignore_missing_imports = True \ No newline at end of file diff --git a/samples/notebooks/noxfile.py b/samples/notebooks/noxfile.py new file mode 100644 index 000000000..1224cbe21 --- /dev/null +++ b/samples/notebooks/noxfile.py @@ -0,0 +1,293 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import glob +import os +from pathlib import Path +import sys +from typing import Callable, Dict, Optional + +import nox + + +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING +# DO NOT EDIT THIS FILE EVER! +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING + +BLACK_VERSION = "black==22.3.0" +ISORT_VERSION = "isort==5.10.1" + +# Copy `noxfile_config.py` to your directory and modify it instead. + +# `TEST_CONFIG` dict is a configuration hook that allows users to +# modify the test configurations. The values here should be in sync +# with `noxfile_config.py`. Users will copy `noxfile_config.py` into +# their directory and modify it. + +TEST_CONFIG = { + # You can opt out from the test for specific Python versions. + "ignored_versions": [], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": False, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} + + +try: + # Ensure we can import noxfile_config in the project's directory. + sys.path.append(".") + from noxfile_config import TEST_CONFIG_OVERRIDE +except ImportError as e: + print("No user noxfile_config found: detail: {}".format(e)) + TEST_CONFIG_OVERRIDE = {} + +# Update the TEST_CONFIG with the user supplied values. +TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) + + +def get_pytest_env_vars() -> Dict[str, str]: + """Returns a dict for pytest invocation.""" + ret = {} + + # Override the GCLOUD_PROJECT and the alias. + env_key = TEST_CONFIG["gcloud_project_env"] + # This should error out if not set. + ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key] + + # Apply user supplied envs. + ret.update(TEST_CONFIG["envs"]) + return ret + + +# DO NOT EDIT - automatically generated. +# All versions used to test samples. +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] + +# Any default versions that should be ignored. +IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] + +TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) + +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) + +# Error if a python version is missing +nox.options.error_on_missing_interpreters = True + +# +# Style Checks +# + + +# Linting with flake8. +# +# We ignore the following rules: +# E203: whitespace before ‘:’ +# E266: too many leading ‘#’ for block comment +# E501: line too long +# I202: Additional newline in a section of imports +# +# We also need to specify the rules which are ignored by default: +# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121'] +FLAKE8_COMMON_ARGS = [ + "--show-source", + "--builtin=gettext", + "--max-complexity=20", + "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", + "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", + "--max-line-length=88", +] + + +@nox.session +def lint(session: nox.sessions.Session) -> None: + if not TEST_CONFIG["enforce_type_hints"]: + session.install("flake8") + else: + session.install("flake8", "flake8-annotations") + + args = FLAKE8_COMMON_ARGS + [ + ".", + ] + session.run("flake8", *args) + + +# +# Black +# + + +@nox.session +def blacken(session: nox.sessions.Session) -> None: + """Run black. Format code to uniform standard.""" + session.install(BLACK_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) + + +# +# format = isort + black +# + + +@nox.session +def format(session: nox.sessions.Session) -> None: + """ + Run isort to sort imports. Then run black + to format code to uniform standard. + """ + session.install(BLACK_VERSION, ISORT_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + # Use the --fss option to sort imports using strict alphabetical order. + # See https://pycqa.github.io/isort/docs/configuration/options.html#force-sort-within-sections + session.run("isort", "--fss", *python_files) + session.run("black", *python_files) + + +# +# Sample Tests +# + + +PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] + + +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: + # check for presence of tests + test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob( + "**/test_*.py", recursive=True + ) + test_list.extend(glob.glob("**/tests", recursive=True)) + + if len(test_list) == 0: + print("No tests found, skipping directory.") + return + + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + concurrent_args = [] + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + with open("requirements.txt") as rfile: + packages = rfile.read() + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") + with open("requirements-test.txt") as rtfile: + packages += rtfile.read() + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + if "pytest-parallel" in packages: + concurrent_args.extend(["--workers", "auto", "--tests-per-worker", "auto"]) + elif "pytest-xdist" in packages: + concurrent_args.extend(["-n", "auto"]) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs + concurrent_args), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) + + +@nox.session(python=ALL_VERSIONS) +def py(session: nox.sessions.Session) -> None: + """Runs py.test for a sample using the specified version of Python.""" + if session.python in TESTED_VERSIONS: + _session_tests(session) + else: + session.skip( + "SKIPPED: {} tests are disabled for this sample.".format(session.python) + ) + + +# +# Readmegen +# + + +def _get_repo_root() -> Optional[str]: + """Returns the root folder of the project.""" + # Get root of this repository. Assume we don't have directories nested deeper than 10 items. + p = Path(os.getcwd()) + for i in range(10): + if p is None: + break + if Path(p / ".git").exists(): + return str(p) + # .git is not available in repos cloned via Cloud Build + # setup.py is always in the library's root, so use that instead + # https://github.com/googleapis/synthtool/issues/792 + if Path(p / "setup.py").exists(): + return str(p) + p = p.parent + raise Exception("Unable to detect repository root.") + + +GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")]) + + +@nox.session +@nox.parametrize("path", GENERATED_READMES) +def readmegen(session: nox.sessions.Session, path: str) -> None: + """(Re-)generates the readme for a sample.""" + session.install("jinja2", "pyyaml") + dir_ = os.path.dirname(path) + + if os.path.exists(os.path.join(dir_, "requirements.txt")): + session.install("-r", os.path.join(dir_, "requirements.txt")) + + in_file = os.path.join(dir_, "README.rst.in") + session.run( + "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file + ) diff --git a/samples/notebooks/noxfile_config.py b/samples/notebooks/noxfile_config.py new file mode 100644 index 000000000..315bd5be8 --- /dev/null +++ b/samples/notebooks/noxfile_config.py @@ -0,0 +1,40 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be inported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": [ + "2.7", + # TODO: Enable 3.10 once there is a geopandas/fiona release. + # https://github.com/Toblerity/Fiona/issues/1043 + "3.10", + ], + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT", + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt new file mode 100644 index 000000000..514f09705 --- /dev/null +++ b/samples/notebooks/requirements-test.txt @@ -0,0 +1,3 @@ +google-cloud-testutils==1.3.3 +pytest==7.4.0 +mock==5.1.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt new file mode 100644 index 000000000..1eeffe32c --- /dev/null +++ b/samples/notebooks/requirements.txt @@ -0,0 +1,17 @@ +db-dtypes==1.1.1 +google-cloud-bigquery==3.11.4 +google-cloud-bigquery-storage==2.22.0 +google-auth-oauthlib==1.0.0 +grpcio==1.57.0 +ipywidgets==8.1.0 +ipython===7.31.1; python_version == '3.7' +ipython===8.0.1; python_version == '3.8' +ipython==8.14.0; python_version >= '3.9' +matplotlib===3.5.3; python_version == '3.7' +matplotlib==3.7.2; python_version >= '3.8' +pandas===1.3.5; python_version == '3.7' +pandas==2.0.3; python_version >= '3.8' +pyarrow==12.0.1; python_version == '3.7' +pyarrow==14.0.1; python_version >= '3.8' +pytz==2023.3 +typing-extensions==4.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 1eeffe32c..72a77ad11 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -14,4 +14,4 @@ pandas==2.0.3; python_version >= '3.8' pyarrow==12.0.1; python_version == '3.7' pyarrow==14.0.1; python_version >= '3.8' pytz==2023.3 -typing-extensions==4.7.1 +typing-extensions==4.7.1 \ No newline at end of file From 494f275ab2493dc7904f685c4d12e60bef51ab21 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 17 Nov 2023 18:43:59 -0600 Subject: [PATCH 190/536] feat: add `job_id`, `location`, `project`, and `query_id` properties on `RowIterator` (#1733) * feat: add `job_id`, `location`, `project`, and `query_id` properties on `RowIterator` These can be used to recover the original job metadata when `RowIterator` is the result of a `QueryJob`. * rename bqstorage_project to billing project * Update google/cloud/bigquery/table.py Co-authored-by: Lingqing Gan --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/client.py | 10 ++++++ google/cloud/bigquery/job/query.py | 24 ++++++++++++-- google/cloud/bigquery/query.py | 8 +++++ google/cloud/bigquery/table.py | 49 +++++++++++++++++++++++++++-- tests/unit/job/test_query.py | 18 ++++++++++- tests/unit/job/test_query_pandas.py | 12 ++++--- tests/unit/test_client.py | 11 ++++++- tests/unit/test_query.py | 10 ++++++ tests/unit/test_table.py | 32 +++++++++++++++++++ 9 files changed, 163 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index d4a759ba4..4e72ac922 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3843,6 +3843,8 @@ def list_rows( # tables can be fetched without a column filter. selected_fields=selected_fields, total_rows=getattr(table, "num_rows", None), + project=table.project, + location=table.location, ) return row_iterator @@ -3859,6 +3861,7 @@ def _list_rows_from_query_results( page_size: Optional[int] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + query_id: Optional[str] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -3898,6 +3901,9 @@ def _list_rows_from_query_results( would otherwise be a successful response. If multiple requests are made under the hood, ``timeout`` applies to each individual request. + query_id (Optional[str]): + [Preview] ID of a completed query. This ID is auto-generated + and not guaranteed to be populated. Returns: google.cloud.bigquery.table.RowIterator: Iterator of row data @@ -3928,6 +3934,10 @@ def _list_rows_from_query_results( table=destination, extra_params=params, total_rows=total_rows, + project=project, + location=location, + job_id=job_id, + query_id=query_id, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 57186acbc..a48a15f85 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -930,6 +930,15 @@ def query(self): self._properties, ["configuration", "query", "query"] ) + @property + def query_id(self) -> Optional[str]: + """[Preview] ID of a completed query. + + This ID is auto-generated and not guaranteed to be populated. + """ + query_results = self._query_results + return query_results.query_id if query_results is not None else None + @property def query_parameters(self): """See @@ -1525,7 +1534,12 @@ def result( # type: ignore # (complaints about the overloaded signature) provided and the job is not retryable. """ if self.dry_run: - return _EmptyRowIterator() + return _EmptyRowIterator( + project=self.project, + location=self.location, + # Intentionally omit job_id and query_id since this doesn't + # actually correspond to a finished query job. + ) try: retry_do_query = getattr(self, "_retry_do_query", None) if retry_do_query is not None: @@ -1594,7 +1608,12 @@ def do_get_result(): # indicate success and avoid calling tabledata.list on a table which # can't be read (such as a view table). if self._query_results.total_rows is None: - return _EmptyRowIterator() + return _EmptyRowIterator( + location=self.location, + project=self.project, + job_id=self.job_id, + query_id=self.query_id, + ) rows = self._client._list_rows_from_query_results( self.job_id, @@ -1608,6 +1627,7 @@ def do_get_result(): start_index=start_index, retry=retry, timeout=timeout, + query_id=self.query_id, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 944ad884e..ccc8840be 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -911,6 +911,14 @@ def job_id(self): """ return self._properties.get("jobReference", {}).get("jobId") + @property + def query_id(self) -> Optional[str]: + """[Preview] ID of a completed query. + + This ID is auto-generated and not guaranteed to be populated. + """ + return self._properties.get("queryId") + @property def page_token(self): """Token for fetching next bach of results. diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index dcba10428..168448c99 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1558,6 +1558,10 @@ def __init__( selected_fields=None, total_rows=None, first_page_response=None, + location: Optional[str] = None, + job_id: Optional[str] = None, + query_id: Optional[str] = None, + project: Optional[str] = None, ): super(RowIterator, self).__init__( client, @@ -1575,12 +1579,51 @@ def __init__( self._field_to_index = _helpers._field_to_index_mapping(schema) self._page_size = page_size self._preserve_order = False - self._project = client.project if client is not None else None self._schema = schema self._selected_fields = selected_fields self._table = table self._total_rows = total_rows self._first_page_response = first_page_response + self._location = location + self._job_id = job_id + self._query_id = query_id + self._project = project + + @property + def _billing_project(self) -> Optional[str]: + """GCP Project ID where BQ API will bill to (if applicable).""" + client = self.client + return client.project if client is not None else None + + @property + def job_id(self) -> Optional[str]: + """ID of the query job (if applicable). + + To get the job metadata, call + ``job = client.get_job(rows.job_id, location=rows.location)``. + """ + return self._job_id + + @property + def location(self) -> Optional[str]: + """Location where the query executed (if applicable). + + See: https://cloud.google.com/bigquery/docs/locations + """ + return self._location + + @property + def project(self) -> Optional[str]: + """GCP Project ID where these rows are read from.""" + return self._project + + @property + def query_id(self) -> Optional[str]: + """[Preview] ID of a completed query. + + This ID is auto-generated and not guaranteed to be populated. + """ + return self._query_id def _is_completely_cached(self): """Check if all results are completely cached. @@ -1723,7 +1766,7 @@ def to_arrow_iterable( bqstorage_download = functools.partial( _pandas_helpers.download_arrow_bqstorage, - self._project, + self._billing_project, self._table, bqstorage_client, preserve_order=self._preserve_order, @@ -1903,7 +1946,7 @@ def to_dataframe_iterable( column_names = [field.name for field in self._schema] bqstorage_download = functools.partial( _pandas_helpers.download_dataframe_bqstorage, - self._project, + self._billing_project, self._table, bqstorage_client, column_names, diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 26f1f2a73..39275063a 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -952,6 +952,7 @@ def test_result(self): }, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "2", + "queryId": "abc-def", } job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") @@ -980,6 +981,10 @@ def test_result(self): rows = list(result) self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") + self.assertEqual(result.job_id, self.JOB_ID) + self.assertEqual(result.location, "EU") + self.assertEqual(result.project, self.PROJECT) + self.assertEqual(result.query_id, "abc-def") # Test that the total_rows property has changed during iteration, based # on the response from tabledata.list. self.assertEqual(result.total_rows, 1) @@ -1023,6 +1028,12 @@ def test_result_dry_run(self): calls = conn.api_request.mock_calls self.assertIsInstance(result, _EmptyRowIterator) self.assertEqual(calls, []) + self.assertEqual(result.location, "EU") + self.assertEqual(result.project, self.PROJECT) + # Intentionally omit job_id and query_id since this doesn't + # actually correspond to a finished query job. + self.assertIsNone(result.job_id) + self.assertIsNone(result.query_id) def test_result_with_done_job_calls_get_query_results(self): query_resource_done = { @@ -1180,16 +1191,21 @@ def test_result_w_empty_schema(self): "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": []}, + "queryId": "xyz-abc", } connection = make_connection(query_resource, query_resource) client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) + resource = self._make_resource(ended=True, location="asia-northeast1") job = self._get_target_class().from_api_repr(resource, client) result = job.result() self.assertIsInstance(result, _EmptyRowIterator) self.assertEqual(list(result), []) + self.assertEqual(result.project, self.PROJECT) + self.assertEqual(result.job_id, self.JOB_ID) + self.assertEqual(result.location, "asia-northeast1") + self.assertEqual(result.query_id, "xyz-abc") def test_result_invokes_begins(self): begun_resource = self._make_resource() diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index f4c7eb06e..0accae0a2 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -560,7 +560,7 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): [name_array, age_array], schema=arrow_schema ) connection = make_connection(query_resource) - client = _make_client(connection=connection) + client = _make_client(connection=connection, project="bqstorage-billing-project") job = target_class.from_api_repr(resource, client) session = bigquery_storage.types.ReadSession() session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() @@ -597,7 +597,9 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): **table_read_options_kwarg, ) bqstorage_client.create_read_session.assert_called_once_with( - parent=f"projects/{client.project}", + # The billing project can differ from the data project. Make sure we + # are charging to the billing project, not the data project. + parent="projects/bqstorage-billing-project", read_session=expected_session, max_stream_count=0, # Use default number of streams for best performance. ) @@ -618,7 +620,7 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]}, } connection = make_connection(query_resource) - client = _make_client(connection=connection) + client = _make_client(connection=connection, project="bqstorage-billing-project") job = target_class.from_api_repr(resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) session = bigquery_storage.types.ReadSession() @@ -646,7 +648,9 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): data_format=bigquery_storage.DataFormat.ARROW, ) bqstorage_client.create_read_session.assert_called_once_with( - parent=f"projects/{client.project}", + # The billing project can differ from the data project. Make sure we + # are charging to the billing project, not the data project. + parent="projects/bqstorage-billing-project", read_session=expected_session, max_stream_count=0, ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index d470bd9fd..af61ceb42 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -6401,11 +6401,16 @@ def test_list_rows(self): age = SchemaField("age", "INTEGER", mode="NULLABLE") joined = SchemaField("joined", "TIMESTAMP", mode="NULLABLE") table = Table(self.TABLE_REF, schema=[full_name, age, joined]) + table._properties["location"] = "us-central1" table._properties["numRows"] = 7 iterator = client.list_rows(table, timeout=7.5) - # Check that initial total_rows is populated from the table. + # Check that initial RowIterator is populated from the table metadata. + self.assertIsNone(iterator.job_id) + self.assertEqual(iterator.location, "us-central1") + self.assertEqual(iterator.project, table.project) + self.assertIsNone(iterator.query_id) self.assertEqual(iterator.total_rows, 7) page = next(iterator.pages) rows = list(page) @@ -6521,6 +6526,10 @@ def test_list_rows_empty_table(self): selected_fields=[], ) + self.assertIsNone(rows.job_id) + self.assertIsNone(rows.location) + self.assertEqual(rows.project, self.TABLE_REF.project) + self.assertIsNone(rows.query_id) # When a table reference / string and selected_fields is provided, # total_rows can't be populated until iteration starts. self.assertIsNone(rows.total_rows) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 4b687152f..aae4890b3 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -1386,6 +1386,16 @@ def test_page_token_present(self): query = self._make_one(resource) self.assertEqual(query.page_token, "TOKEN") + def test_query_id_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.query_id) + + def test_query_id_present(self): + resource = self._make_resource() + resource["queryId"] = "test-query-id" + query = self._make_one(resource) + self.assertEqual(query.query_id, "test-query-id") + def test_total_rows_present_integer(self): resource = self._make_resource() resource["totalRows"] = 42 diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index fa2f30cea..d9f259e72 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2113,6 +2113,38 @@ def test_constructor_with_dict_schema(self): ] self.assertEqual(iterator.schema, expected_schema) + def test_job_id_missing(self): + rows = self._make_one() + self.assertIsNone(rows.job_id) + + def test_job_id_present(self): + rows = self._make_one(job_id="abc-123") + self.assertEqual(rows.job_id, "abc-123") + + def test_location_missing(self): + rows = self._make_one() + self.assertIsNone(rows.location) + + def test_location_present(self): + rows = self._make_one(location="asia-northeast1") + self.assertEqual(rows.location, "asia-northeast1") + + def test_project_missing(self): + rows = self._make_one() + self.assertIsNone(rows.project) + + def test_project_present(self): + rows = self._make_one(project="test-project") + self.assertEqual(rows.project, "test-project") + + def test_query_id_missing(self): + rows = self._make_one() + self.assertIsNone(rows.query_id) + + def test_query_id_present(self): + rows = self._make_one(query_id="xyz-987") + self.assertEqual(rows.query_id, "xyz-987") + def test_iterate(self): from google.cloud.bigquery.schema import SchemaField From 6290517d6b153a31f20098f75aee580b7915aca9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Nov 2023 09:13:47 -0600 Subject: [PATCH 191/536] perf: use the first page a results when `query(api_method="QUERY")` (#1723) * perf: use the first page a results when `query(api_method="QUERY")` * add tests * respect max_results with cached page * respect page_size, also avoid bqstorage if almost fully downloaded * skip true test if bqstorage not installed * coverage --- google/cloud/bigquery/_job_helpers.py | 12 +-- google/cloud/bigquery/client.py | 9 ++ google/cloud/bigquery/job/query.py | 13 ++- google/cloud/bigquery/query.py | 8 -- google/cloud/bigquery/table.py | 44 ++++++++- tests/unit/job/test_query.py | 109 +++++++++++++++++++++ tests/unit/test_query.py | 8 +- tests/unit/test_table.py | 105 ++++++++++++++++++-- tests/unit/test_table_arrow.py | 134 ++++++++++++++++++++++++++ tests/unit/test_table_pandas.py | 59 ++++++++++++ 10 files changed, 468 insertions(+), 33 deletions(-) create mode 100644 tests/unit/test_table_arrow.py diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 09daaa2a2..7992f28b6 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -22,6 +22,7 @@ from google.api_core import retry as retries from google.cloud.bigquery import job +import google.cloud.bigquery.query # Avoid circular imports if TYPE_CHECKING: # pragma: NO COVER @@ -197,14 +198,9 @@ def _to_query_job( job_complete = query_response.get("jobComplete") if job_complete: query_job._properties["status"]["state"] = "DONE" - # TODO: https://github.com/googleapis/python-bigquery/issues/589 - # Set the first page of results if job is "complete" and there is - # only 1 page of results. Otherwise, use the existing logic that - # refreshes the job stats. - # - # This also requires updates to `to_dataframe` and the DB API connector - # so that they don't try to read from a destination table if all the - # results are present. + query_job._query_results = google.cloud.bigquery.query._QueryResults( + query_response + ) else: query_job._properties["status"]["state"] = "PENDING" diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 4e72ac922..488a9ad29 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3862,6 +3862,7 @@ def _list_rows_from_query_results( retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, query_id: Optional[str] = None, + first_page_response: Optional[Dict[str, Any]] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -3904,6 +3905,8 @@ def _list_rows_from_query_results( query_id (Optional[str]): [Preview] ID of a completed query. This ID is auto-generated and not guaranteed to be populated. + first_page_response (Optional[dict]): + API response for the first page of results (if available). Returns: google.cloud.bigquery.table.RowIterator: Iterator of row data @@ -3923,6 +3926,11 @@ def _list_rows_from_query_results( if start_index is not None: params["startIndex"] = start_index + # We don't call jobs.query with a page size, so if the user explicitly + # requests a certain size, invalidate the cache. + if page_size is not None: + first_page_response = None + params["formatOptions.useInt64Timestamp"] = True row_iterator = RowIterator( client=self, @@ -3938,6 +3946,7 @@ def _list_rows_from_query_results( location=location, job_id=job_id, query_id=query_id, + first_page_response=first_page_response, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index a48a15f85..79cd207a1 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1586,7 +1586,8 @@ def do_get_result(): # Since the job could already be "done" (e.g. got a finished job # via client.get_job), the superclass call to done() might not # set the self._query_results cache. - self._reload_query_results(retry=retry, timeout=timeout) + if self._query_results is None or not self._query_results.complete: + self._reload_query_results(retry=retry, timeout=timeout) if retry_do_query is not None and job_retry is not None: do_get_result = job_retry(do_get_result) @@ -1615,6 +1616,15 @@ def do_get_result(): query_id=self.query_id, ) + # We know that there's at least 1 row, so only treat the response from + # jobs.getQueryResults / jobs.query as the first page of the + # RowIterator response if there are any rows in it. This prevents us + # from stopping the iteration early because we're missing rows and + # there's no next page token. + first_page_response = self._query_results._properties + if "rows" not in first_page_response: + first_page_response = None + rows = self._client._list_rows_from_query_results( self.job_id, self.location, @@ -1628,6 +1638,7 @@ def do_get_result(): retry=retry, timeout=timeout, query_id=self.query_id, + first_page_response=first_page_response, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index ccc8840be..54abe95a7 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -1005,14 +1005,6 @@ def _set_properties(self, api_response): Args: api_response (Dict): Response returned from an API call """ - job_id_present = ( - "jobReference" in api_response - and "jobId" in api_response["jobReference"] - and "projectId" in api_response["jobReference"] - ) - if not job_id_present: - raise ValueError("QueryResult requires a job reference") - self._properties.clear() self._properties.update(copy.deepcopy(api_response)) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 168448c99..dca9f7962 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -100,6 +100,10 @@ "because the necessary `__from_arrow__` attribute is missing." ) +# How many of the total rows need to be downloaded already for us to skip +# calling the BQ Storage API? +ALMOST_COMPLETELY_CACHED_RATIO = 0.333 + def _reference_getter(table): """A :class:`~google.cloud.bigquery.table.TableReference` pointing to @@ -1625,16 +1629,31 @@ def query_id(self) -> Optional[str]: """ return self._query_id - def _is_completely_cached(self): + def _is_almost_completely_cached(self): """Check if all results are completely cached. This is useful to know, because we can avoid alternative download mechanisms. """ - if self._first_page_response is None or self.next_page_token: + if self._first_page_response is None: return False - return self._first_page_response.get(self._next_token) is None + total_cached_rows = len(self._first_page_response.get(self._items_key, [])) + if self.max_results is not None and total_cached_rows >= self.max_results: + return True + + if ( + self.next_page_token is None + and self._first_page_response.get(self._next_token) is None + ): + return True + + if self._total_rows is not None: + almost_completely = self._total_rows * ALMOST_COMPLETELY_CACHED_RATIO + if total_cached_rows >= almost_completely: + return True + + return False def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): """Returns True if the BigQuery Storage API can be used. @@ -1647,7 +1666,14 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): if not using_bqstorage_api: return False - if self._is_completely_cached(): + if self._table is None: + return False + + # The developer is manually paging through results if this is set. + if self.next_page_token is not None: + return False + + if self._is_almost_completely_cached(): return False if self.max_results is not None: @@ -1671,7 +1697,15 @@ def _get_next_page_response(self): The parsed JSON response of the next page's contents. """ if self._first_page_response: - response = self._first_page_response + rows = self._first_page_response.get(self._items_key, [])[ + : self.max_results + ] + response = { + self._items_key: rows, + } + if self._next_token in self._first_page_response: + response[self._next_token] = self._first_page_response[self._next_token] + self._first_page_response = None return response diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 39275063a..776234b5b 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -25,6 +25,7 @@ import requests from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS +import google.cloud.bigquery._job_helpers import google.cloud.bigquery.query from google.cloud.bigquery.table import _EmptyRowIterator @@ -1081,6 +1082,114 @@ def test_result_with_done_job_calls_get_query_results(self): timeout=None, ) conn.api_request.assert_has_calls([query_results_call, query_results_page_call]) + assert conn.api_request.call_count == 2 + + def test_result_with_done_jobs_query_response_doesnt_call_get_query_results(self): + """With a done result from jobs.query, we don't need to call + jobs.getQueryResults to wait for the query to finish. + + jobs.get is still called because there is an assumption that after + QueryJob.result(), all job metadata is available locally. + """ + job_resource = self._make_resource(started=True, ended=True, location="EU") + conn = make_connection(job_resource) + client = _make_client(self.PROJECT, connection=conn) + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "rows": [{"f": [{"v": "abc"}]}], + "totalRows": "1", + } + job = google.cloud.bigquery._job_helpers._to_query_job( + client, + "SELECT 'abc' AS col1", + request_config=None, + query_response=query_resource_done, + ) + assert job.state == "DONE" + + result = job.result() + + rows = list(result) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0].col1, "abc") + job_path = f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}" + conn.api_request.assert_called_once_with( + method="GET", + path=job_path, + query_params={}, + timeout=None, + ) + + def test_result_with_done_jobs_query_response_and_page_size_invalidates_cache(self): + """We don't call jobs.query with a page size, so if the user explicitly + requests a certain size, invalidate the cache. + """ + # Arrange + job_resource = self._make_resource( + started=True, ended=True, location="asia-northeast1" + ) + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "rows": [{"f": [{"v": "abc"}]}], + "pageToken": "initial-page-token-shouldnt-be-used", + "totalRows": "4", + } + query_page_resource = { + "totalRows": 4, + "pageToken": "some-page-token", + "rows": [ + {"f": [{"v": "row1"}]}, + {"f": [{"v": "row2"}]}, + {"f": [{"v": "row3"}]}, + ], + } + query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} + conn = make_connection(job_resource, query_page_resource, query_page_resource_2) + client = _make_client(self.PROJECT, connection=conn) + job = google.cloud.bigquery._job_helpers._to_query_job( + client, + "SELECT col1 FROM table", + request_config=None, + query_response=query_resource_done, + ) + assert job.state == "DONE" + + # Act + result = job.result(page_size=3) + + # Assert + actual_rows = list(result) + self.assertEqual(len(actual_rows), 4) + + query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" + query_page_1_call = mock.call( + method="GET", + path=query_results_path, + query_params={ + "maxResults": 3, + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "asia-northeast1", + "formatOptions.useInt64Timestamp": True, + }, + timeout=None, + ) + query_page_2_call = mock.call( + method="GET", + path=query_results_path, + query_params={ + "pageToken": "some-page-token", + "maxResults": 3, + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "asia-northeast1", + "formatOptions.useInt64Timestamp": True, + }, + timeout=None, + ) + conn.api_request.assert_has_calls([query_page_1_call, query_page_2_call]) def test_result_with_max_results(self): from google.cloud.bigquery.table import RowIterator diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index aae4890b3..949c1993b 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -1362,13 +1362,13 @@ def test_errors_present(self): self.assertEqual(query.errors, ERRORS) def test_job_id_missing(self): - with self.assertRaises(ValueError): - self._make_one({}) + query = self._make_one({}) + self.assertIsNone(query.job_id) def test_job_id_broken_job_reference(self): resource = {"jobReference": {"bogus": "BOGUS"}} - with self.assertRaises(ValueError): - self._make_one(resource) + query = self._make_one(resource) + self.assertIsNone(query.job_id) def test_job_id_present(self): resource = self._make_resource() diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index d9f259e72..05ad8de6e 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2210,6 +2210,39 @@ def test_iterate_with_cached_first_page(self): method="GET", path=path, query_params={"pageToken": "next-page"} ) + def test_iterate_with_cached_first_page_max_results(self): + from google.cloud.bigquery.schema import SchemaField + + first_page = { + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + "pageToken": "next-page", + } + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + path = "/foo" + api_request = mock.Mock(return_value=first_page) + row_iterator = self._make_one( + _mock_client(), + api_request, + path, + schema, + max_results=3, + first_page_response=first_page, + ) + rows = list(row_iterator) + self.assertEqual(len(rows), 3) + self.assertEqual(rows[0].age, 27) + self.assertEqual(rows[1].age, 28) + self.assertEqual(rows[2].age, 32) + api_request.assert_not_called() + def test_page_size(self): from google.cloud.bigquery.schema import SchemaField @@ -2235,19 +2268,58 @@ def test_page_size(self): query_params={"maxResults": row_iterator._page_size}, ) - def test__is_completely_cached_returns_false_without_first_page(self): + def test__is_almost_completely_cached_returns_false_without_first_page(self): iterator = self._make_one(first_page_response=None) - self.assertFalse(iterator._is_completely_cached()) + self.assertFalse(iterator._is_almost_completely_cached()) - def test__is_completely_cached_returns_false_with_page_token(self): - first_page = {"pageToken": "next-page"} + def test__is_almost_completely_cached_returns_true_with_more_rows_than_max_results( + self, + ): + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + ] + first_page = {"pageToken": "next-page", "rows": rows} + iterator = self._make_one(max_results=4, first_page_response=first_page) + self.assertTrue(iterator._is_almost_completely_cached()) + + def test__is_almost_completely_cached_returns_false_with_too_many_rows_remaining( + self, + ): + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + first_page = {"pageToken": "next-page", "rows": rows} + iterator = self._make_one(first_page_response=first_page, total_rows=100) + self.assertFalse(iterator._is_almost_completely_cached()) + + def test__is_almost_completely_cached_returns_false_with_rows_remaining_and_no_total_rows( + self, + ): + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + first_page = {"pageToken": "next-page", "rows": rows} iterator = self._make_one(first_page_response=first_page) - self.assertFalse(iterator._is_completely_cached()) + self.assertFalse(iterator._is_almost_completely_cached()) + + def test__is_almost_completely_cached_returns_true_with_some_rows_remaining(self): + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + first_page = {"pageToken": "next-page", "rows": rows} + iterator = self._make_one(first_page_response=first_page, total_rows=6) + self.assertTrue(iterator._is_almost_completely_cached()) - def test__is_completely_cached_returns_true(self): + def test__is_almost_completely_cached_returns_true_with_no_rows_remaining(self): first_page = {"rows": []} iterator = self._make_one(first_page_response=first_page) - self.assertTrue(iterator._is_completely_cached()) + self.assertTrue(iterator._is_almost_completely_cached()) def test__validate_bqstorage_returns_false_when_completely_cached(self): first_page = {"rows": []} @@ -2258,6 +2330,25 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test__validate_bqstorage_returns_true_if_no_cached_results(self): + iterator = self._make_one(first_page_response=None) # not cached + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + self.assertTrue(result) + + def test__validate_bqstorage_returns_false_if_page_token_set(self): + iterator = self._make_one( + page_token="abc", first_page_response=None # not cached + ) + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + self.assertFalse(result) + def test__validate_bqstorage_returns_false_if_max_results_set(self): iterator = self._make_one( max_results=10, first_page_response=None # not cached diff --git a/tests/unit/test_table_arrow.py b/tests/unit/test_table_arrow.py new file mode 100644 index 000000000..6f1e6f76a --- /dev/null +++ b/tests/unit/test_table_arrow.py @@ -0,0 +1,134 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from google.cloud import bigquery +import google.cloud.bigquery.table + + +pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") + + +def test_to_arrow_with_jobs_query_response(): + resource = { + "kind": "bigquery#queryResponse", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "number", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "jobReference": { + "projectId": "test-project", + "jobId": "job_ocd3cb-N62QIslU7R5qKKa2_427J", + "location": "US", + }, + "totalRows": "9", + "rows": [ + {"f": [{"v": "Tiarra"}, {"v": "6"}]}, + {"f": [{"v": "Timothy"}, {"v": "325"}]}, + {"f": [{"v": "Tina"}, {"v": "26"}]}, + {"f": [{"v": "Tierra"}, {"v": "10"}]}, + {"f": [{"v": "Tia"}, {"v": "17"}]}, + {"f": [{"v": "Tiara"}, {"v": "22"}]}, + {"f": [{"v": "Tiana"}, {"v": "6"}]}, + {"f": [{"v": "Tiffany"}, {"v": "229"}]}, + {"f": [{"v": "Tiffani"}, {"v": "8"}]}, + ], + "totalBytesProcessed": "154775150", + "jobComplete": True, + "cacheHit": False, + "queryId": "job_ocd3cb-N62QIslU7R5qKKa2_427J", + } + + rows = google.cloud.bigquery.table.RowIterator( + client=None, + api_request=None, + path=None, + schema=[ + bigquery.SchemaField.from_api_repr(field) + for field in resource["schema"]["fields"] + ], + first_page_response=resource, + ) + records = rows.to_arrow() + + assert records.column_names == ["name", "number"] + assert records["name"].to_pylist() == [ + "Tiarra", + "Timothy", + "Tina", + "Tierra", + "Tia", + "Tiara", + "Tiana", + "Tiffany", + "Tiffani", + ] + assert records["number"].to_pylist() == [6, 325, 26, 10, 17, 22, 6, 229, 8] + + +def test_to_arrow_with_jobs_query_response_and_max_results(): + resource = { + "kind": "bigquery#queryResponse", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "number", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "jobReference": { + "projectId": "test-project", + "jobId": "job_ocd3cb-N62QIslU7R5qKKa2_427J", + "location": "US", + }, + "totalRows": "9", + "rows": [ + {"f": [{"v": "Tiarra"}, {"v": "6"}]}, + {"f": [{"v": "Timothy"}, {"v": "325"}]}, + {"f": [{"v": "Tina"}, {"v": "26"}]}, + {"f": [{"v": "Tierra"}, {"v": "10"}]}, + {"f": [{"v": "Tia"}, {"v": "17"}]}, + {"f": [{"v": "Tiara"}, {"v": "22"}]}, + {"f": [{"v": "Tiana"}, {"v": "6"}]}, + {"f": [{"v": "Tiffany"}, {"v": "229"}]}, + {"f": [{"v": "Tiffani"}, {"v": "8"}]}, + ], + "totalBytesProcessed": "154775150", + "jobComplete": True, + "cacheHit": False, + "queryId": "job_ocd3cb-N62QIslU7R5qKKa2_427J", + } + + rows = google.cloud.bigquery.table.RowIterator( + client=None, + api_request=None, + path=None, + schema=[ + bigquery.SchemaField.from_api_repr(field) + for field in resource["schema"]["fields"] + ], + first_page_response=resource, + max_results=3, + ) + records = rows.to_arrow() + + assert records.column_names == ["name", "number"] + assert records["name"].to_pylist() == [ + "Tiarra", + "Timothy", + "Tina", + ] + assert records["number"].to_pylist() == [6, 325, 26] diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index dfe512eea..6970d9d65 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -201,3 +201,62 @@ def test_to_dataframe_arrays(monkeypatch, class_under_test): assert df.dtypes["int64_repeated"].name == "object" assert tuple(df["int64_repeated"][0]) == (-1, 0, 2) + + +def test_to_dataframe_with_jobs_query_response(class_under_test): + resource = { + "kind": "bigquery#queryResponse", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "number", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "jobReference": { + "projectId": "test-project", + "jobId": "job_ocd3cb-N62QIslU7R5qKKa2_427J", + "location": "US", + }, + "totalRows": "9", + "rows": [ + {"f": [{"v": "Tiarra"}, {"v": "6"}]}, + {"f": [{"v": "Timothy"}, {"v": "325"}]}, + {"f": [{"v": "Tina"}, {"v": "26"}]}, + {"f": [{"v": "Tierra"}, {"v": "10"}]}, + {"f": [{"v": "Tia"}, {"v": "17"}]}, + {"f": [{"v": "Tiara"}, {"v": "22"}]}, + {"f": [{"v": "Tiana"}, {"v": "6"}]}, + {"f": [{"v": "Tiffany"}, {"v": "229"}]}, + {"f": [{"v": "Tiffani"}, {"v": "8"}]}, + ], + "totalBytesProcessed": "154775150", + "jobComplete": True, + "cacheHit": False, + "queryId": "job_ocd3cb-N62QIslU7R5qKKa2_427J", + } + + rows = class_under_test( + client=None, + api_request=None, + path=None, + schema=[ + bigquery.SchemaField.from_api_repr(field) + for field in resource["schema"]["fields"] + ], + first_page_response=resource, + ) + df = rows.to_dataframe() + + assert list(df.columns) == ["name", "number"] + assert list(df["name"]) == [ + "Tiarra", + "Timothy", + "Tina", + "Tierra", + "Tia", + "Tiara", + "Tiana", + "Tiffany", + "Tiffani", + ] + assert list(df["number"]) == [6, 325, 26, 10, 17, 22, 6, 229, 8] From 55735791122f97b7f67cb962b489fd1f12210af5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Nov 2023 11:51:45 -0600 Subject: [PATCH 192/536] fix: ensure query job retry has longer deadline than API request deadline (#1734) In cases where we can't disambiguate API failure from job failure, this ensures we can still retry the job at least once. --- google/cloud/bigquery/retry.py | 9 +++++++-- tests/unit/test_retry.py | 5 +++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index d0830ed13..b01c0662c 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -34,7 +34,12 @@ auth_exceptions.TransportError, ) -_DEFAULT_JOB_DEADLINE = 60.0 * 10.0 # seconds +_DEFAULT_RETRY_DEADLINE = 10.0 * 60.0 # 10 minutes + +# Allow for a few retries after the API request times out. This relevant for +# rateLimitExceeded errors, which can be raised either by the Google load +# balancer or the BigQuery job server. +_DEFAULT_JOB_DEADLINE = 3.0 * _DEFAULT_RETRY_DEADLINE def _should_retry(exc): @@ -51,7 +56,7 @@ def _should_retry(exc): return reason in _RETRYABLE_REASONS -DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=600.0) +DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=_DEFAULT_RETRY_DEADLINE) """The default retry object. Any method with a ``retry`` parameter will be retried automatically, diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index 60d04de89..1109b7ff2 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -125,6 +125,7 @@ def test_DEFAULT_JOB_RETRY_predicate(): def test_DEFAULT_JOB_RETRY_deadline(): - from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY, DEFAULT_RETRY - assert DEFAULT_JOB_RETRY._deadline == 600 + # Make sure we can retry the job at least once. + assert DEFAULT_JOB_RETRY._deadline > DEFAULT_RETRY._deadline From f05dc69a1f8c65ac32085bfcc6950c2c83f8a843 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 22 Nov 2023 10:24:14 -0600 Subject: [PATCH 193/536] fix: `load_table_from_dataframe` now assumes there may be local null values (#1735) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even if the remote schema is REQUIRED Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #1692 🦕 --- google/cloud/bigquery/_pandas_helpers.py | 10 ++++- tests/system/test_pandas.py | 47 ++++++++++++++++++++---- tests/unit/test__pandas_helpers.py | 47 +++++++++++++++++------- 3 files changed, 81 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 53db9511c..380df7b1d 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -178,12 +178,18 @@ def bq_to_arrow_field(bq_field, array_type=None): if arrow_type is not None: if array_type is not None: arrow_type = array_type # For GEOGRAPHY, at least initially - is_nullable = bq_field.mode.upper() == "NULLABLE" metadata = BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA.get( bq_field.field_type.upper() if bq_field.field_type else "" ) return pyarrow.field( - bq_field.name, arrow_type, nullable=is_nullable, metadata=metadata + bq_field.name, + arrow_type, + # Even if the remote schema is REQUIRED, there's a chance there's + # local NULL values. Arrow will gladly interpret these NULL values + # as non-NULL and give you an arbitrary value. See: + # https://github.com/googleapis/python-bigquery/issues/1692 + nullable=True, + metadata=metadata, ) warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name)) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index a46f8e3df..9f7fc242e 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -428,8 +428,7 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): - """Test that a DataFrame with required columns can be uploaded if a - BigQuery schema is specified. + """Test that a DataFrame can be uploaded to a table with required columns. See: https://github.com/googleapis/google-cloud-python/issues/8093 """ @@ -440,7 +439,6 @@ def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}] dataframe = pandas.DataFrame(records, columns=["name", "age"]) - job_config = bigquery.LoadJobConfig(schema=table_schema) table_id = "{}.{}.load_table_from_dataframe_w_required".format( bigquery_client.project, dataset_id ) @@ -451,15 +449,50 @@ def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): bigquery.Table(table_id, schema=table_schema) ) - job_config = bigquery.LoadJobConfig(schema=table_schema) - load_job = bigquery_client.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) load_job.result() table = bigquery_client.get_table(table) assert tuple(table.schema) == table_schema assert table.num_rows == 2 + for field in table.schema: + assert field.mode == "REQUIRED" + + +def test_load_table_from_dataframe_w_required_but_local_nulls_fails( + bigquery_client, dataset_id +): + """Test that a DataFrame with nulls can't be uploaded to a table with + required columns. + + See: https://github.com/googleapis/python-bigquery/issues/1692 + """ + table_schema = ( + bigquery.SchemaField("name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ) + + records = [ + {"name": "Chip", "age": 2}, + {"name": "Dale", "age": 3}, + {"name": None, "age": None}, + {"name": "Alvin", "age": 4}, + ] + dataframe = pandas.DataFrame(records, columns=["name", "age"]) + table_id = ( + "{}.{}.load_table_from_dataframe_w_required_but_local_nulls_fails".format( + bigquery_client.project, dataset_id + ) + ) + + # Create the table before loading so that schema mismatch errors are + # identified. + helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + with pytest.raises(google.api_core.exceptions.BadRequest, match="null"): + bigquery_client.load_table_from_dataframe(dataframe, table_id).result() def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id): diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 212a6f1dd..1f1b4eeb3 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1017,30 +1017,41 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): ) data = { - "field01": ["hello", "world"], - "field02": [b"abd", b"efg"], - "field03": [1, 2], - "field04": [3, 4], - "field05": [1.25, 9.75], - "field06": [-1.75, -3.5], - "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field01": ["hello", None, "world"], + "field02": [b"abd", b"efg", b"hij"], + "field03": [1, 2, 3], + "field04": [4, None, 5], + "field05": [1.25, 0.0, 9.75], + "field06": [-1.75, None, -3.5], + "field07": [ + decimal.Decimal("1.2345"), + decimal.Decimal("6.7891"), + -decimal.Decimal("10.111213"), + ], "field08": [ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), ], - "field09": [True, False], - "field10": [False, True], + "field09": [True, False, True], + "field10": [False, True, None], "field11": [ datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=datetime.timezone.utc), + datetime.datetime(2022, 7, 14, 23, 59, 59, tzinfo=datetime.timezone.utc), ], - "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], - "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], + "field12": [datetime.date(9999, 12, 31), None, datetime.date(1970, 1, 1)], + "field13": [datetime.time(23, 59, 59, 999999), None, datetime.time(12, 0, 0)], "field14": [ datetime.datetime(1970, 1, 1, 0, 0, 0), + None, datetime.datetime(2012, 12, 21, 9, 7, 42), ], - "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], + "field15": [ + None, + "POINT(30 10)", + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], } dataframe = pandas.DataFrame(data) @@ -1049,7 +1060,11 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): assert len(arrow_schema) == len(bq_schema) for arrow_field in arrow_schema: - assert not arrow_field.nullable + # Even if the remote schema is REQUIRED, there's a chance there's + # local NULL values. Arrow will gladly interpret these NULL values + # as non-NULL and give you an arbitrary value. See: + # https://github.com/googleapis/python-bigquery/issues/1692 + assert arrow_field.nullable @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1101,7 +1116,11 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): arrow_schema = arrow_table.schema expected_fields = [ - pyarrow.field("field01", "string", nullable=False), + # Even if the remote schema is REQUIRED, there's a chance there's + # local NULL values. Arrow will gladly interpret these NULL values + # as non-NULL and give you an arbitrary value. See: + # https://github.com/googleapis/python-bigquery/issues/1692 + pyarrow.field("field01", "string", nullable=True), pyarrow.field("field02", "bool", nullable=True), ] assert list(arrow_schema) == expected_fields From 222e86e4e8af321ad8908c1c65d39d10bab9f565 Mon Sep 17 00:00:00 2001 From: Kira Date: Wed, 22 Nov 2023 12:51:21 -0800 Subject: [PATCH 194/536] chore: standardize samples directory - delete unneeded dependencies (#1732) * chore: standardize samples directory = delete unneeded dependencies * Removed unused import for linter --- samples/desktopapp/conftest.py | 23 +++ samples/desktopapp/requirements.txt | 15 -- samples/magics/requirements.txt | 10 +- samples/notebooks/conftest.py | 23 +++ samples/notebooks/requirements.txt | 7 - samples/snippets/jupyter_tutorial_test.py | 175 ---------------------- samples/snippets/requirements.txt | 18 +-- samples/snippets/user_credentials.py | 78 ---------- samples/snippets/user_credentials_test.py | 45 ------ 9 files changed, 48 insertions(+), 346 deletions(-) create mode 100644 samples/desktopapp/conftest.py create mode 100644 samples/notebooks/conftest.py delete mode 100644 samples/snippets/jupyter_tutorial_test.py delete mode 100644 samples/snippets/user_credentials.py delete mode 100644 samples/snippets/user_credentials_test.py diff --git a/samples/desktopapp/conftest.py b/samples/desktopapp/conftest.py new file mode 100644 index 000000000..fdc85a852 --- /dev/null +++ b/samples/desktopapp/conftest.py @@ -0,0 +1,23 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +import pytest + + +@pytest.fixture +def bigquery_client_patch( + monkeypatch: pytest.MonkeyPatch, bigquery_client: bigquery.Client +) -> None: + monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 1eeffe32c..a5b3ad130 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,17 +1,2 @@ -db-dtypes==1.1.1 google-cloud-bigquery==3.11.4 -google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.57.0 -ipywidgets==8.1.0 -ipython===7.31.1; python_version == '3.7' -ipython===8.0.1; python_version == '3.8' -ipython==8.14.0; python_version >= '3.9' -matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.2; python_version >= '3.8' -pandas===1.3.5; python_version == '3.7' -pandas==2.0.3; python_version >= '3.8' -pyarrow==12.0.1; python_version == '3.7' -pyarrow==14.0.1; python_version >= '3.8' -pytz==2023.3 -typing-extensions==4.7.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 8feb236fc..c8f6b2765 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,16 +1,8 @@ db-dtypes==1.1.1 +google.cloud.bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 -google-auth-oauthlib==1.0.0 -grpcio==1.57.0 -ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.14.0; python_version >= '3.9' -matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==2.0.3; python_version >= '3.8' -pyarrow==12.0.1; python_version == '3.7' -pyarrow==14.0.1; python_version >= '3.8' -pytz==2023.3 -typing-extensions==4.7.1 diff --git a/samples/notebooks/conftest.py b/samples/notebooks/conftest.py new file mode 100644 index 000000000..fdc85a852 --- /dev/null +++ b/samples/notebooks/conftest.py @@ -0,0 +1,23 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +import pytest + + +@pytest.fixture +def bigquery_client_patch( + monkeypatch: pytest.MonkeyPatch, bigquery_client: bigquery.Client +) -> None: + monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 1eeffe32c..22c46297f 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,9 +1,6 @@ db-dtypes==1.1.1 google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 -google-auth-oauthlib==1.0.0 -grpcio==1.57.0 -ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.14.0; python_version >= '3.9' @@ -11,7 +8,3 @@ matplotlib===3.5.3; python_version == '3.7' matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas==2.0.3; python_version >= '3.8' -pyarrow==12.0.1; python_version == '3.7' -pyarrow==14.0.1; python_version >= '3.8' -pytz==2023.3 -typing-extensions==4.7.1 diff --git a/samples/snippets/jupyter_tutorial_test.py b/samples/snippets/jupyter_tutorial_test.py deleted file mode 100644 index 9d42a4eda..000000000 --- a/samples/snippets/jupyter_tutorial_test.py +++ /dev/null @@ -1,175 +0,0 @@ -# Copyright 2018 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import typing -from typing import Iterator - -import pytest - -if typing.TYPE_CHECKING: - from IPython.terminal.interactiveshell import TerminalInteractiveShell - -IPython = pytest.importorskip("IPython") -interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") -tools = pytest.importorskip("IPython.testing.tools") -matplotlib = pytest.importorskip("matplotlib") - -# Ignore semicolon lint warning because semicolons are used in notebooks -# flake8: noqa E703 - - -@pytest.fixture(scope="session") -def ipython() -> "TerminalInteractiveShell": - config = tools.default_config() - config.TerminalInteractiveShell.simple_prompt = True - shell = interactiveshell.TerminalInteractiveShell.instance(config=config) - return shell - - -@pytest.fixture() -def ipython_interactive( - request: pytest.FixtureRequest, ipython: "TerminalInteractiveShell" -) -> Iterator["TerminalInteractiveShell"]: - """Activate IPython's builtin hooks - - for the duration of the test scope. - """ - with ipython.builtin_trap: - yield ipython - - -def _strip_region_tags(sample_text: str) -> str: - """Remove blank lines and region tags from sample text""" - magic_lines = [ - line for line in sample_text.split("\n") if len(line) > 0 and "# [" not in line - ] - return "\n".join(magic_lines) - - -def test_jupyter_tutorial(ipython: "TerminalInteractiveShell") -> None: - matplotlib.use("agg") - ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") - - sample = """ - # [START bigquery_jupyter_magic_gender_by_year] - %%bigquery - SELECT - source_year AS year, - COUNT(is_male) AS birth_count - FROM `bigquery-public-data.samples.natality` - GROUP BY year - ORDER BY year DESC - LIMIT 15 - # [END bigquery_jupyter_magic_gender_by_year] - """ - result = ip.run_cell(_strip_region_tags(sample)) - result.raise_error() # Throws an exception if the cell failed. - - sample = """ - # [START bigquery_jupyter_magic_gender_by_year_var] - %%bigquery total_births - SELECT - source_year AS year, - COUNT(is_male) AS birth_count - FROM `bigquery-public-data.samples.natality` - GROUP BY year - ORDER BY year DESC - LIMIT 15 - # [END bigquery_jupyter_magic_gender_by_year_var] - """ - result = ip.run_cell(_strip_region_tags(sample)) - result.raise_error() # Throws an exception if the cell failed. - - assert "total_births" in ip.user_ns # verify that variable exists - total_births = ip.user_ns["total_births"] - # [START bigquery_jupyter_plot_births_by_year] - total_births.plot(kind="bar", x="year", y="birth_count") - # [END bigquery_jupyter_plot_births_by_year] - - sample = """ - # [START bigquery_jupyter_magic_gender_by_weekday] - %%bigquery births_by_weekday - SELECT - wday, - SUM(CASE WHEN is_male THEN 1 ELSE 0 END) AS male_births, - SUM(CASE WHEN is_male THEN 0 ELSE 1 END) AS female_births - FROM `bigquery-public-data.samples.natality` - WHERE wday IS NOT NULL - GROUP BY wday - ORDER BY wday ASC - # [END bigquery_jupyter_magic_gender_by_weekday] - """ - result = ip.run_cell(_strip_region_tags(sample)) - result.raise_error() # Throws an exception if the cell failed. - - assert "births_by_weekday" in ip.user_ns # verify that variable exists - births_by_weekday = ip.user_ns["births_by_weekday"] - # [START bigquery_jupyter_plot_births_by_weekday] - births_by_weekday.plot(x="wday") - # [END bigquery_jupyter_plot_births_by_weekday] - - # [START bigquery_jupyter_import_and_client] - from google.cloud import bigquery - - client = bigquery.Client() - # [END bigquery_jupyter_import_and_client] - - # [START bigquery_jupyter_query_plurality_by_year] - sql = """ - SELECT - plurality, - COUNT(1) AS count, - year - FROM - `bigquery-public-data.samples.natality` - WHERE - NOT IS_NAN(plurality) AND plurality > 1 - GROUP BY - plurality, year - ORDER BY - count DESC - """ - df = client.query(sql).to_dataframe() - df.head() - # [END bigquery_jupyter_query_plurality_by_year] - - # [START bigquery_jupyter_plot_plurality_by_year] - pivot_table = df.pivot(index="year", columns="plurality", values="count") - pivot_table.plot(kind="bar", stacked=True, figsize=(15, 7)) - # [END bigquery_jupyter_plot_plurality_by_year] - - # [START bigquery_jupyter_query_births_by_gestation] - sql = """ - SELECT - gestation_weeks, - COUNT(1) AS count - FROM - `bigquery-public-data.samples.natality` - WHERE - NOT IS_NAN(gestation_weeks) AND gestation_weeks <> 99 - GROUP BY - gestation_weeks - ORDER BY - gestation_weeks - """ - df = client.query(sql).to_dataframe() - # [END bigquery_jupyter_query_births_by_gestation] - - # [START bigquery_jupyter_plot_births_by_gestation] - ax = df.plot(kind="bar", x="gestation_weeks", y="count", figsize=(15, 7)) - ax.set_title("Count of Births by Gestation Weeks") - ax.set_xlabel("Gestation Weeks") - ax.set_ylabel("Count") - # [END bigquery_jupyter_plot_births_by_gestation] diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 72a77ad11..f49c7494f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,17 +1 @@ -db-dtypes==1.1.1 -google-cloud-bigquery==3.11.4 -google-cloud-bigquery-storage==2.22.0 -google-auth-oauthlib==1.0.0 -grpcio==1.57.0 -ipywidgets==8.1.0 -ipython===7.31.1; python_version == '3.7' -ipython===8.0.1; python_version == '3.8' -ipython==8.14.0; python_version >= '3.9' -matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.2; python_version >= '3.8' -pandas===1.3.5; python_version == '3.7' -pandas==2.0.3; python_version >= '3.8' -pyarrow==12.0.1; python_version == '3.7' -pyarrow==14.0.1; python_version >= '3.8' -pytz==2023.3 -typing-extensions==4.7.1 \ No newline at end of file +google-cloud-bigquery==3.11.4 \ No newline at end of file diff --git a/samples/snippets/user_credentials.py b/samples/snippets/user_credentials.py deleted file mode 100644 index 487a56c5f..000000000 --- a/samples/snippets/user_credentials.py +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Command-line application to run a query using user credentials. - -You must supply a client secrets file, which would normally be bundled with -your application. -""" - -import argparse - - -def main(project: str) -> None: - # [START bigquery_auth_user_flow] - from google_auth_oauthlib import flow - - # A local server is used as the callback URL in the auth flow. - appflow = flow.InstalledAppFlow.from_client_secrets_file( - "client_secrets.json", scopes=["https://www.googleapis.com/auth/bigquery"] - ) - - # This launches a local server to be used as the callback URL in the desktop - # app auth flow. If you are accessing the application remotely, such as over - # SSH or a remote Jupyter notebook, this flow will not work. Use the - # `gcloud auth application-default login --no-browser` command or workload - # identity federation to get authentication tokens, instead. - # - appflow.run_local_server() - - credentials = appflow.credentials - # [END bigquery_auth_user_flow] - - # [START bigquery_auth_user_query] - from google.cloud import bigquery - - # TODO: Uncomment the line below to set the `project` variable. - # project = 'user-project-id' - # - # The `project` variable defines the project to be billed for query - # processing. The user must have the bigquery.jobs.create permission on - # this project to run a query. See: - # https://cloud.google.com/bigquery/docs/access-control#permissions - - client = bigquery.Client(project=project, credentials=credentials) - - query_string = """SELECT name, SUM(number) as total - FROM `bigquery-public-data.usa_names.usa_1910_current` - WHERE name = 'William' - GROUP BY name; - """ - query_job = client.query(query_string) - - # Print the results. - for row in query_job.result(): # Wait for the job to complete. - print("{}: {}".format(row["name"], row["total"])) - # [END bigquery_auth_user_query] - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter - ) - parser.add_argument("project", help="Project to use for BigQuery billing.") - args = parser.parse_args() - main(args.project) diff --git a/samples/snippets/user_credentials_test.py b/samples/snippets/user_credentials_test.py deleted file mode 100644 index 8448187de..000000000 --- a/samples/snippets/user_credentials_test.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from typing import Iterator, Union - -import google.auth -import mock -import pytest - -from user_credentials import main # type: ignore - -PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] - -MockType = Union[mock.mock.MagicMock, mock.mock.AsyncMock] - - -@pytest.fixture -def mock_flow() -> Iterator[MockType]: - flow_patch = mock.patch("google_auth_oauthlib.flow.InstalledAppFlow", autospec=True) - - with flow_patch as flow_mock: - flow_mock.from_client_secrets_file.return_value = flow_mock - flow_mock.credentials = google.auth.default()[0] - yield flow_mock - - -def test_auth_query_console( - mock_flow: MockType, capsys: pytest.CaptureFixture[str] -) -> None: - main(PROJECT) - out, _ = capsys.readouterr() - # Fun fact: William P. Wood was the 1st director of the US Secret Service. - assert "William" in out From 5ce4d136af97b91fbe1cc56bba1021e50a9c8476 Mon Sep 17 00:00:00 2001 From: Kira Date: Wed, 22 Nov 2023 13:39:50 -0800 Subject: [PATCH 195/536] fix: move grpc, proto-plus and protobuf packages to extras (#1721) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: move grpc, proto-plus and protobuff packages to extras * formatted with black * feat: add `job_timeout_ms` to job configuration classes (#1675) * fix: adds new property and tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates docs to correct a sphinx failure * Updates formatting * Update tests/system/test_query.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update google/cloud/bigquery/job/base.py * updates one test and uses int_or_none * Update tests/system/test_query.py testing something. * Update tests/system/test_query.py * testing coverage feature * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * minor edits * tweaks to noxfile for testing purposes * add new test to base as experiment * adds a test, updates import statements * add another test * edit to tests * formatting fixes * update noxfile to correct debug code * removes unneeded comments. --------- Co-authored-by: Owl Bot --------- Co-authored-by: Chalmer Lowe Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- setup.py | 10 +++++----- tests/unit/test_legacy_types.py | 8 ++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 4e87b3b84..ead602e12 100644 --- a/setup.py +++ b/setup.py @@ -29,20 +29,16 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "grpcio >= 1.47.0, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/1262 - "grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'", # NOTE: Maintainers, please do not require google-api-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 - "google-api-core[grpc] >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "proto-plus >= 1.15.0, <2.0.0dev", + "google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 "google-cloud-core >= 1.6.0, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 20.0.0", - "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", "requests >= 2.21.0, < 3.0.0dev", ] @@ -82,6 +78,10 @@ "opentelemetry-sdk >= 1.1.0", "opentelemetry-instrumentation >= 0.20b0", ], + "bigquery_v2": [ + "proto-plus >= 1.15.0, <2.0.0dev", + "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. + ], } all_extras = [] diff --git a/tests/unit/test_legacy_types.py b/tests/unit/test_legacy_types.py index 3f51cc511..3431074fd 100644 --- a/tests/unit/test_legacy_types.py +++ b/tests/unit/test_legacy_types.py @@ -13,9 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pytest + import warnings +try: + import proto # type: ignore +except ImportError: # pragma: NO COVER + proto = None + +@pytest.mark.skipif(proto is None, reason="proto is not installed") def test_importing_legacy_types_emits_warning(): with warnings.catch_warnings(record=True) as warned: from google.cloud.bigquery_v2 import types # noqa: F401 From 1f4ebb1eca4f9380a31172fc8cb2fae125f8c5a2 Mon Sep 17 00:00:00 2001 From: Kira Date: Tue, 28 Nov 2023 13:47:06 -0800 Subject: [PATCH 196/536] feat: removed pkg_resources from all test files and moved importlib into pandas extra (#1726) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: Introduce compatibility with native namespace packages * Update copyright year * removed pkg_resources from all test files and moved importlib into pandas extra * feat: removed pkg_resources from all test files and moved importlib into pandas extra * Adding no cover tag to test code * reformatted with black * undo revert * perf: use the first page a results when `query(api_method="QUERY")` (#1723) * perf: use the first page a results when `query(api_method="QUERY")` * add tests * respect max_results with cached page * respect page_size, also avoid bqstorage if almost fully downloaded * skip true test if bqstorage not installed * coverage * fix: ensure query job retry has longer deadline than API request deadline (#1734) In cases where we can't disambiguate API failure from job failure, this ensures we can still retry the job at least once. * fix: `load_table_from_dataframe` now assumes there may be local null values (#1735) Even if the remote schema is REQUIRED Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #1692 🦕 * chore: standardize samples directory - delete unneeded dependencies (#1732) * chore: standardize samples directory = delete unneeded dependencies * Removed unused import for linter * fix: move grpc, proto-plus and protobuf packages to extras (#1721) * chore: move grpc, proto-plus and protobuff packages to extras * formatted with black * feat: add `job_timeout_ms` to job configuration classes (#1675) * fix: adds new property and tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates docs to correct a sphinx failure * Updates formatting * Update tests/system/test_query.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update google/cloud/bigquery/job/base.py * updates one test and uses int_or_none * Update tests/system/test_query.py testing something. * Update tests/system/test_query.py * testing coverage feature * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * minor edits * tweaks to noxfile for testing purposes * add new test to base as experiment * adds a test, updates import statements * add another test * edit to tests * formatting fixes * update noxfile to correct debug code * removes unneeded comments. --------- Co-authored-by: Owl Bot --------- Co-authored-by: Chalmer Lowe Co-authored-by: Owl Bot Co-authored-by: Tim Swast * remove unnecessary version checks * undo bad commit, remove unneeded version checks * Revert "undo bad commit, remove unneeded version checks" This reverts commit 5c82dcf85f5fd73ad37ab7805b85b88a65351801. * Revert "remove unnecessary version checks" This reverts commit 9331a7e034de1c651c5ebc454f38f602d612a03d. * revert bad changes, remove pkg_resources from file * after clarification, reimplement changes and ignore 3.12 tests * reformatted with black * removed minimum check * updated pandas installed version check --------- Co-authored-by: Anthonios Partheniou Co-authored-by: Tim Swast Co-authored-by: Chalmer Lowe Co-authored-by: Owl Bot --- .coveragerc | 5 ---- google/__init__.py | 24 ------------------- google/cloud/__init__.py | 24 ------------------- noxfile.py | 5 ++-- setup.py | 10 ++------ tests/system/test_pandas.py | 23 ++++++++---------- tests/unit/job/test_query_pandas.py | 14 ++++++----- tests/unit/test__pandas_helpers.py | 32 ++++++++----------------- tests/unit/test_client.py | 23 +++++++----------- tests/unit/test_packaging.py | 37 +++++++++++++++++++++++++++++ tests/unit/test_table.py | 18 +++++++------- tests/unit/test_table_pandas.py | 14 ++++++----- 12 files changed, 96 insertions(+), 133 deletions(-) delete mode 100644 google/__init__.py delete mode 100644 google/cloud/__init__.py create mode 100644 tests/unit/test_packaging.py diff --git a/.coveragerc b/.coveragerc index 1ed1a9704..04092257a 100644 --- a/.coveragerc +++ b/.coveragerc @@ -12,8 +12,3 @@ exclude_lines = pragma: NO COVER # Ignore debug-only repr def __repr__ - # Ignore pkg_resources exceptions. - # This is added at the module level as a safeguard for if someone - # generates the code and tries to run it without pip installing. This - # makes it virtually impossible to test properly. - except pkg_resources.DistributionNotFound diff --git a/google/__init__.py b/google/__init__.py deleted file mode 100644 index 8e60d8439..000000000 --- a/google/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -try: - import pkg_resources - - pkg_resources.declare_namespace(__name__) -except ImportError: - import pkgutil - - __path__ = pkgutil.extend_path(__path__, __name__) # type: ignore diff --git a/google/cloud/__init__.py b/google/cloud/__init__.py deleted file mode 100644 index 8e60d8439..000000000 --- a/google/cloud/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -try: - import pkg_resources - - pkg_resources.declare_namespace(__name__) -except ImportError: - import pkgutil - - __path__ = pkgutil.extend_path(__path__, __name__) # type: ignore diff --git a/noxfile.py b/noxfile.py index 9ccbdd30c..246952728 100644 --- a/noxfile.py +++ b/noxfile.py @@ -137,7 +137,7 @@ def mypy(session): "types-requests", "types-setuptools", ) - session.run("mypy", "google/cloud", "--show-traceback") + session.run("mypy", "-p", "google", "--show-traceback") @nox.session(python=DEFAULT_PYTHON_VERSION) @@ -149,7 +149,8 @@ def pytype(session): session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install(PYTYPE_VERSION) - session.run("pytype") + # See https://github.com/google/pytype/issues/464 + session.run("pytype", "-P", ".", "google/cloud/bigquery") @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) diff --git a/setup.py b/setup.py index ead602e12..f21bb586d 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ "pandas>=1.1.0", pyarrow_dependency, "db-dtypes>=0.3.0,<2.0.0dev", + "importlib_metadata>=1.0.0; python_version<'3.8'", ], "ipywidgets": [ "ipywidgets>=7.7.0", @@ -108,16 +109,10 @@ # benchmarks, etc. packages = [ package - for package in setuptools.PEP420PackageFinder.find() + for package in setuptools.find_namespace_packages() if package.startswith("google") ] -# Determine which namespaces are needed. -namespaces = ["google"] -if "google.cloud" in packages: - namespaces.append("google.cloud") - - setuptools.setup( name=name, version=version, @@ -143,7 +138,6 @@ ], platforms="Posix; MacOS X; Windows", packages=packages, - namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, python_requires=">=3.7", diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 9f7fc242e..e93f245c0 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -23,9 +23,13 @@ import warnings import google.api_core.retry -import pkg_resources import pytest +try: + import importlib.metadata as metadata +except ImportError: + import importlib_metadata as metadata + from google.cloud import bigquery from google.cloud.bigquery import enums @@ -42,11 +46,9 @@ ) if pandas is not None: - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + PANDAS_INSTALLED_VERSION = metadata.version("pandas") else: - PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") - -PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") + PANDAS_INSTALLED_VERSION = "0.0.0" class MissingDataError(Exception): @@ -310,10 +312,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i ] -@pytest.mark.skipif( - PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION, - reason="Only `pandas version >=1.0.0` is supported", -) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_load_table_from_dataframe_w_nullable_int64_datatype( bigquery_client, dataset_id ): @@ -342,7 +341,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype( @pytest.mark.skipif( - PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION, + PANDAS_INSTALLED_VERSION[0:2].startswith("0."), reason="Only `pandas version >=1.0.0` is supported", ) def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema( @@ -1043,9 +1042,7 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): assert len(dataframe.index) == 100 -@pytest.mark.skipif( - PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" -) +@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") @pytest.mark.parametrize( ("max_results",), ( diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 0accae0a2..6189830ff 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -17,7 +17,6 @@ import json import mock -import pkg_resources import pytest @@ -45,14 +44,19 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None +try: + import importlib.metadata as metadata +except ImportError: + import importlib_metadata as metadata + from ..helpers import make_connection from .helpers import _make_client from .helpers import _make_job_resource if pandas is not None: - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + PANDAS_INSTALLED_VERSION = metadata.version("pandas") else: - PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + PANDAS_INSTALLED_VERSION = "0.0.0" pandas = pytest.importorskip("pandas") @@ -656,9 +660,7 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) -@pytest.mark.skipif( - PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" -) +@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 1f1b4eeb3..ad40a6da6 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -19,7 +19,11 @@ import operator import queue import warnings -import pkg_resources + +try: + import importlib.metadata as metadata +except ImportError: + import importlib_metadata as metadata import mock @@ -57,13 +61,10 @@ bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import() -PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") - if pandas is not None: - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + PANDAS_INSTALLED_VERSION = metadata.version("pandas") else: - # Set to less than MIN version. - PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + PANDAS_INSTALLED_VERSION = "0.0.0" skip_if_no_bignumeric = pytest.mark.skipif( @@ -542,9 +543,7 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" -) +@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): rows = [pandas.Timestamp(row) for row in rows] @@ -806,10 +805,7 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name( assert columns_and_indexes == expected -@pytest.mark.skipif( - pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, - reason="Requires `pandas version >= 1.0.0` which introduces pandas.NA", -) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_json_generator(module_under_test): utcnow = datetime.datetime.utcnow() df_data = collections.OrderedDict( @@ -837,16 +833,8 @@ def test_dataframe_to_json_generator(module_under_test): assert list(rows) == expected +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_json_generator_repeated_field(module_under_test): - pytest.importorskip( - "pandas", - minversion=str(PANDAS_MINIUM_VERSION), - reason=( - f"Requires `pandas version >= {PANDAS_MINIUM_VERSION}` " - "which introduces pandas.NA" - ), - ) - df_data = [ collections.OrderedDict( [("repeated_col", [pandas.NA, 2, None, 4]), ("not_repeated_col", "first")] diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index af61ceb42..ff4c40f48 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -30,7 +30,11 @@ import requests import packaging import pytest -import pkg_resources + +try: + import importlib.metadata as metadata +except ImportError: + import importlib_metadata as metadata try: import pandas @@ -76,13 +80,10 @@ from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection -PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") - if pandas is not None: - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + PANDAS_INSTALLED_VERSION = metadata.version("pandas") else: - # Set to less than MIN version. - PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + PANDAS_INSTALLED_VERSION = "0.0.0" def _make_credentials(): @@ -8145,10 +8146,7 @@ def test_load_table_from_dataframe_unknown_table(self): timeout=DEFAULT_TIMEOUT, ) - @unittest.skipIf( - pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, - "Only `pandas version >=1.0.0` supported", - ) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -8193,10 +8191,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): SchemaField("x", "INT64", "NULLABLE", None), ) - @unittest.skipIf( - pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, - "Only `pandas version >=1.0.0` supported", - ) + @unittest.skipIf(pandas is None, "Requires `pandas`") # @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES diff --git a/tests/unit/test_packaging.py b/tests/unit/test_packaging.py new file mode 100644 index 000000000..6f1b16c66 --- /dev/null +++ b/tests/unit/test_packaging.py @@ -0,0 +1,37 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import subprocess +import sys + + +def test_namespace_package_compat(tmp_path): + # The ``google`` namespace package should not be masked + # by the presence of ``google-cloud-bigquery``. + google = tmp_path / "google" + google.mkdir() + google.joinpath("othermod.py").write_text("") + env = dict(os.environ, PYTHONPATH=str(tmp_path)) + cmd = [sys.executable, "-m", "google.othermod"] + subprocess.check_call(cmd, env=env) + + # The ``google.cloud`` namespace package should not be masked + # by the presence of ``google-cloud-bigquery``. + google_cloud = tmp_path / "google" / "cloud" + google_cloud.mkdir() + google_cloud.joinpath("othermod.py").write_text("") + env = dict(os.environ, PYTHONPATH=str(tmp_path)) + cmd = [sys.executable, "-m", "google.cloud.othermod"] + subprocess.check_call(cmd, env=env) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 05ad8de6e..85f335dd1 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -22,9 +22,13 @@ import warnings import mock -import pkg_resources import pytest +try: + import importlib.metadata as metadata +except ImportError: + import importlib_metadata as metadata + import google.api_core.exceptions from test_utils.imports import maybe_fail_import @@ -71,9 +75,9 @@ tqdm = None if pandas is not None: - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + PANDAS_INSTALLED_VERSION = metadata.version("pandas") else: - PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + PANDAS_INSTALLED_VERSION = "0.0.0" def _mock_client(): @@ -3793,9 +3797,7 @@ def test_to_dataframe_w_dtypes_mapper(self): self.assertEqual(df.timestamp.dtype.name, "object") @unittest.skipIf(pandas is None, "Requires `pandas`") - @pytest.mark.skipif( - PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" - ) + @pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") def test_to_dataframe_w_none_dtypes_mapper(self): from google.cloud.bigquery.schema import SchemaField @@ -3908,9 +3910,7 @@ def test_to_dataframe_w_unsupported_dtypes_mapper(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @pytest.mark.skipif( - PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" - ) + @pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") def test_to_dataframe_column_dtypes(self): from google.cloud.bigquery.schema import SchemaField diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 6970d9d65..b38568561 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -15,7 +15,11 @@ import datetime import decimal from unittest import mock -import pkg_resources + +try: + import importlib.metadata as metadata +except ImportError: + import importlib_metadata as metadata import pytest @@ -28,9 +32,9 @@ TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data" if pandas is not None: # pragma: NO COVER - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + PANDAS_INSTALLED_VERSION = metadata.version("pandas") else: # pragma: NO COVER - PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + PANDAS_INSTALLED_VERSION = "0.0.0" @pytest.fixture @@ -40,9 +44,7 @@ def class_under_test(): return RowIterator -@pytest.mark.skipif( - PANDAS_INSTALLED_VERSION >= pkg_resources.parse_version("2.0.0"), reason="" -) +@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): # See tests/system/test_arrow.py for the actual types we get from the API. arrow_schema = pyarrow.schema( From 3c0976aecb0f917477feef4e9ed865997c2bb106 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 1 Dec 2023 13:46:17 -0800 Subject: [PATCH 197/536] feat: Add support for Python 3.12 (#1736) * chore(python): Add Python 3.12 Source-Link: https://github.com/googleapis/synthtool/commit/af16e6d4672cc7b400f144de2fc3068b54ff47d2 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:bacc3af03bff793a03add584537b36b5644342931ad989e3ba1171d3bd5399f5 * add trove classifier for python 3.12 * update kokoro configs * Add python 3.12 to noxfile.py * update georaphy sample requirements * update geography samples requirements * add testing constraint file for 3.12 * remove repr test --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou Co-authored-by: Linchin --- .github/.OwlBot.lock.yaml | 4 +- ...deps-3.11.cfg => prerelease-deps-3.12.cfg} | 2 +- .../{snippets-3.11.cfg => snippets-3.12.cfg} | 2 +- .../{system-3.11.cfg => system-3.12.cfg} | 2 +- .kokoro/samples/python3.12/common.cfg | 40 +++++++++++++++++++ .kokoro/samples/python3.12/continuous.cfg | 6 +++ .kokoro/samples/python3.12/periodic-head.cfg | 11 +++++ .kokoro/samples/python3.12/periodic.cfg | 6 +++ .kokoro/samples/python3.12/presubmit.cfg | 6 +++ CONTRIBUTING.rst | 6 ++- noxfile.py | 10 ++--- samples/desktopapp/noxfile.py | 2 +- samples/geography/noxfile.py | 2 +- samples/geography/requirements.txt | 14 ++++--- samples/magics/noxfile.py | 2 +- samples/notebooks/noxfile.py | 2 +- samples/snippets/noxfile.py | 2 +- setup.py | 1 + testing/constraints-3.12.txt | 0 tests/unit/test_query.py | 1 - 20 files changed, 97 insertions(+), 24 deletions(-) rename .kokoro/presubmit/{prerelease-deps-3.11.cfg => prerelease-deps-3.12.cfg} (77%) rename .kokoro/presubmit/{snippets-3.11.cfg => snippets-3.12.cfg} (81%) rename .kokoro/presubmit/{system-3.11.cfg => system-3.12.cfg} (82%) create mode 100644 .kokoro/samples/python3.12/common.cfg create mode 100644 .kokoro/samples/python3.12/continuous.cfg create mode 100644 .kokoro/samples/python3.12/periodic-head.cfg create mode 100644 .kokoro/samples/python3.12/periodic.cfg create mode 100644 .kokoro/samples/python3.12/presubmit.cfg create mode 100644 testing/constraints-3.12.txt diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 453b540c1..eb4d9f794 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:caffe0a9277daeccc4d1de5c9b55ebba0901b57c2f713ec9c876b0d4ec064f61 -# created: 2023-11-08T19:46:45.022803742Z + digest: sha256:bacc3af03bff793a03add584537b36b5644342931ad989e3ba1171d3bd5399f5 +# created: 2023-11-23T18:17:28.105124211Z diff --git a/.kokoro/presubmit/prerelease-deps-3.11.cfg b/.kokoro/presubmit/prerelease-deps-3.12.cfg similarity index 77% rename from .kokoro/presubmit/prerelease-deps-3.11.cfg rename to .kokoro/presubmit/prerelease-deps-3.12.cfg index 1e19f1239..ece962a17 100644 --- a/.kokoro/presubmit/prerelease-deps-3.11.cfg +++ b/.kokoro/presubmit/prerelease-deps-3.12.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "prerelease_deps-3.11" + value: "prerelease_deps-3.12" } diff --git a/.kokoro/presubmit/snippets-3.11.cfg b/.kokoro/presubmit/snippets-3.12.cfg similarity index 81% rename from .kokoro/presubmit/snippets-3.11.cfg rename to .kokoro/presubmit/snippets-3.12.cfg index 74af6dd07..1381e8323 100644 --- a/.kokoro/presubmit/snippets-3.11.cfg +++ b/.kokoro/presubmit/snippets-3.12.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "snippets-3.11" + value: "snippets-3.12" } diff --git a/.kokoro/presubmit/system-3.11.cfg b/.kokoro/presubmit/system-3.12.cfg similarity index 82% rename from .kokoro/presubmit/system-3.11.cfg rename to .kokoro/presubmit/system-3.12.cfg index 5ff31a603..789455bd6 100644 --- a/.kokoro/presubmit/system-3.11.cfg +++ b/.kokoro/presubmit/system-3.12.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "system-3.11" + value: "system-3.12" } diff --git a/.kokoro/samples/python3.12/common.cfg b/.kokoro/samples/python3.12/common.cfg new file mode 100644 index 000000000..6eb699edd --- /dev/null +++ b/.kokoro/samples/python3.12/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.12" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-312" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.12/continuous.cfg b/.kokoro/samples/python3.12/continuous.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.12/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.12/periodic-head.cfg b/.kokoro/samples/python3.12/periodic-head.cfg new file mode 100644 index 000000000..5aa01bab5 --- /dev/null +++ b/.kokoro/samples/python3.12/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.12/periodic.cfg b/.kokoro/samples/python3.12/periodic.cfg new file mode 100644 index 000000000..71cd1e597 --- /dev/null +++ b/.kokoro/samples/python3.12/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/.kokoro/samples/python3.12/presubmit.cfg b/.kokoro/samples/python3.12/presubmit.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.12/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 5dc30a1f8..7be61e6b6 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8, 3.9, 3.10 and 3.11 on both UNIX and Windows. + 3.7, 3.8, 3.9, 3.10, 3.11 and 3.12 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests. - To run a single unit test:: - $ nox -s unit-3.11 -- -k + $ nox -s unit-3.12 -- -k .. note:: @@ -226,12 +226,14 @@ We support: - `Python 3.9`_ - `Python 3.10`_ - `Python 3.11`_ +- `Python 3.12`_ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ .. _Python 3.11: https://docs.python.org/3.11/ +.. _Python 3.12: https://docs.python.org/3.12/ Supported versions can be found in our ``noxfile.py`` `config`_. diff --git a/noxfile.py b/noxfile.py index 246952728..ab7803040 100644 --- a/noxfile.py +++ b/noxfile.py @@ -37,8 +37,8 @@ ) DEFAULT_PYTHON_VERSION = "3.8" -SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.11"] -UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.11", "3.12"] +UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() # 'docfx' is excluded since it only needs to run in 'docs-presubmit' @@ -81,7 +81,7 @@ def default(session, install_extras=True): constraints_path, ) - if install_extras and session.python == "3.11": + if install_extras and session.python in ["3.11", "3.12"]: install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" elif install_extras: install_target = ".[all]" @@ -187,7 +187,7 @@ def system(session): # Data Catalog needed for the column ACL test with a real Policy Tag. session.install("google-cloud-datacatalog", "-c", constraints_path) - if session.python == "3.11": + if session.python in ["3.11", "3.12"]: extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" @@ -251,7 +251,7 @@ def snippets(session): session.install("google-cloud-storage", "-c", constraints_path) session.install("grpcio", "-c", constraints_path) - if session.python == "3.11": + if session.python in ["3.11", "3.12"]: extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: extras = "[all]" diff --git a/samples/desktopapp/noxfile.py b/samples/desktopapp/noxfile.py index 1224cbe21..3b7135946 100644 --- a/samples/desktopapp/noxfile.py +++ b/samples/desktopapp/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 1224cbe21..3b7135946 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 867f44b35..d6cea7ec5 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -7,10 +7,11 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.1.1 -Fiona==1.9.4.post1 -geojson==3.0.1 +Fiona==1.9.5 +geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' -geopandas==0.13.2; python_version >= '3.8' +geopandas==0.13.2; python_version == '3.8' +geopandas==0.14.1; python_version >= '3.9' google-api-core==2.11.1 google-auth==2.22.0 google-cloud-bigquery==3.11.4 @@ -19,9 +20,10 @@ google-cloud-core==2.3.3 google-crc32c==1.5.0 google-resumable-media==2.5.0 googleapis-common-protos==1.60.0 -grpcio==1.57.0 +grpcio==1.59.0 idna==3.4 -libcst==1.0.1 +libcst==1.0.1; python_version == '3.7' +libcst==1.1.0; python_version >= '3.8' munch==4.0.0 mypy-extensions==1.0.0 packaging==23.1 @@ -39,7 +41,7 @@ pytz==2023.3 PyYAML==6.0.1 requests==2.31.0 rsa==4.9 -Shapely==2.0.1 +Shapely==2.0.2 six==1.16.0 typing-extensions==4.7.1 typing-inspect==0.9.0 diff --git a/samples/magics/noxfile.py b/samples/magics/noxfile.py index 1224cbe21..3b7135946 100644 --- a/samples/magics/noxfile.py +++ b/samples/magics/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/notebooks/noxfile.py b/samples/notebooks/noxfile.py index 1224cbe21..3b7135946 100644 --- a/samples/notebooks/noxfile.py +++ b/samples/notebooks/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 1224cbe21..3b7135946 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/setup.py b/setup.py index f21bb586d..9fbc91ecb 100644 --- a/setup.py +++ b/setup.py @@ -133,6 +133,7 @@ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Operating System :: OS Independent", "Topic :: Internet", ], diff --git a/testing/constraints-3.12.txt b/testing/constraints-3.12.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 949c1993b..7c3438567 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -1281,7 +1281,6 @@ def test___repr__(self): field1 = self._make_one("test", _make_subparam("field1", "STRING", "hello")) got = repr(field1) self.assertIn("StructQueryParameter", got) - self.assertIn("'field1', 'STRING'", got) self.assertIn("'field1': 'hello'", got) From ab737969c000683ac52c499ee16ac76bd4cbdc84 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 5 Dec 2023 09:54:15 -0800 Subject: [PATCH 198/536] chore: bump cryptography from 41.0.5 to 41.0.6 in /synthtool/gcp/templates/python_library/.kokoro (#1740) Source-Link: https://github.com/googleapis/synthtool/commit/9367caadcbb30b5b2719f30eb00c44cc913550ed Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:2f155882785883336b4468d5218db737bb1d10c9cea7cb62219ad16fe248c03c Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/requirements.txt | 48 +++++++++++++++++++-------------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index eb4d9f794..773c1dfd2 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:bacc3af03bff793a03add584537b36b5644342931ad989e3ba1171d3bd5399f5 -# created: 2023-11-23T18:17:28.105124211Z + digest: sha256:2f155882785883336b4468d5218db737bb1d10c9cea7cb62219ad16fe248c03c +# created: 2023-11-29T14:54:29.548172703Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 8957e2110..e5c1ffca9 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -93,30 +93,30 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -cryptography==41.0.5 \ - --hash=sha256:0c327cac00f082013c7c9fb6c46b7cc9fa3c288ca702c74773968173bda421bf \ - --hash=sha256:0d2a6a598847c46e3e321a7aef8af1436f11c27f1254933746304ff014664d84 \ - --hash=sha256:227ec057cd32a41c6651701abc0328135e472ed450f47c2766f23267b792a88e \ - --hash=sha256:22892cc830d8b2c89ea60148227631bb96a7da0c1b722f2aac8824b1b7c0b6b8 \ - --hash=sha256:392cb88b597247177172e02da6b7a63deeff1937fa6fec3bbf902ebd75d97ec7 \ - --hash=sha256:3be3ca726e1572517d2bef99a818378bbcf7d7799d5372a46c79c29eb8d166c1 \ - --hash=sha256:573eb7128cbca75f9157dcde974781209463ce56b5804983e11a1c462f0f4e88 \ - --hash=sha256:580afc7b7216deeb87a098ef0674d6ee34ab55993140838b14c9b83312b37b86 \ - --hash=sha256:5a70187954ba7292c7876734183e810b728b4f3965fbe571421cb2434d279179 \ - --hash=sha256:73801ac9736741f220e20435f84ecec75ed70eda90f781a148f1bad546963d81 \ - --hash=sha256:7d208c21e47940369accfc9e85f0de7693d9a5d843c2509b3846b2db170dfd20 \ - --hash=sha256:8254962e6ba1f4d2090c44daf50a547cd5f0bf446dc658a8e5f8156cae0d8548 \ - --hash=sha256:88417bff20162f635f24f849ab182b092697922088b477a7abd6664ddd82291d \ - --hash=sha256:a48e74dad1fb349f3dc1d449ed88e0017d792997a7ad2ec9587ed17405667e6d \ - --hash=sha256:b948e09fe5fb18517d99994184854ebd50b57248736fd4c720ad540560174ec5 \ - --hash=sha256:c707f7afd813478e2019ae32a7c49cd932dd60ab2d2a93e796f68236b7e1fbf1 \ - --hash=sha256:d38e6031e113b7421db1de0c1b1f7739564a88f1684c6b89234fbf6c11b75147 \ - --hash=sha256:d3977f0e276f6f5bf245c403156673db103283266601405376f075c849a0b936 \ - --hash=sha256:da6a0ff8f1016ccc7477e6339e1d50ce5f59b88905585f77193ebd5068f1e797 \ - --hash=sha256:e270c04f4d9b5671ebcc792b3ba5d4488bf7c42c3c241a3748e2599776f29696 \ - --hash=sha256:e886098619d3815e0ad5790c973afeee2c0e6e04b4da90b88e6bd06e2a0b1b72 \ - --hash=sha256:ec3b055ff8f1dce8e6ef28f626e0972981475173d7973d63f271b29c8a2897da \ - --hash=sha256:fba1e91467c65fe64a82c689dc6cf58151158993b13eb7a7f3f4b7f395636723 +cryptography==41.0.6 \ + --hash=sha256:068bc551698c234742c40049e46840843f3d98ad7ce265fd2bd4ec0d11306596 \ + --hash=sha256:0f27acb55a4e77b9be8d550d762b0513ef3fc658cd3eb15110ebbcbd626db12c \ + --hash=sha256:2132d5865eea673fe6712c2ed5fb4fa49dba10768bb4cc798345748380ee3660 \ + --hash=sha256:3288acccef021e3c3c10d58933f44e8602cf04dba96d9796d70d537bb2f4bbc4 \ + --hash=sha256:35f3f288e83c3f6f10752467c48919a7a94b7d88cc00b0668372a0d2ad4f8ead \ + --hash=sha256:398ae1fc711b5eb78e977daa3cbf47cec20f2c08c5da129b7a296055fbb22aed \ + --hash=sha256:422e3e31d63743855e43e5a6fcc8b4acab860f560f9321b0ee6269cc7ed70cc3 \ + --hash=sha256:48783b7e2bef51224020efb61b42704207dde583d7e371ef8fc2a5fb6c0aabc7 \ + --hash=sha256:4d03186af98b1c01a4eda396b137f29e4e3fb0173e30f885e27acec8823c1b09 \ + --hash=sha256:5daeb18e7886a358064a68dbcaf441c036cbdb7da52ae744e7b9207b04d3908c \ + --hash=sha256:60e746b11b937911dc70d164060d28d273e31853bb359e2b2033c9e93e6f3c43 \ + --hash=sha256:742ae5e9a2310e9dade7932f9576606836ed174da3c7d26bc3d3ab4bd49b9f65 \ + --hash=sha256:7e00fb556bda398b99b0da289ce7053639d33b572847181d6483ad89835115f6 \ + --hash=sha256:85abd057699b98fce40b41737afb234fef05c67e116f6f3650782c10862c43da \ + --hash=sha256:8efb2af8d4ba9dbc9c9dd8f04d19a7abb5b49eab1f3694e7b5a16a5fc2856f5c \ + --hash=sha256:ae236bb8760c1e55b7a39b6d4d32d2279bc6c7c8500b7d5a13b6fb9fc97be35b \ + --hash=sha256:afda76d84b053923c27ede5edc1ed7d53e3c9f475ebaf63c68e69f1403c405a8 \ + --hash=sha256:b27a7fd4229abef715e064269d98a7e2909ebf92eb6912a9603c7e14c181928c \ + --hash=sha256:b648fe2a45e426aaee684ddca2632f62ec4613ef362f4d681a9a6283d10e079d \ + --hash=sha256:c5a550dc7a3b50b116323e3d376241829fd326ac47bc195e04eb33a8170902a9 \ + --hash=sha256:da46e2b5df770070412c46f87bac0849b8d685c5f2679771de277a422c7d0b86 \ + --hash=sha256:f39812f70fc5c71a15aa3c97b2bbe213c3f2a460b79bd21c40d033bb34a9bf36 \ + --hash=sha256:ff369dd19e8fe0528b02e8df9f2aeb2479f89b1270d90f96a63500afe9af5cae # via # gcp-releasetool # secretstorage From 8482f4759ce3c4b00fa06a7f306a2ac4d4ee8eb7 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 8 Dec 2023 16:13:49 -0600 Subject: [PATCH 199/536] fix: keep `RowIterator.total_rows` populated after iteration (#1748) * fix: keep `RowIterator.total_rows` populated after iteration This was being reset in some cases when the rows were all available in the first page of results. * Update google/cloud/bigquery/table.py Co-authored-by: Anthonios Partheniou --------- Co-authored-by: Anthonios Partheniou --- google/cloud/bigquery/table.py | 4 ++-- tests/unit/test_table.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index dca9f7962..70e601714 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2997,9 +2997,9 @@ def _rows_page_start(iterator, page, response): page._columns = _row_iterator_page_columns(iterator._schema, response) total_rows = response.get("totalRows") + # Don't reset total_rows if it's not present in the next API response. if total_rows is not None: - total_rows = int(total_rows) - iterator._total_rows = total_rows + iterator._total_rows = int(total_rows) # pylint: enable=unused-argument diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 85f335dd1..9b3d4fe84 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2201,9 +2201,18 @@ def test_iterate_with_cached_first_page(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) row_iterator = self._make_one( - _mock_client(), api_request, path, schema, first_page_response=first_page + _mock_client(), + api_request, + path, + schema, + first_page_response=first_page, + total_rows=4, ) + self.assertEqual(row_iterator.total_rows, 4) rows = list(row_iterator) + # Total rows should be maintained, even though subsequent API calls + # don't include it. + self.assertEqual(row_iterator.total_rows, 4) self.assertEqual(len(rows), 4) self.assertEqual(rows[0].age, 27) self.assertEqual(rows[1].age, 28) From 89a647e19fe5d7302c0a39bba77a155635c5c29d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 8 Dec 2023 16:46:43 -0600 Subject: [PATCH 200/536] feat: add `Client.query_and_wait` which directly returns a `RowIterator` of results (#1722) * perf: use the first page a results when `query(api_method="QUERY")` * add tests * respect max_results with cached page * respect page_size, also avoid bqstorage if almost fully downloaded * skip true test if bqstorage not installed * coverage * feat: add `Client.query_and_wait` which directly returns a `RowIterator` of results Set the `QUERY_PREVIEW_ENABLED=TRUE` environment variable to use this with the new JOB_CREATION_OPTIONAL mode (currently in preview). * implement basic query_and_wait and add code sample to test * avoid duplicated QueryJob construction * update unit tests * fix merge conflict in rowiterator * support max_results, add tests * retry tests * unit test coverage * dont retry twice * fix mypy_samples session * consolidate docstrings for query_and_wait * remove mention of job ID * fallback to jobs.insert for unsupported features * distinguish API timeout from wait timeout * add test for jobs.insert fallback * populate default job config * refactor default config * add coverage for job_config * cancel job if hasn't finished * mypy * allow unrealeased features in samples * fix for 3.12 * fix: keep `RowIterator.total_rows` populated after iteration This was being reset in some cases when the rows were all available in the first page of results. * Update google/cloud/bigquery/table.py Co-authored-by: Anthonios Partheniou * fix comments --------- Co-authored-by: Anthonios Partheniou --- google/cloud/bigquery/_job_helpers.py | 316 +++++++++- google/cloud/bigquery/client.py | 152 ++++- google/cloud/bigquery/job/base.py | 10 +- google/cloud/bigquery/job/query.py | 10 +- google/cloud/bigquery/query.py | 12 + noxfile.py | 8 +- samples/client_query.py | 3 + samples/snippets/client_query.py | 37 ++ samples/snippets/client_query_test.py | 38 ++ tests/unit/test__job_helpers.py | 872 +++++++++++++++++++++++++- tests/unit/test_client.py | 162 ++++- tests/unit/test_query.py | 10 + 12 files changed, 1550 insertions(+), 80 deletions(-) create mode 100644 samples/snippets/client_query.py create mode 100644 samples/snippets/client_query_test.py diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 7992f28b6..095de4faa 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -12,9 +12,32 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Helpers for interacting with the job REST APIs from the client.""" +"""Helpers for interacting with the job REST APIs from the client. + +For queries, there are three cases to consider: + +1. jobs.insert: This always returns a job resource. +2. jobs.query, jobCreationMode=JOB_CREATION_REQUIRED: + This sometimes can return the results inline, but always includes a job ID. +3. jobs.query, jobCreationMode=JOB_CREATION_OPTIONAL: + This sometimes doesn't create a job at all, instead returning the results. + For better debugging, an auto-generated query ID is included in the + response. + +Client.query() calls either (1) or (2), depending on what the user provides +for the api_method parameter. query() always returns a QueryJob object, which +can retry the query when the query job fails for a retriable reason. + +Client.query_and_wait() calls (3). This returns a RowIterator that may wrap +local results from the response or may wrap a query job containing multiple +pages of results. Even though query_and_wait() waits for the job to complete, +we still need a separate job_retry object because there are different +predicates where it is safe to generate a new query ID. +""" import copy +import functools +import os import uuid from typing import Any, Dict, TYPE_CHECKING, Optional @@ -23,6 +46,7 @@ from google.cloud.bigquery import job import google.cloud.bigquery.query +from google.cloud.bigquery import table # Avoid circular imports if TYPE_CHECKING: # pragma: NO COVER @@ -59,6 +83,25 @@ def make_job_id(job_id: Optional[str] = None, prefix: Optional[str] = None) -> s return str(uuid.uuid4()) +def job_config_with_defaults( + job_config: Optional[job.QueryJobConfig], + default_job_config: Optional[job.QueryJobConfig], +) -> Optional[job.QueryJobConfig]: + """Create a copy of `job_config`, replacing unset values with those from + `default_job_config`. + """ + if job_config is None: + return default_job_config + + if default_job_config is None: + return job_config + + # Both job_config and default_job_config are not None, so make a copy of + # job_config merged with default_job_config. Anything already explicitly + # set on job_config should not be replaced. + return job_config._fill_from_default(default_job_config) + + def query_jobs_insert( client: "Client", query: str, @@ -67,9 +110,9 @@ def query_jobs_insert( job_id_prefix: Optional[str], location: Optional[str], project: str, - retry: retries.Retry, + retry: Optional[retries.Retry], timeout: Optional[float], - job_retry: retries.Retry, + job_retry: Optional[retries.Retry], ) -> job.QueryJob: """Initiate a query using jobs.insert. @@ -123,7 +166,13 @@ def do_query(): return future -def _to_query_request(job_config: Optional[job.QueryJobConfig]) -> Dict[str, Any]: +def _to_query_request( + job_config: Optional[job.QueryJobConfig] = None, + *, + query: str, + location: Optional[str] = None, + timeout: Optional[float] = None, +) -> Dict[str, Any]: """Transform from Job resource to QueryRequest resource. Most of the keys in job.configuration.query are in common with @@ -150,6 +199,15 @@ def _to_query_request(job_config: Optional[job.QueryJobConfig]) -> Dict[str, Any request_body.setdefault("formatOptions", {}) request_body["formatOptions"]["useInt64Timestamp"] = True # type: ignore + if timeout is not None: + # Subtract a buffer for context switching, network latency, etc. + request_body["timeoutMs"] = max(0, int(1000 * timeout) - _TIMEOUT_BUFFER_MILLIS) + + if location is not None: + request_body["location"] = location + + request_body["query"] = query + return request_body @@ -207,6 +265,10 @@ def _to_query_job( return query_job +def _to_query_path(project: str) -> str: + return f"/projects/{project}/queries" + + def query_jobs_query( client: "Client", query: str, @@ -217,18 +279,14 @@ def query_jobs_query( timeout: Optional[float], job_retry: retries.Retry, ) -> job.QueryJob: - """Initiate a query using jobs.query. + """Initiate a query using jobs.query with jobCreationMode=JOB_CREATION_REQUIRED. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query """ - path = f"/projects/{project}/queries" - request_body = _to_query_request(job_config) - - if timeout is not None: - # Subtract a buffer for context switching, network latency, etc. - request_body["timeoutMs"] = max(0, int(1000 * timeout) - _TIMEOUT_BUFFER_MILLIS) - request_body["location"] = location - request_body["query"] = query + path = _to_query_path(project) + request_body = _to_query_request( + query=query, job_config=job_config, location=location, timeout=timeout + ) def do_query(): request_body["requestId"] = make_job_id() @@ -253,3 +311,235 @@ def do_query(): future._job_retry = job_retry return future + + +def query_and_wait( + client: "Client", + query: str, + *, + job_config: Optional[job.QueryJobConfig], + location: Optional[str], + project: str, + api_timeout: Optional[float] = None, + wait_timeout: Optional[float] = None, + retry: Optional[retries.Retry], + job_retry: Optional[retries.Retry], + page_size: Optional[int] = None, + max_results: Optional[int] = None, +) -> table.RowIterator: + """Run the query, wait for it to finish, and return the results. + + While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the + ``jobs.query`` REST API, use the default ``jobCreationMode`` unless + the environment variable ``QUERY_PREVIEW_ENABLED=true``. After + ``jobCreationMode`` is GA, this method will always use + ``jobCreationMode=JOB_CREATION_OPTIONAL``. See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + + Args: + client: + BigQuery client to make API calls. + query (str): + SQL query to be executed. Defaults to the standard SQL + dialect. Use the ``job_config`` parameter to change dialects. + job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): + Extra configuration options for the job. + To override any options that were previously set in + the ``default_query_job_config`` given to the + ``Client`` constructor, manually set those options to ``None``, + or whatever value is preferred. + location (Optional[str]): + Location where to run the job. Must match the location of the + table used in the query as well as the destination table. + project (Optional[str]): + Project ID of the project of where to run the job. Defaults + to the client's project. + api_timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + wait_timeout (Optional[float]): + The number of seconds to wait for the query to finish. If the + query doesn't finish before this timeout, the client attempts + to cancel the query. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. This only applies to making RPC + calls. It isn't used to retry failed jobs. This has + a reasonable default that should only be overridden + with care. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded errors. Passing ``None`` disables + job retry. Not all jobs can be retried. + page_size (Optional[int]): + The maximum number of rows in each page of results from this + request. Non-positive values are ignored. + max_results (Optional[int]): + The maximum total number of rows from this request. + + Returns: + google.cloud.bigquery.table.RowIterator: + Iterator of row data + :class:`~google.cloud.bigquery.table.Row`-s. During each + page, the iterator will have the ``total_rows`` attribute + set, which counts the total number of rows **in the result + set** (this is distinct from the total number of rows in the + current page: ``iterator.page.num_items``). + + If the query is a special query that produces no results, e.g. + a DDL query, an ``_EmptyRowIterator`` instance is returned. + + Raises: + TypeError: + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.QueryJobConfig` + class. + """ + # Some API parameters aren't supported by the jobs.query API. In these + # cases, fallback to a jobs.insert call. + if not _supported_by_jobs_query(job_config): + return _wait_or_cancel( + query_jobs_insert( + client=client, + query=query, + job_id=None, + job_id_prefix=None, + job_config=job_config, + location=location, + project=project, + retry=retry, + timeout=api_timeout, + job_retry=job_retry, + ), + api_timeout=api_timeout, + wait_timeout=wait_timeout, + retry=retry, + page_size=page_size, + max_results=max_results, + ) + + path = _to_query_path(project) + request_body = _to_query_request( + query=query, job_config=job_config, location=location, timeout=api_timeout + ) + + if page_size is not None and max_results is not None: + request_body["maxResults"] = min(page_size, max_results) + elif page_size is not None or max_results is not None: + request_body["maxResults"] = page_size or max_results + + if os.getenv("QUERY_PREVIEW_ENABLED", "").casefold() == "true": + request_body["jobCreationMode"] = "JOB_CREATION_OPTIONAL" + + def do_query(): + request_body["requestId"] = make_job_id() + span_attributes = {"path": path} + + # For easier testing, handle the retries ourselves. + if retry is not None: + response = retry(client._call_api)( + retry=None, # We're calling the retry decorator ourselves. + span_name="BigQuery.query", + span_attributes=span_attributes, + method="POST", + path=path, + data=request_body, + timeout=api_timeout, + ) + else: + response = client._call_api( + retry=None, + span_name="BigQuery.query", + span_attributes=span_attributes, + method="POST", + path=path, + data=request_body, + timeout=api_timeout, + ) + + # Even if we run with JOB_CREATION_OPTIONAL, if there are more pages + # to fetch, there will be a job ID for jobs.getQueryResults. + query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + response + ) + page_token = query_results.page_token + more_pages = page_token is not None + + if more_pages or not query_results.complete: + # TODO(swast): Avoid a call to jobs.get in some cases (few + # remaining pages) by waiting for the query to finish and calling + # client._list_rows_from_query_results directly. Need to update + # RowIterator to fetch destination table via the job ID if needed. + return _wait_or_cancel( + _to_query_job(client, query, job_config, response), + api_timeout=api_timeout, + wait_timeout=wait_timeout, + retry=retry, + page_size=page_size, + max_results=max_results, + ) + + return table.RowIterator( + client=client, + api_request=functools.partial(client._call_api, retry, timeout=api_timeout), + path=None, + schema=query_results.schema, + max_results=max_results, + page_size=page_size, + total_rows=query_results.total_rows, + first_page_response=response, + location=query_results.location, + job_id=query_results.job_id, + query_id=query_results.query_id, + project=query_results.project, + ) + + if job_retry is not None: + return job_retry(do_query)() + else: + return do_query() + + +def _supported_by_jobs_query(job_config: Optional[job.QueryJobConfig]) -> bool: + """True if jobs.query can be used. False if jobs.insert is needed.""" + if job_config is None: + return True + + return ( + # These features aren't supported by jobs.query. + job_config.clustering_fields is None + and job_config.destination is None + and job_config.destination_encryption_configuration is None + and job_config.range_partitioning is None + and job_config.table_definitions is None + and job_config.time_partitioning is None + ) + + +def _wait_or_cancel( + job: job.QueryJob, + api_timeout: Optional[float], + wait_timeout: Optional[float], + retry: Optional[retries.Retry], + page_size: Optional[int], + max_results: Optional[int], +) -> table.RowIterator: + """Wait for a job to complete and return the results. + + If we can't return the results within the ``wait_timeout``, try to cancel + the job. + """ + try: + return job.result( + page_size=page_size, + max_results=max_results, + retry=retry, + timeout=wait_timeout, + ) + except Exception: + # Attempt to cancel the job since we can't return the results. + try: + job.cancel(retry=retry, timeout=api_timeout) + except Exception: + # Don't eat the original exception if cancel fails. + pass + raise diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 488a9ad29..284ccddb5 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -255,23 +255,31 @@ def __init__( self._connection = Connection(self, **kw_args) self._location = location - self._default_query_job_config = copy.deepcopy(default_query_job_config) self._default_load_job_config = copy.deepcopy(default_load_job_config) + # Use property setter so validation can run. + self.default_query_job_config = default_query_job_config + @property def location(self): """Default location for jobs / datasets / tables.""" return self._location @property - def default_query_job_config(self): - """Default ``QueryJobConfig``. - Will be merged into job configs passed into the ``query`` method. + def default_query_job_config(self) -> Optional[QueryJobConfig]: + """Default ``QueryJobConfig`` or ``None``. + + Will be merged into job configs passed into the ``query`` or + ``query_and_wait`` methods. """ return self._default_query_job_config @default_query_job_config.setter - def default_query_job_config(self, value: QueryJobConfig): + def default_query_job_config(self, value: Optional[QueryJobConfig]): + if value is not None: + _verify_job_config_type( + value, QueryJobConfig, param_name="default_query_job_config" + ) self._default_query_job_config = copy.deepcopy(value) @property @@ -3355,26 +3363,12 @@ def query( if location is None: location = self.location - if self._default_query_job_config: - if job_config: - _verify_job_config_type( - job_config, google.cloud.bigquery.job.QueryJobConfig - ) - # anything that's not defined on the incoming - # that is in the default, - # should be filled in with the default - # the incoming therefore has precedence - # - # Note that _fill_from_default doesn't mutate the receiver - job_config = job_config._fill_from_default( - self._default_query_job_config - ) - else: - _verify_job_config_type( - self._default_query_job_config, - google.cloud.bigquery.job.QueryJobConfig, - ) - job_config = self._default_query_job_config + if job_config is not None: + _verify_job_config_type(job_config, QueryJobConfig) + + job_config = _job_helpers.job_config_with_defaults( + job_config, self._default_query_job_config + ) # Note that we haven't modified the original job_config (or # _default_query_job_config) up to this point. @@ -3405,6 +3399,112 @@ def query( else: raise ValueError(f"Got unexpected value for api_method: {repr(api_method)}") + def query_and_wait( + self, + query, + *, + job_config: Optional[QueryJobConfig] = None, + location: Optional[str] = None, + project: Optional[str] = None, + api_timeout: TimeoutType = DEFAULT_TIMEOUT, + wait_timeout: TimeoutType = None, + retry: retries.Retry = DEFAULT_RETRY, + job_retry: retries.Retry = DEFAULT_JOB_RETRY, + page_size: Optional[int] = None, + max_results: Optional[int] = None, + ) -> RowIterator: + """Run the query, wait for it to finish, and return the results. + + While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the + ``jobs.query`` REST API, use the default ``jobCreationMode`` unless + the environment variable ``QUERY_PREVIEW_ENABLED=true``. After + ``jobCreationMode`` is GA, this method will always use + ``jobCreationMode=JOB_CREATION_OPTIONAL``. See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + + Args: + query (str): + SQL query to be executed. Defaults to the standard SQL + dialect. Use the ``job_config`` parameter to change dialects. + job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): + Extra configuration options for the job. + To override any options that were previously set in + the ``default_query_job_config`` given to the + ``Client`` constructor, manually set those options to ``None``, + or whatever value is preferred. + location (Optional[str]): + Location where to run the job. Must match the location of the + table used in the query as well as the destination table. + project (Optional[str]): + Project ID of the project of where to run the job. Defaults + to the client's project. + api_timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + wait_timeout (Optional[float]): + The number of seconds to wait for the query to finish. If the + query doesn't finish before this timeout, the client attempts + to cancel the query. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. This only applies to making RPC + calls. It isn't used to retry failed jobs. This has + a reasonable default that should only be overridden + with care. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded errors. Passing ``None`` disables + job retry. Not all jobs can be retried. + page_size (Optional[int]): + The maximum number of rows in each page of results from this + request. Non-positive values are ignored. + max_results (Optional[int]): + The maximum total number of rows from this request. + + Returns: + google.cloud.bigquery.table.RowIterator: + Iterator of row data + :class:`~google.cloud.bigquery.table.Row`-s. During each + page, the iterator will have the ``total_rows`` attribute + set, which counts the total number of rows **in the result + set** (this is distinct from the total number of rows in the + current page: ``iterator.page.num_items``). + + If the query is a special query that produces no results, e.g. + a DDL query, an ``_EmptyRowIterator`` instance is returned. + + Raises: + TypeError: + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.QueryJobConfig` + class. + """ + if project is None: + project = self.project + + if location is None: + location = self.location + + if job_config is not None: + _verify_job_config_type(job_config, QueryJobConfig) + + job_config = _job_helpers.job_config_with_defaults( + job_config, self._default_query_job_config + ) + + return _job_helpers.query_and_wait( + self, + query, + job_config=job_config, + location=location, + project=project, + api_timeout=api_timeout, + wait_timeout=wait_timeout, + retry=retry, + job_retry=job_retry, + page_size=page_size, + max_results=max_results, + ) + def insert_rows( self, table: Union[Table, TableReference, str], @@ -3853,7 +3953,7 @@ def _list_rows_from_query_results( job_id: str, location: str, project: str, - schema: SchemaField, + schema: Sequence[SchemaField], total_rows: Optional[int] = None, destination: Optional[Union[Table, TableReference, TableListItem, str]] = None, max_results: Optional[int] = None, diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 78df9142f..97e0ea3bd 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -21,6 +21,7 @@ import typing from typing import ClassVar, Dict, Optional, Sequence +from google.api_core import retry as retries from google.api_core import exceptions import google.api_core.future.polling @@ -28,9 +29,6 @@ from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery._helpers import _int_or_none -if typing.TYPE_CHECKING: # pragma: NO COVER - from google.api_core import retry as retries - _DONE_STATE = "DONE" _STOPPED_REASON = "stopped" @@ -825,7 +823,7 @@ def reload( def cancel( self, client=None, - retry: "retries.Retry" = DEFAULT_RETRY, + retry: Optional[retries.Retry] = DEFAULT_RETRY, timeout: Optional[float] = None, ) -> bool: """API call: cancel job via a POST request @@ -921,9 +919,9 @@ def done( self.reload(retry=retry, timeout=timeout) return self.state == _DONE_STATE - def result( # type: ignore # (signature complaint) + def result( # type: ignore # (incompatible with supertype) self, - retry: "retries.Retry" = DEFAULT_RETRY, + retry: Optional[retries.Retry] = DEFAULT_RETRY, timeout: Optional[float] = None, ) -> "_AsyncJob": """Start the job and wait for it to complete and get the result. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 79cd207a1..4a529f949 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -22,6 +22,7 @@ from google.api_core import exceptions from google.api_core.future import polling as polling_future +from google.api_core import retry as retries import requests from google.cloud.bigquery.dataset import Dataset @@ -69,7 +70,6 @@ import pandas # type: ignore import geopandas # type: ignore import pyarrow # type: ignore - from google.api_core import retry as retries from google.cloud import bigquery_storage from google.cloud.bigquery.client import Client from google.cloud.bigquery.table import RowIterator @@ -779,7 +779,7 @@ def to_api_repr(self) -> dict: resource = copy.deepcopy(self._properties) # Query parameters have an addition property associated with them # to indicate if the query is using named or positional parameters. - query_parameters = resource["query"].get("queryParameters") + query_parameters = resource.get("query", {}).get("queryParameters") if query_parameters: if query_parameters[0].get("name") is None: resource["query"]["parameterMode"] = "POSITIONAL" @@ -1469,14 +1469,14 @@ def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): except exceptions.GoogleAPIError as exc: self.set_exception(exc) - def result( # type: ignore # (complaints about the overloaded signature) + def result( # type: ignore # (incompatible with supertype) self, page_size: Optional[int] = None, max_results: Optional[int] = None, - retry: "retries.Retry" = DEFAULT_RETRY, + retry: Optional[retries.Retry] = DEFAULT_RETRY, timeout: Optional[float] = None, start_index: Optional[int] = None, - job_retry: "retries.Retry" = DEFAULT_JOB_RETRY, + job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: """Start the job and wait for it to complete and get the result. diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 54abe95a7..43591c648 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -911,6 +911,18 @@ def job_id(self): """ return self._properties.get("jobReference", {}).get("jobId") + @property + def location(self): + """Location of the query job these results are from. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.job_reference + + Returns: + str: Job ID of the query job. + """ + return self._properties.get("jobReference", {}).get("location") + @property def query_id(self) -> Optional[str]: """[Preview] ID of a completed query. diff --git a/noxfile.py b/noxfile.py index ab7803040..41492c7f0 100644 --- a/noxfile.py +++ b/noxfile.py @@ -205,13 +205,15 @@ def system(session): @nox.session(python=DEFAULT_PYTHON_VERSION) def mypy_samples(session): """Run type checks with mypy.""" - session.install("-e", ".[all]") - session.install("pytest") for requirements_path in CURRENT_DIRECTORY.glob("samples/*/requirements.txt"): - session.install("-r", requirements_path) + session.install("-r", str(requirements_path)) session.install(MYPY_VERSION) + # requirements.txt might include this package. Install from source so that + # we can author samples with unreleased features. + session.install("-e", ".[all]") + # Just install the dependencies' type info directly, since "mypy --install-types" # might require an additional pass. session.install( diff --git a/samples/client_query.py b/samples/client_query.py index 4df051ee2..80eac854e 100644 --- a/samples/client_query.py +++ b/samples/client_query.py @@ -14,6 +14,9 @@ def client_query() -> None: + # TODO(swast): remove once docs in cloud.google.com have been updated to + # use samples/snippets/client_query.py + # [START bigquery_query] from google.cloud import bigquery diff --git a/samples/snippets/client_query.py b/samples/snippets/client_query.py new file mode 100644 index 000000000..ccae2e8bd --- /dev/null +++ b/samples/snippets/client_query.py @@ -0,0 +1,37 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query() -> None: + # [START bigquery_query] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + query = """ + SELECT name, SUM(number) as total_people + FROM `bigquery-public-data.usa_names.usa_1910_2013` + WHERE state = 'TX' + GROUP BY name, state + ORDER BY total_people DESC + LIMIT 20 + """ + rows = client.query_and_wait(query) # Make an API request. + + print("The query data:") + for row in rows: + # Row values can be accessed by field name or index. + print("name={}, count={}".format(row[0], row["total_people"])) + # [END bigquery_query] diff --git a/samples/snippets/client_query_test.py b/samples/snippets/client_query_test.py new file mode 100644 index 000000000..1bc83a230 --- /dev/null +++ b/samples/snippets/client_query_test.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import client_query # type: ignore + +if typing.TYPE_CHECKING: + import pytest + + +def test_client_query(capsys: "pytest.CaptureFixture[str]") -> None: + client_query.client_query() + out, _ = capsys.readouterr() + assert "The query data:" in out + assert "name=James, count=272793" in out + + +def test_client_query_job_optional( + capsys: "pytest.CaptureFixture[str]", monkeypatch: "pytest.MonkeyPatch" +) -> None: + monkeypatch.setenv("QUERY_PREVIEW_ENABLED", "true") + + client_query.client_query() + out, _ = capsys.readouterr() + assert "The query data:" in out + assert "name=James, count=272793" in out diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 012352f4e..f2fe32d94 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -12,15 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import functools from typing import Any, Dict, Optional from unittest import mock +import freezegun +import google.api_core.exceptions from google.api_core import retry as retries import pytest from google.cloud.bigquery.client import Client from google.cloud.bigquery import _job_helpers -from google.cloud.bigquery.job.query import QueryJob, QueryJobConfig +from google.cloud.bigquery.job import query as job_query from google.cloud.bigquery.query import ConnectionProperty, ScalarQueryParameter @@ -55,9 +58,9 @@ def make_query_response( ("job_config", "expected"), ( (None, make_query_request()), - (QueryJobConfig(), make_query_request()), + (job_query.QueryJobConfig(), make_query_request()), ( - QueryJobConfig(default_dataset="my-project.my_dataset"), + job_query.QueryJobConfig(default_dataset="my-project.my_dataset"), make_query_request( { "defaultDataset": { @@ -67,17 +70,17 @@ def make_query_response( } ), ), - (QueryJobConfig(dry_run=True), make_query_request({"dryRun": True})), + (job_query.QueryJobConfig(dry_run=True), make_query_request({"dryRun": True})), ( - QueryJobConfig(use_query_cache=False), + job_query.QueryJobConfig(use_query_cache=False), make_query_request({"useQueryCache": False}), ), ( - QueryJobConfig(use_legacy_sql=True), + job_query.QueryJobConfig(use_legacy_sql=True), make_query_request({"useLegacySql": True}), ), ( - QueryJobConfig( + job_query.QueryJobConfig( query_parameters=[ ScalarQueryParameter("named_param1", "STRING", "param-value"), ScalarQueryParameter("named_param2", "INT64", 123), @@ -102,7 +105,7 @@ def make_query_response( ), ), ( - QueryJobConfig( + job_query.QueryJobConfig( query_parameters=[ ScalarQueryParameter(None, "STRING", "param-value"), ScalarQueryParameter(None, "INT64", 123), @@ -125,7 +128,7 @@ def make_query_response( ), ), ( - QueryJobConfig( + job_query.QueryJobConfig( connection_properties=[ ConnectionProperty(key="time_zone", value="America/Chicago"), ConnectionProperty(key="session_id", value="abcd-efgh-ijkl-mnop"), @@ -141,17 +144,18 @@ def make_query_response( ), ), ( - QueryJobConfig(labels={"abc": "def"}), + job_query.QueryJobConfig(labels={"abc": "def"}), make_query_request({"labels": {"abc": "def"}}), ), ( - QueryJobConfig(maximum_bytes_billed=987654), + job_query.QueryJobConfig(maximum_bytes_billed=987654), make_query_request({"maximumBytesBilled": "987654"}), ), ), ) def test__to_query_request(job_config, expected): - result = _job_helpers._to_query_request(job_config) + result = _job_helpers._to_query_request(job_config, query="SELECT 1") + expected["query"] = "SELECT 1" assert result == expected @@ -160,7 +164,9 @@ def test__to_query_job_defaults(): response = make_query_response( job_id="test-job", project_id="some-project", location="asia-northeast1" ) - job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response) + job: job_query.QueryJob = _job_helpers._to_query_job( + mock_client, "query-str", None, response + ) assert job.query == "query-str" assert job._client is mock_client assert job.job_id == "test-job" @@ -175,9 +181,9 @@ def test__to_query_job_dry_run(): response = make_query_response( job_id="test-job", project_id="some-project", location="asia-northeast1" ) - job_config: QueryJobConfig = QueryJobConfig() + job_config: job_query.QueryJobConfig = job_query.QueryJobConfig() job_config.dry_run = True - job: QueryJob = _job_helpers._to_query_job( + job: job_query.QueryJob = _job_helpers._to_query_job( mock_client, "query-str", job_config, response ) assert job.dry_run is True @@ -193,7 +199,9 @@ def test__to_query_job_dry_run(): def test__to_query_job_sets_state(completed, expected_state): mock_client = mock.create_autospec(Client) response = make_query_response(completed=completed) - job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response) + job: job_query.QueryJob = _job_helpers._to_query_job( + mock_client, "query-str", None, response + ) assert job.state == expected_state @@ -206,7 +214,9 @@ def test__to_query_job_sets_errors(): {"message": "something else went wrong"}, ] ) - job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response) + job: job_query.QueryJob = _job_helpers._to_query_job( + mock_client, "query-str", None, response + ) assert len(job.errors) == 2 # If we got back a response instead of an HTTP error status code, most # likely the job didn't completely fail. @@ -313,6 +323,717 @@ def test_query_jobs_query_sets_timeout(timeout, expected_timeout): assert request["timeoutMs"] == expected_timeout +def test_query_and_wait_uses_jobs_insert(): + """With unsupported features, call jobs.insert instead of jobs.query.""" + client = mock.create_autospec(Client) + client._call_api.return_value = { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "query": { + "query": "SELECT 1", + }, + # Make sure the job has "started" + "status": {"state": "DONE"}, + "jobComplete": True, + } + job_config = job_query.QueryJobConfig( + destination="dest-project.dest_dset.dest_table", + ) + _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=job_config, + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + + # We should call jobs.insert since jobs.query doesn't support destination. + request_path = "/projects/request-project/jobs" + client._call_api.assert_any_call( + None, # retry, + span_name="BigQuery.job.begin", + span_attributes={"path": request_path}, + job_ref=mock.ANY, + method="POST", + path=request_path, + data={ + "jobReference": { + "jobId": mock.ANY, + "projectId": "request-project", + "location": "request-location", + }, + "configuration": { + "query": { + "destinationTable": { + "projectId": "dest-project", + "datasetId": "dest_dset", + "tableId": "dest_table", + }, + "useLegacySql": False, + "query": "SELECT 1", + } + }, + }, + timeout=None, + ) + + +def test_query_and_wait_retries_job(): + freezegun.freeze_time(auto_tick_seconds=100) + client = mock.create_autospec(Client) + client._call_api.__name__ = "_call_api" + client._call_api.__qualname__ = "Client._call_api" + client._call_api.__annotations__ = {} + client._call_api.__type_params__ = () + client._call_api.side_effect = ( + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + google.api_core.exceptions.BadGateway("retry me"), + { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": True, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ], + }, + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + }, + ) + rows = _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + page_size=None, + max_results=None, + retry=retries.Retry( + lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), + multiplier=1.0, + ).with_deadline( + 200.0 + ), # Since auto_tick_seconds is 100, we should get at least 1 retry. + job_retry=retries.Retry( + lambda exc: isinstance(exc, google.api_core.exceptions.InternalServerError), + multiplier=1.0, + ).with_deadline(600.0), + ) + assert len(list(rows)) == 4 + + # For this code path, where the query has finished immediately, we should + # only be calling the jobs.query API and no other request path. + request_path = "/projects/request-project/queries" + for call in client._call_api.call_args_list: + _, kwargs = call + assert kwargs["method"] == "POST" + assert kwargs["path"] == request_path + + +@freezegun.freeze_time(auto_tick_seconds=100) +def test_query_and_wait_retries_job_times_out(): + client = mock.create_autospec(Client) + client._call_api.__name__ = "_call_api" + client._call_api.__qualname__ = "Client._call_api" + client._call_api.__annotations__ = {} + client._call_api.__type_params__ = () + client._call_api.side_effect = ( + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + ) + + with pytest.raises(google.api_core.exceptions.RetryError) as exc_info: + _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + page_size=None, + max_results=None, + retry=retries.Retry( + lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), + multiplier=1.0, + ).with_deadline( + 200.0 + ), # Since auto_tick_seconds is 100, we should get at least 1 retry. + job_retry=retries.Retry( + lambda exc: isinstance( + exc, google.api_core.exceptions.InternalServerError + ), + multiplier=1.0, + ).with_deadline(400.0), + ) + + assert isinstance( + exc_info.value.cause, google.api_core.exceptions.InternalServerError + ) + + +def test_query_and_wait_sets_job_creation_mode(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv( + "QUERY_PREVIEW_ENABLED", + # The comparison should be case insensitive. + "TrUe", + ) + client = mock.create_autospec(Client) + client._call_api.return_value = { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": True, + } + _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + + # We should only call jobs.query once, no additional row requests needed. + request_path = "/projects/request-project/queries" + client._call_api.assert_called_once_with( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": request_path}, + method="POST", + path=request_path, + data={ + "query": "SELECT 1", + "location": "request-location", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + "jobCreationMode": "JOB_CREATION_OPTIONAL", + }, + timeout=None, + ) + + +def test_query_and_wait_sets_location(): + client = mock.create_autospec(Client) + client._call_api.return_value = { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": True, + } + rows = _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + assert rows.location == "response-location" + + # We should only call jobs.query once, no additional row requests needed. + request_path = "/projects/request-project/queries" + client._call_api.assert_called_once_with( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": request_path}, + method="POST", + path=request_path, + data={ + "query": "SELECT 1", + "location": "request-location", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + }, + timeout=None, + ) + + +@pytest.mark.parametrize( + ("max_results", "page_size", "expected"), + [ + (10, None, 10), + (None, 11, 11), + (12, 100, 12), + (100, 13, 13), + ], +) +def test_query_and_wait_sets_max_results(max_results, page_size, expected): + client = mock.create_autospec(Client) + client._call_api.return_value = { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": True, + } + rows = _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + retry=None, + job_retry=None, + page_size=page_size, + max_results=max_results, + ) + assert rows.location == "response-location" + + # We should only call jobs.query once, no additional row requests needed. + request_path = "/projects/request-project/queries" + client._call_api.assert_called_once_with( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": request_path}, + method="POST", + path=request_path, + data={ + "query": "SELECT 1", + "location": "request-location", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + "maxResults": expected, + }, + timeout=None, + ) + + +def test_query_and_wait_caches_completed_query_results_one_page(): + client = mock.create_autospec(Client) + client._call_api.return_value = { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "US", + }, + "jobComplete": True, + "queryId": "xyz", + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ], + }, + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + # Even though totalRows > len(rows), we should use the presense of a + # next page token to decide if there are any more pages. + "totalRows": 8, + } + rows = _job_helpers.query_and_wait( + client, + query="SELECT full_name, age FROM people;", + job_config=None, + location=None, + project="request-project", + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + rows_list = list(rows) + assert rows.project == "response-project" + assert rows.job_id == "abc" + assert rows.location == "US" + assert rows.query_id == "xyz" + assert rows.total_rows == 8 + assert len(rows_list) == 4 + + # We should only call jobs.query once, no additional row requests needed. + request_path = "/projects/request-project/queries" + client._call_api.assert_called_once_with( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": request_path}, + method="POST", + path=request_path, + data={ + "query": "SELECT full_name, age FROM people;", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + }, + timeout=None, + ) + + +def test_query_and_wait_caches_completed_query_results_one_page_no_rows(): + client = mock.create_autospec(Client) + client._call_api.return_value = { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "US", + }, + "jobComplete": True, + "queryId": "xyz", + } + rows = _job_helpers.query_and_wait( + client, + query="CREATE TABLE abc;", + project="request-project", + job_config=None, + location=None, + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + assert rows.project == "response-project" + assert rows.job_id == "abc" + assert rows.location == "US" + assert rows.query_id == "xyz" + assert list(rows) == [] + + # We should only call jobs.query once, no additional row requests needed. + request_path = "/projects/request-project/queries" + client._call_api.assert_called_once_with( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": request_path}, + method="POST", + path=request_path, + data={ + "query": "CREATE TABLE abc;", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + }, + timeout=None, + ) + + +def test_query_and_wait_caches_completed_query_results_more_pages(): + client = mock.create_autospec(Client) + client._list_rows_from_query_results = functools.partial( + Client._list_rows_from_query_results, client + ) + client._call_api.side_effect = ( + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "jobComplete": True, + "queryId": "xyz", + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ], + }, + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + # Even though totalRows <= len(rows), we should use the presense of a + # next page token to decide if there are any more pages. + "totalRows": 2, + "pageToken": "page-2", + }, + # TODO(swast): This is a case where we can avoid a call to jobs.get, + # but currently do so because the RowIterator might need the + # destination table, since results aren't fully cached. + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + }, + { + "rows": [ + {"f": [{"v": "Pebbles Phlyntstone"}, {"v": "4"}]}, + {"f": [{"v": "Bamm-Bamm Rhubble"}, {"v": "5"}]}, + {"f": [{"v": "Joseph Rockhead"}, {"v": "32"}]}, + {"f": [{"v": "Perry Masonry"}, {"v": "33"}]}, + ], + "totalRows": 3, + "pageToken": "page-3", + }, + { + "rows": [ + {"f": [{"v": "Pearl Slaghoople"}, {"v": "53"}]}, + ], + "totalRows": 4, + }, + ) + rows = _job_helpers.query_and_wait( + client, + query="SELECT full_name, age FROM people;", + project="request-project", + job_config=None, + location=None, + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + assert rows.total_rows == 2 # Match the API response. + rows_list = list(rows) + assert rows.total_rows == 4 # Match the final API response. + assert len(rows_list) == 9 + + # Start the query. + jobs_query_path = "/projects/request-project/queries" + client._call_api.assert_any_call( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": jobs_query_path}, + method="POST", + path=jobs_query_path, + data={ + "query": "SELECT full_name, age FROM people;", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + }, + timeout=None, + ) + + # TODO(swast): Fetching job metadata isn't necessary in this case. + jobs_get_path = "/projects/response-project/jobs/response-job-id" + client._call_api.assert_any_call( + None, # retry + span_name="BigQuery.job.reload", + span_attributes={"path": jobs_get_path}, + job_ref=mock.ANY, + method="GET", + path=jobs_get_path, + query_params={"location": "response-location"}, + timeout=None, + ) + + # Fetch the remaining two pages. + jobs_get_query_results_path = "/projects/response-project/queries/response-job-id" + client._call_api.assert_any_call( + None, # retry + timeout=None, + method="GET", + path=jobs_get_query_results_path, + query_params={ + "pageToken": "page-2", + "fields": "jobReference,totalRows,pageToken,rows", + "location": "response-location", + "formatOptions.useInt64Timestamp": True, + }, + ) + client._call_api.assert_any_call( + None, # retry + timeout=None, + method="GET", + path=jobs_get_query_results_path, + query_params={ + "pageToken": "page-3", + "fields": "jobReference,totalRows,pageToken,rows", + "location": "response-location", + "formatOptions.useInt64Timestamp": True, + }, + ) + + +def test_query_and_wait_incomplete_query(): + client = mock.create_autospec(Client) + client._get_query_results = functools.partial(Client._get_query_results, client) + client._list_rows_from_query_results = functools.partial( + Client._list_rows_from_query_results, client + ) + client._call_api.side_effect = ( + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "jobComplete": False, + }, + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "jobComplete": True, + "totalRows": 2, + "queryId": "xyz", + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ], + }, + }, + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + }, + { + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + # Even though totalRows <= len(rows), we should use the presense of a + # next page token to decide if there are any more pages. + "totalRows": 2, + "pageToken": "page-2", + }, + { + "rows": [ + {"f": [{"v": "Pearl Slaghoople"}, {"v": "53"}]}, + ], + }, + ) + rows = _job_helpers.query_and_wait( + client, + query="SELECT full_name, age FROM people;", + project="request-project", + job_config=None, + location=None, + retry=None, + job_retry=None, + page_size=None, + max_results=None, + ) + rows_list = list(rows) + assert rows.total_rows == 2 # Match the API response. + assert len(rows_list) == 5 + + # Start the query. + jobs_query_path = "/projects/request-project/queries" + client._call_api.assert_any_call( + None, # retry + span_name="BigQuery.query", + span_attributes={"path": jobs_query_path}, + method="POST", + path=jobs_query_path, + data={ + "query": "SELECT full_name, age FROM people;", + "useLegacySql": False, + "formatOptions": { + "useInt64Timestamp": True, + }, + "requestId": mock.ANY, + }, + timeout=None, + ) + + # Wait for the query to finish. + jobs_get_query_results_path = "/projects/response-project/queries/response-job-id" + client._call_api.assert_any_call( + None, # retry + span_name="BigQuery.getQueryResults", + span_attributes={"path": jobs_get_query_results_path}, + method="GET", + path=jobs_get_query_results_path, + query_params={ + # job_query.QueryJob uses getQueryResults to wait for the query to finish. + # It avoids fetching the results because: + # (1) For large rows this can take a long time, much longer than + # our progress bar update frequency. + # See: https://github.com/googleapis/python-bigquery/issues/403 + # (2) Caching the first page of results uses an unexpected increase in memory. + # See: https://github.com/googleapis/python-bigquery/issues/394 + "maxResults": 0, + "location": "response-location", + }, + timeout=None, + ) + + # Fetch the job metadata in case the RowIterator needs the destination table. + jobs_get_path = "/projects/response-project/jobs/response-job-id" + client._call_api.assert_any_call( + None, # retry + span_name="BigQuery.job.reload", + span_attributes={"path": jobs_get_path}, + job_ref=mock.ANY, + method="GET", + path=jobs_get_path, + query_params={"location": "response-location"}, + timeout=None, + ) + + # Fetch the remaining two pages. + client._call_api.assert_any_call( + None, # retry + timeout=None, + method="GET", + path=jobs_get_query_results_path, + query_params={ + "fields": "jobReference,totalRows,pageToken,rows", + "location": "response-location", + "formatOptions.useInt64Timestamp": True, + }, + ) + client._call_api.assert_any_call( + None, # retry + timeout=None, + method="GET", + path=jobs_get_query_results_path, + query_params={ + "pageToken": "page-2", + "fields": "jobReference,totalRows,pageToken,rows", + "location": "response-location", + "formatOptions.useInt64Timestamp": True, + }, + ) + + def test_make_job_id_wo_suffix(): job_id = _job_helpers.make_job_id("job_id") assert job_id == "job_id" @@ -335,3 +1056,120 @@ def test_make_job_id_random(): def test_make_job_id_w_job_id_overrides_prefix(): job_id = _job_helpers.make_job_id("job_id", prefix="unused_prefix") assert job_id == "job_id" + + +@pytest.mark.parametrize( + ("job_config", "expected"), + ( + pytest.param(None, True), + pytest.param(job_query.QueryJobConfig(), True, id="default"), + pytest.param( + job_query.QueryJobConfig(use_query_cache=False), True, id="use_query_cache" + ), + pytest.param( + job_query.QueryJobConfig(maximum_bytes_billed=10_000_000), + True, + id="maximum_bytes_billed", + ), + pytest.param( + job_query.QueryJobConfig(clustering_fields=["a", "b", "c"]), + False, + id="clustering_fields", + ), + pytest.param( + job_query.QueryJobConfig(destination="p.d.t"), False, id="destination" + ), + pytest.param( + job_query.QueryJobConfig( + destination_encryption_configuration=job_query.EncryptionConfiguration( + "key" + ) + ), + False, + id="destination_encryption_configuration", + ), + ), +) +def test_supported_by_jobs_query( + job_config: Optional[job_query.QueryJobConfig], expected: bool +): + assert _job_helpers._supported_by_jobs_query(job_config) == expected + + +def test_wait_or_cancel_no_exception(): + job = mock.create_autospec(job_query.QueryJob, instance=True) + expected_rows = object() + job.result.return_value = expected_rows + retry = retries.Retry() + + rows = _job_helpers._wait_or_cancel( + job, + api_timeout=123, + wait_timeout=456, + retry=retry, + page_size=789, + max_results=101112, + ) + + job.result.assert_called_once_with( + timeout=456, + retry=retry, + page_size=789, + max_results=101112, + ) + assert rows is expected_rows + + +def test_wait_or_cancel_exception_cancels_job(): + job = mock.create_autospec(job_query.QueryJob, instance=True) + job.result.side_effect = google.api_core.exceptions.BadGateway("test error") + retry = retries.Retry() + + with pytest.raises(google.api_core.exceptions.BadGateway): + _job_helpers._wait_or_cancel( + job, + api_timeout=123, + wait_timeout=456, + retry=retry, + page_size=789, + max_results=101112, + ) + + job.result.assert_called_once_with( + timeout=456, + retry=retry, + page_size=789, + max_results=101112, + ) + job.cancel.assert_called_once_with( + timeout=123, + retry=retry, + ) + + +def test_wait_or_cancel_exception_raises_original_exception(): + job = mock.create_autospec(job_query.QueryJob, instance=True) + job.result.side_effect = google.api_core.exceptions.BadGateway("test error") + job.cancel.side_effect = google.api_core.exceptions.NotFound("don't raise me") + retry = retries.Retry() + + with pytest.raises(google.api_core.exceptions.BadGateway): + _job_helpers._wait_or_cancel( + job, + api_timeout=123, + wait_timeout=456, + retry=retry, + page_size=789, + max_results=101112, + ) + + job.result.assert_called_once_with( + timeout=456, + retry=retry, + page_size=789, + max_results=101112, + ) + job.cancel.assert_called_once_with( + timeout=123, + retry=retry, + ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ff4c40f48..c8968adbb 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -70,8 +70,9 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import exceptions -from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from google.cloud.bigquery import ParquetOptions +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +import google.cloud.bigquery.table try: from google.cloud import bigquery_storage @@ -4953,20 +4954,17 @@ def test_query_w_client_default_config_no_incoming(self): ) def test_query_w_invalid_default_job_config(self): - job_id = "some-job-id" - query = "select count(*) from persons" creds = _make_credentials() http = object() default_job_config = object() - client = self._make_one( - project=self.PROJECT, - credentials=creds, - _http=http, - default_query_job_config=default_job_config, - ) with self.assertRaises(TypeError) as exc: - client.query(query, job_id=job_id, location=self.LOCATION) + self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_query_job_config=default_job_config, + ) self.assertIn("Expected an instance of QueryJobConfig", exc.exception.args[0]) def test_query_w_client_location(self): @@ -5213,6 +5211,150 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): assert result is mock.sentinel.query_job + def test_query_and_wait_defaults(self): + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + "schema": { + "fields": [ + { + "name": "f0_", + "type": "INTEGER", + "mode": "NULLABLE", + }, + ], + }, + "totalRows": "1", + "rows": [{"f": [{"v": "5552452"}]}], + "queryId": "job_abcDEF_", + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(jobs_query_response) + + rows = client.query_and_wait(query) + + self.assertIsInstance(rows, google.cloud.bigquery.table.RowIterator) + self.assertEqual(rows.query_id, "job_abcDEF_") + self.assertEqual(rows.total_rows, 1) + # No job reference in the response should be OK for completed query. + self.assertIsNone(rows.job_id) + self.assertIsNone(rows.project) + self.assertIsNone(rows.location) + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], "/projects/PROJECT/queries") + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) + sent = req["data"] + self.assertEqual(sent["query"], query) + self.assertFalse(sent["useLegacySql"]) + + def test_query_and_wait_w_default_query_job_config(self): + from google.cloud.bigquery import job + + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_query_job_config=job.QueryJobConfig( + labels={ + "default-label": "default-value", + }, + ), + ) + conn = client._connection = make_connection(jobs_query_response) + + _ = client.query_and_wait(query) + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries") + sent = req["data"] + self.assertEqual(sent["labels"], {"default-label": "default-value"}) + + def test_query_and_wait_w_job_config(self): + from google.cloud.bigquery import job + + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + ) + conn = client._connection = make_connection(jobs_query_response) + + _ = client.query_and_wait( + query, + job_config=job.QueryJobConfig( + labels={ + "job_config-label": "job_config-value", + }, + ), + ) + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries") + sent = req["data"] + self.assertEqual(sent["labels"], {"job_config-label": "job_config-value"}) + + def test_query_and_wait_w_location(self): + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(jobs_query_response) + + _ = client.query_and_wait(query, location="not-the-client-location") + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries") + sent = req["data"] + self.assertEqual(sent["location"], "not-the-client-location") + + def test_query_and_wait_w_project(self): + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(jobs_query_response) + + _ = client.query_and_wait(query, project="not-the-client-project") + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], "/projects/not-the-client-project/queries") + def test_insert_rows_w_timeout(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 7c3438567..1704abac7 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -1375,6 +1375,16 @@ def test_job_id_present(self): query = self._make_one(resource) self.assertEqual(query.job_id, "custom-job") + def test_location_missing(self): + query = self._make_one({}) + self.assertIsNone(query.location) + + def test_location_present(self): + resource = self._make_resource() + resource["jobReference"]["location"] = "test-location" + query = self._make_one(resource) + self.assertEqual(query.location, "test-location") + def test_page_token_missing(self): query = self._make_one(self._make_resource()) self.assertIsNone(query.page_token) From 330fa5e3f9a50ccb19441ae40aad191365b0a110 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 11 Dec 2023 15:51:04 -0800 Subject: [PATCH 201/536] chore(main): release 3.14.0 (#1709) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41206fd78..c1bd5b389 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,33 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.14.0](https://github.com/googleapis/python-bigquery/compare/v3.13.0...v3.14.0) (2023-12-08) + + +### Features + +* Add `Client.query_and_wait` which directly returns a `RowIterator` of results ([#1722](https://github.com/googleapis/python-bigquery/issues/1722)) ([89a647e](https://github.com/googleapis/python-bigquery/commit/89a647e19fe5d7302c0a39bba77a155635c5c29d)) +* Add `job_id`, `location`, `project`, and `query_id` properties on `RowIterator` ([#1733](https://github.com/googleapis/python-bigquery/issues/1733)) ([494f275](https://github.com/googleapis/python-bigquery/commit/494f275ab2493dc7904f685c4d12e60bef51ab21)) +* Add `job_timeout_ms` to job configuration classes ([#1675](https://github.com/googleapis/python-bigquery/issues/1675)) ([84d64cd](https://github.com/googleapis/python-bigquery/commit/84d64cdd157afef4a7bf7807e557d59452133434)) +* Add support dataset.max_time_travel_hours ([#1683](https://github.com/googleapis/python-bigquery/issues/1683)) ([f22eff2](https://github.com/googleapis/python-bigquery/commit/f22eff25f116f1c4973ac2b8b03bc8a4ae1f3f42)) +* Add support for Dataset.isCaseInsensitive ([#1671](https://github.com/googleapis/python-bigquery/issues/1671)) ([386fa86](https://github.com/googleapis/python-bigquery/commit/386fa86c89b8cff69fc02213254a1c53c02fee42)) +* Add support for Python 3.12 ([#1736](https://github.com/googleapis/python-bigquery/issues/1736)) ([3c0976a](https://github.com/googleapis/python-bigquery/commit/3c0976aecb0f917477feef4e9ed865997c2bb106)) +* Removed pkg_resources from all test files and moved importlib into pandas extra ([#1726](https://github.com/googleapis/python-bigquery/issues/1726)) ([1f4ebb1](https://github.com/googleapis/python-bigquery/commit/1f4ebb1eca4f9380a31172fc8cb2fae125f8c5a2)) +* Support data_governance_type ([#1708](https://github.com/googleapis/python-bigquery/issues/1708)) ([eff365d](https://github.com/googleapis/python-bigquery/commit/eff365dc17755d0855338e2f273428ffe2056f67)) + + +### Bug Fixes + +* `load_table_from_dataframe` now assumes there may be local null values ([#1735](https://github.com/googleapis/python-bigquery/issues/1735)) ([f05dc69](https://github.com/googleapis/python-bigquery/commit/f05dc69a1f8c65ac32085bfcc6950c2c83f8a843)) +* Ensure query job retry has longer deadline than API request deadline ([#1734](https://github.com/googleapis/python-bigquery/issues/1734)) ([5573579](https://github.com/googleapis/python-bigquery/commit/55735791122f97b7f67cb962b489fd1f12210af5)) +* Keep `RowIterator.total_rows` populated after iteration ([#1748](https://github.com/googleapis/python-bigquery/issues/1748)) ([8482f47](https://github.com/googleapis/python-bigquery/commit/8482f4759ce3c4b00fa06a7f306a2ac4d4ee8eb7)) +* Move grpc, proto-plus and protobuf packages to extras ([#1721](https://github.com/googleapis/python-bigquery/issues/1721)) ([5ce4d13](https://github.com/googleapis/python-bigquery/commit/5ce4d136af97b91fbe1cc56bba1021e50a9c8476)) + + +### Performance Improvements + +* Use the first page a results when `query(api_method="QUERY")` ([#1723](https://github.com/googleapis/python-bigquery/issues/1723)) ([6290517](https://github.com/googleapis/python-bigquery/commit/6290517d6b153a31f20098f75aee580b7915aca9)) + ## [3.13.0](https://github.com/googleapis/python-bigquery/compare/v3.12.0...v3.13.0) (2023-10-30) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index ee029aced..7d9a17e98 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.13.0" +__version__ = "3.14.0" From 09017a997010f78bb6e34238fab15247ed14ea7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20W=C3=B3jcik?= Date: Wed, 13 Dec 2023 23:15:35 +0100 Subject: [PATCH 202/536] fix: add missing handler for deserializing json value (#1587) * fix: add missing handler for deserializing json value * fix mypy --------- Co-authored-by: Anthonios Partheniou Co-authored-by: Tim Swast Co-authored-by: Lingqing Gan --- google/cloud/bigquery/_helpers.py | 8 ++++++++ google/cloud/bigquery/query.py | 6 +++--- tests/unit/test__helpers.py | 18 ++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 684cbfc12..13baea4ad 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -17,6 +17,7 @@ import base64 import datetime import decimal +import json import math import re import os @@ -412,6 +413,12 @@ def _time_to_json(value): return value +def _json_from_json(value, field): + """Coerce 'value' to a pythonic JSON representation, if set or not nullable.""" + if _not_null(value, field): + return json.loads(value) + + # Converters used for scalar values marshalled as row data. _SCALAR_VALUE_TO_JSON_ROW = { "INTEGER": _int_to_json, @@ -427,6 +434,7 @@ def _time_to_json(value): "DATETIME": _datetime_to_json, "DATE": _date_to_json, "TIME": _time_to_json, + "JSON": _json_from_json, # Make sure DECIMAL and BIGDECIMAL are handled, even though # requests for them should be converted to NUMERIC. Better safe # than sorry. diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 43591c648..a06ece503 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -469,7 +469,7 @@ def to_api_repr(self) -> dict: value = self.value converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) if converter is not None: - value = converter(value) + value = converter(value) # type: ignore resource: Dict[str, Any] = { "parameterType": {"type": self.type_}, "parameterValue": {"value": value}, @@ -626,7 +626,7 @@ def to_api_repr(self) -> dict: converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"]) if converter is not None: - values = [converter(value) for value in values] + values = [converter(value) for value in values] # type: ignore a_values = [{"value": value} for value in values] resource = { @@ -775,7 +775,7 @@ def to_api_repr(self) -> dict: s_types[name] = {"name": name, "type": {"type": type_}} converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) if converter is not None: - value = converter(value) + value = converter(value) # type: ignore values[name] = {"value": value} resource = { diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index e2e2da3c8..3c425da5f 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -58,6 +58,24 @@ def test_w_float_value(self): self.assertEqual(coerced, 42) +class Test_json_from_json(unittest.TestCase): + def _call_fut(self, value, field): + from google.cloud.bigquery._helpers import _json_from_json + + return _json_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._call_fut(None, _Field("REQUIRED")) + + def test_w_string_value(self): + coerced = self._call_fut('{"foo": true}', object()) + self.assertEqual(coerced, {"foo": True}) + + class Test_float_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _float_from_json From c9af8c181998187e6636a1b0b4accb9fd98fa6cf Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:09:50 -0600 Subject: [PATCH 203/536] chore(main): release 3.14.1 (#1750) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c1bd5b389..1c921fda8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.14.1](https://github.com/googleapis/python-bigquery/compare/v3.14.0...v3.14.1) (2023-12-13) + + +### Bug Fixes + +* Add missing handler for deserializing json value ([#1587](https://github.com/googleapis/python-bigquery/issues/1587)) ([09017a9](https://github.com/googleapis/python-bigquery/commit/09017a997010f78bb6e34238fab15247ed14ea7e)) + ## [3.14.0](https://github.com/googleapis/python-bigquery/compare/v3.13.0...v3.14.0) (2023-12-08) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 7d9a17e98..6073384c9 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.14.0" +__version__ = "3.14.1" From 0d930739c78b557db6cd48b38fe16eba93719c40 Mon Sep 17 00:00:00 2001 From: ericapetersson Date: Thu, 14 Dec 2023 17:48:14 +0100 Subject: [PATCH 204/536] fix: Deserializing JSON subfields within structs fails (#1742) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … for deserializing json subfields from bigquery, this adds support for that. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #[1500](https://togithub.com/googleapis/python-bigquery/issues/1500) 🦕 --- google/cloud/bigquery/_helpers.py | 19 ++++++++++++------- tests/unit/test__helpers.py | 16 ++++++++++++++-- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 13baea4ad..93b46341e 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -239,6 +239,15 @@ def _record_from_json(value, field): return record +def _json_from_json(value, field): + """Coerce 'value' to a Pythonic JSON representation.""" + if _not_null(value, field): + return json.loads(value) + else: + return None + + +# Parse BigQuery API response JSON into a Python representation. _CELLDATA_FROM_JSON = { "INTEGER": _int_from_json, "INT64": _int_from_json, @@ -257,6 +266,7 @@ def _record_from_json(value, field): "DATE": _date_from_json, "TIME": _time_from_json, "RECORD": _record_from_json, + "JSON": _json_from_json, } _QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON) @@ -413,13 +423,8 @@ def _time_to_json(value): return value -def _json_from_json(value, field): - """Coerce 'value' to a pythonic JSON representation, if set or not nullable.""" - if _not_null(value, field): - return json.loads(value) - - -# Converters used for scalar values marshalled as row data. +# Converters used for scalar values marshalled to the BigQuery API, such as in +# query parameters or the tabledata.insert API. _SCALAR_VALUE_TO_JSON_ROW = { "INTEGER": _int_to_json, "INT64": _int_to_json, diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 3c425da5f..7bf55baeb 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -15,6 +15,7 @@ import base64 import datetime import decimal +import json import unittest import mock @@ -71,9 +72,20 @@ def test_w_none_required(self): with self.assertRaises(TypeError): self._call_fut(None, _Field("REQUIRED")) + def test_w_json_field(self): + data_field = _Field("REQUIRED", "data", "JSON") + + value = json.dumps( + {"v": {"key": "value"}}, + ) + + expected_output = {"v": {"key": "value"}} + coerced_output = self._call_fut(value, data_field) + self.assertEqual(coerced_output, expected_output) + def test_w_string_value(self): - coerced = self._call_fut('{"foo": true}', object()) - self.assertEqual(coerced, {"foo": True}) + coerced = self._call_fut('"foo"', object()) + self.assertEqual(coerced, "foo") class Test_float_from_json(unittest.TestCase): From c3b89fdd565c007b960fcb38029d6629af67f756 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 14 Dec 2023 13:35:48 -0800 Subject: [PATCH 205/536] chore: use freezegun to mock time in retry tests (#1753) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: use freezegun to mock time in retry tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- tests/unit/test_job_retry.py | 105 +++++++++++++++++------------------ 1 file changed, 50 insertions(+), 55 deletions(-) diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index b2095d2f2..4fa96fcec 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -20,6 +20,7 @@ import google.api_core.exceptions import google.api_core.retry +import freezegun from .helpers import make_connection @@ -156,69 +157,63 @@ def api_request(method, path, query_params=None, data=None, **kw): assert len(sleep.mock_calls) == 0 -@mock.patch("google.api_core.retry.datetime_helpers") @mock.patch("time.sleep") -def test_retry_failed_jobs_after_retry_failed(sleep, datetime_helpers, client): +def test_retry_failed_jobs_after_retry_failed(sleep, client): """ If at first you don't succeed, maybe you will later. :) """ conn = client._connection = make_connection() - datetime_helpers.utcnow.return_value = datetime.datetime(2021, 7, 29, 10, 43, 2) + with freezegun.freeze_time("2024-01-01 00:00:00") as frozen_datetime: + err = dict(reason="rateLimitExceeded") - err = dict(reason="rateLimitExceeded") - - def api_request(method, path, query_params=None, data=None, **kw): - calls = sleep.mock_calls - if calls: - datetime_helpers.utcnow.return_value += datetime.timedelta( - seconds=calls[-1][1][0] - ) - response = dict(status=dict(state="DONE", errors=[err], errorResult=err)) - response["jobReference"] = data["jobReference"] - return response - - conn.api_request.side_effect = api_request - - job = client.query("select 1") - orig_job_id = job.job_id - - with pytest.raises(google.api_core.exceptions.RetryError): - job.result() - - # We never got a successful job, so the job id never changed: - assert job.job_id == orig_job_id - - # We failed because we couldn't succeed after 120 seconds. - # But we can try again: - err2 = dict(reason="backendError") # We also retry on this - responses = [ - dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), - dict(status=dict(state="DONE", errors=[err], errorResult=err)), - dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), - dict(status=dict(state="DONE")), - dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), - ] - - def api_request(method, path, query_params=None, data=None, **kw): - calls = sleep.mock_calls - datetime_helpers.utcnow.return_value += datetime.timedelta( - seconds=calls[-1][1][0] - ) - response = responses.pop(0) - if data: + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + if calls: + frozen_datetime.tick(delta=datetime.timedelta(seconds=calls[-1][1][0])) + response = dict(status=dict(state="DONE", errors=[err], errorResult=err)) response["jobReference"] = data["jobReference"] - else: - response["jobReference"] = dict( - jobId=path.split("/")[-1], projectId="PROJECT" - ) - return response - - conn.api_request.side_effect = api_request - result = job.result() - assert result.total_rows == 1 - assert not responses # We made all the calls we expected to. - assert job.job_id != orig_job_id + return response + + conn.api_request.side_effect = api_request + + job = client.query("select 1") + orig_job_id = job.job_id + + with pytest.raises(google.api_core.exceptions.RetryError): + job.result() + + # We never got a successful job, so the job id never changed: + assert job.job_id == orig_job_id + + # We failed because we couldn't succeed after 120 seconds. + # But we can try again: + err2 = dict(reason="backendError") # We also retry on this + responses = [ + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE")), + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + frozen_datetime.tick(delta=datetime.timedelta(seconds=calls[-1][1][0])) + response = responses.pop(0) + if data: + response["jobReference"] = data["jobReference"] + else: + response["jobReference"] = dict( + jobId=path.split("/")[-1], projectId="PROJECT" + ) + return response + + conn.api_request.side_effect = api_request + result = job.result() + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + assert job.job_id != orig_job_id def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): From fdd580f2b603ca29f1ef52d094ebb8016d9bdc9d Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 14 Dec 2023 20:27:23 -0500 Subject: [PATCH 206/536] ci: update required checks (#1749) * ci: update required checks * (test) remove retry from datetime_helpers path --------- Co-authored-by: Lingqing Gan --- .github/sync-repo-settings.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 220c031b2..6543d5285 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -11,10 +11,17 @@ branchProtectionRules: requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' + - 'Kokoro snippets-3.12' + - 'Kokoro system-3.8' + - 'Kokoro system-3.12' - 'cla/google' - 'Samples - Lint' - 'Samples - Python 3.7' - 'Samples - Python 3.8' + - 'Samples - Python 3.9' + - 'Samples - Python 3.10' + - 'Samples - Python 3.11' + - 'Samples - Python 3.12' - pattern: v2 requiresLinearHistory: true requiresCodeOwnerReviews: true From b9c8be0982c76187444300c414e0dda8b0ad105b Mon Sep 17 00:00:00 2001 From: Gaurang Shah Date: Mon, 18 Dec 2023 14:16:27 -0500 Subject: [PATCH 207/536] fix: load_table_from_dataframe for higher scale decimal (#1703) * fix: load_table_from_dataframe for higher scale decimal * Update test_client.py * fix test_load_table_from_dataframe_w_higher_scale_decimal128_datatype --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/_pandas_helpers.py | 4 ++- tests/unit/test_client.py | 43 ++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 380df7b1d..bcc869f15 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -23,6 +23,7 @@ import warnings from typing import Any, Union + from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema @@ -485,7 +486,6 @@ def augment_schema(dataframe, current_bq_schema): # pytype: disable=attribute-error augmented_schema = [] unknown_type_fields = [] - for field in current_bq_schema: if field.field_type is not None: augmented_schema.append(field) @@ -515,6 +515,8 @@ def augment_schema(dataframe, current_bq_schema): else: detected_mode = field.mode detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id) + if detected_type == "NUMERIC" and arrow_table.type.scale > 9: + detected_type = "BIGNUMERIC" if detected_type is None: unknown_type_fields.append(field) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c8968adbb..ad22e0ddb 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -8891,6 +8891,49 @@ def test_load_table_from_dataframe_with_csv_source_format(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.CSV + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_higher_scale_decimal128_datatype(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + from decimal import Decimal + + client = self._make_client() + dataframe = pandas.DataFrame({"x": [Decimal("0.1234567891")]}) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", autospec=True + ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert tuple(sent_config.schema) == ( + SchemaField("x", "BIGNUMERIC", "NULLABLE", None), + ) + def test_load_table_from_json_basic_use(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job From 02a7d129776b7da7da844ffa9c5cdf21811cd3af Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 19 Dec 2023 08:13:00 -0600 Subject: [PATCH 208/536] feat: support JSON type in `insert_rows` and as a scalar query parameter (#1757) Co-authored-by: Kira --- google/cloud/bigquery/_helpers.py | 9 ++++++++- tests/system/test_client.py | 7 ++++++- tests/system/test_query.py | 12 ++++++++++++ tests/unit/test__helpers.py | 16 ++++++++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 93b46341e..4cf6dddac 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -374,6 +374,13 @@ def _bytes_to_json(value): return value +def _json_to_json(value): + """Coerce 'value' to a BigQuery REST API representation.""" + if value is None: + return None + return json.dumps(value) + + def _timestamp_to_json_parameter(value): """Coerce 'value' to an JSON-compatible representation. @@ -439,7 +446,7 @@ def _time_to_json(value): "DATETIME": _datetime_to_json, "DATE": _date_to_json, "TIME": _time_to_json, - "JSON": _json_from_json, + "JSON": _json_to_json, # Make sure DECIMAL and BIGDECIMAL are handled, even though # requests for them should be converted to NUMERIC. Better safe # than sorry. diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 7cea8cfa4..92894455a 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2049,13 +2049,18 @@ def test_insert_rows_nested_nested(self): ), ], ), + SF("json_col", "JSON"), ] record = { "nested_string": "another string value", "nested_repeated": [0, 1, 2], "nested_record": {"nested_nested_string": "some deep insight"}, } - to_insert = [("Some value", record)] + json_record = { + "json_array": [1, 2, 3], + "json_object": {"alpha": "abc", "num": 123}, + } + to_insert = [("Some value", record, json_record)] table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("issue_2951")) table_arg = Table(dataset.table(table_id), schema=schema) diff --git a/tests/system/test_query.py b/tests/system/test_query.py index 723f927d7..b8e0c00da 100644 --- a/tests/system/test_query.py +++ b/tests/system/test_query.py @@ -256,6 +256,18 @@ def test_query_statistics(bigquery_client, query_api_method): ) ], ), + pytest.param( + "SELECT @json", + {"alpha": "abc", "num": [1, 2, 3]}, + [ + ScalarQueryParameter( + name="json", + type_="JSON", + value={"alpha": "abc", "num": [1, 2, 3]}, + ) + ], + id="scalar-json", + ), ( "SELECT @naive_time", datetime.time(12, 41, 9, 62500), diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 7bf55baeb..87ab46669 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -886,6 +886,16 @@ def test_w_known_field_type(self): converted = self._call_fut(field, original) self.assertEqual(converted, str(original)) + def test_w_scalar_none(self): + import google.cloud.bigquery._helpers as module_under_test + + scalar_types = module_under_test._SCALAR_VALUE_TO_JSON_ROW.keys() + for type_ in scalar_types: + field = _make_field(type_) + original = None + converted = self._call_fut(field, original) + self.assertIsNone(converted, msg=f"{type_} did not return None") + class Test_single_field_to_json(unittest.TestCase): def _call_fut(self, field, value): @@ -921,6 +931,12 @@ def test_w_scalar_ignores_mode(self): converted = self._call_fut(field, original) self.assertEqual(converted, original) + def test_w_scalar_json(self): + field = _make_field("JSON") + original = {"alpha": "abc", "num": [1, 2, 3]} + converted = self._call_fut(field, original) + self.assertEqual(converted, json.dumps(original)) + class Test_repeated_field_to_json(unittest.TestCase): def _call_fut(self, field, value): From d225a94e718a85877c495fbd32eca607b8919ac6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 19 Dec 2023 16:00:29 -0600 Subject: [PATCH 209/536] perf: DB-API uses more efficient `query_and_wait` when no job ID is provided (#1747) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #1745 🦕 --- google/cloud/bigquery/_job_helpers.py | 1 + google/cloud/bigquery/client.py | 6 ++ google/cloud/bigquery/dbapi/cursor.py | 122 ++++++++++++---------- google/cloud/bigquery/job/query.py | 2 + google/cloud/bigquery/table.py | 32 ++++-- tests/unit/test_dbapi_cursor.py | 139 ++++++++++++++++++-------- tests/unit/test_table.py | 32 +++--- 7 files changed, 219 insertions(+), 115 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 095de4faa..7356331b8 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -491,6 +491,7 @@ def do_query(): job_id=query_results.job_id, query_id=query_results.query_id, project=query_results.project, + num_dml_affected_rows=query_results.num_dml_affected_rows, ) if job_retry is not None: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 284ccddb5..182319646 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3963,6 +3963,7 @@ def _list_rows_from_query_results( timeout: TimeoutType = DEFAULT_TIMEOUT, query_id: Optional[str] = None, first_page_response: Optional[Dict[str, Any]] = None, + num_dml_affected_rows: Optional[int] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -4007,6 +4008,10 @@ def _list_rows_from_query_results( and not guaranteed to be populated. first_page_response (Optional[dict]): API response for the first page of results (if available). + num_dml_affected_rows (Optional[int]): + If this RowIterator is the result of a DML query, the number of + rows that were affected. + Returns: google.cloud.bigquery.table.RowIterator: Iterator of row data @@ -4047,6 +4052,7 @@ def _list_rows_from_query_results( job_id=job_id, query_id=query_id, first_page_response=first_page_response, + num_dml_affected_rows=num_dml_affected_rows, ) return row_iterator diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 0dc8f56ab..014a6825e 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -14,11 +14,12 @@ """Cursor for the Google BigQuery DB-API.""" +from __future__ import annotations + import collections from collections import abc as collections_abc -import copy -import logging import re +from typing import Optional try: from google.cloud.bigquery_storage import ArrowSerializationOptions @@ -34,8 +35,6 @@ import google.cloud.exceptions # type: ignore -_LOGGER = logging.getLogger(__name__) - # Per PEP 249: A 7-item sequence containing information describing one result # column. The first two items (name and type_code) are mandatory, the other # five are optional and are set to None if no meaningful values can be @@ -76,18 +75,31 @@ def __init__(self, connection): # most appropriate size. self.arraysize = None self._query_data = None - self._query_job = None + self._query_rows = None self._closed = False @property - def query_job(self): - """google.cloud.bigquery.job.query.QueryJob: The query job created by - the last ``execute*()`` call. + def query_job(self) -> Optional[job.QueryJob]: + """google.cloud.bigquery.job.query.QueryJob | None: The query job + created by the last ``execute*()`` call, if a query job was created. .. note:: If the last ``execute*()`` call was ``executemany()``, this is the last job created by ``executemany()``.""" - return self._query_job + rows = self._query_rows + + if rows is None: + return None + + job_id = rows.job_id + project = rows.project + location = rows.location + client = self.connection._client + + if job_id is None: + return None + + return client.get_job(job_id, location=location, project=project) def close(self): """Mark the cursor as closed, preventing its further use.""" @@ -117,8 +129,8 @@ def _set_description(self, schema): for field in schema ) - def _set_rowcount(self, query_results): - """Set the rowcount from query results. + def _set_rowcount(self, rows): + """Set the rowcount from a RowIterator. Normally, this sets rowcount to the number of rows returned by the query, but if it was a DML statement, it sets rowcount to the number @@ -129,10 +141,10 @@ def _set_rowcount(self, query_results): Results of a query. """ total_rows = 0 - num_dml_affected_rows = query_results.num_dml_affected_rows + num_dml_affected_rows = rows.num_dml_affected_rows - if query_results.total_rows is not None and query_results.total_rows > 0: - total_rows = query_results.total_rows + if rows.total_rows is not None and rows.total_rows > 0: + total_rows = rows.total_rows if num_dml_affected_rows is not None and num_dml_affected_rows > 0: total_rows = num_dml_affected_rows self.rowcount = total_rows @@ -165,9 +177,10 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None): parameters (Union[Mapping[str, Any], Sequence[Any]]): (Optional) dictionary or sequence of parameter values. - job_id (str): - (Optional) The job_id to use. If not set, a job ID - is generated at random. + job_id (str | None): + (Optional and discouraged) The job ID to use when creating + the query job. For best performance and reliability, manually + setting a job ID is discouraged. job_config (google.cloud.bigquery.job.QueryJobConfig): (Optional) Extra configuration options for the query job. @@ -181,7 +194,7 @@ def _execute( self, formatted_operation, parameters, job_id, job_config, parameter_types ): self._query_data = None - self._query_job = None + self._query_results = None client = self.connection._client # The DB-API uses the pyformat formatting, since the way BigQuery does @@ -190,33 +203,35 @@ def _execute( # libraries. query_parameters = _helpers.to_query_parameters(parameters, parameter_types) - if client._default_query_job_config: - if job_config: - config = job_config._fill_from_default(client._default_query_job_config) - else: - config = copy.deepcopy(client._default_query_job_config) - else: - config = job_config or job.QueryJobConfig(use_legacy_sql=False) - + config = job_config or job.QueryJobConfig() config.query_parameters = query_parameters - self._query_job = client.query( - formatted_operation, job_config=config, job_id=job_id - ) - if self._query_job.dry_run: - self._set_description(schema=None) - self.rowcount = 0 - return - - # Wait for the query to finish. + # Start the query and wait for the query to finish. try: - self._query_job.result() + if job_id is not None: + rows = client.query( + formatted_operation, + job_config=job_config, + job_id=job_id, + ).result( + page_size=self.arraysize, + ) + else: + rows = client.query_and_wait( + formatted_operation, + job_config=config, + page_size=self.arraysize, + ) except google.cloud.exceptions.GoogleCloudError as exc: raise exceptions.DatabaseError(exc) - query_results = self._query_job._query_results - self._set_rowcount(query_results) - self._set_description(query_results.schema) + self._query_rows = rows + self._set_description(rows.schema) + + if config.dry_run: + self.rowcount = 0 + else: + self._set_rowcount(rows) def executemany(self, operation, seq_of_parameters): """Prepare and execute a database operation multiple times. @@ -250,25 +265,26 @@ def _try_fetch(self, size=None): Mutates self to indicate that iteration has started. """ - if self._query_job is None: + if self._query_data is not None: + # Already started fetching the data. + return + + rows = self._query_rows + if rows is None: raise exceptions.InterfaceError( "No query results: execute() must be called before fetch." ) - if self._query_job.dry_run: - self._query_data = iter([]) + bqstorage_client = self.connection._bqstorage_client + if rows._should_use_bqstorage( + bqstorage_client, + create_bqstorage_client=False, + ): + rows_iterable = self._bqstorage_fetch(bqstorage_client) + self._query_data = _helpers.to_bq_table_rows(rows_iterable) return - if self._query_data is None: - bqstorage_client = self.connection._bqstorage_client - - if bqstorage_client is not None: - rows_iterable = self._bqstorage_fetch(bqstorage_client) - self._query_data = _helpers.to_bq_table_rows(rows_iterable) - return - - rows_iter = self._query_job.result(page_size=self.arraysize) - self._query_data = iter(rows_iter) + self._query_data = iter(rows) def _bqstorage_fetch(self, bqstorage_client): """Start fetching data with the BigQuery Storage API. @@ -290,7 +306,7 @@ def _bqstorage_fetch(self, bqstorage_client): # bigquery_storage can indeed be imported here without errors. from google.cloud import bigquery_storage - table_reference = self._query_job.destination + table_reference = self._query_rows._table requested_session = bigquery_storage.types.ReadSession( table=table_reference.to_bqstorage(), diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 4a529f949..ac0c51973 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1614,6 +1614,7 @@ def do_get_result(): project=self.project, job_id=self.job_id, query_id=self.query_id, + num_dml_affected_rows=self._query_results.num_dml_affected_rows, ) # We know that there's at least 1 row, so only treat the response from @@ -1639,6 +1640,7 @@ def do_get_result(): timeout=timeout, query_id=self.query_id, first_page_response=first_page_response, + num_dml_affected_rows=self._query_results.num_dml_affected_rows, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 70e601714..0ae7851a1 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1566,6 +1566,7 @@ def __init__( job_id: Optional[str] = None, query_id: Optional[str] = None, project: Optional[str] = None, + num_dml_affected_rows: Optional[int] = None, ): super(RowIterator, self).__init__( client, @@ -1592,6 +1593,7 @@ def __init__( self._job_id = job_id self._query_id = query_id self._project = project + self._num_dml_affected_rows = num_dml_affected_rows @property def _billing_project(self) -> Optional[str]: @@ -1616,6 +1618,16 @@ def location(self) -> Optional[str]: """ return self._location + @property + def num_dml_affected_rows(self) -> Optional[int]: + """If this RowIterator is the result of a DML query, the number of + rows that were affected. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.num_dml_affected_rows + """ + return self._num_dml_affected_rows + @property def project(self) -> Optional[str]: """GCP Project ID where these rows are read from.""" @@ -1635,7 +1647,10 @@ def _is_almost_completely_cached(self): This is useful to know, because we can avoid alternative download mechanisms. """ - if self._first_page_response is None: + if ( + not hasattr(self, "_first_page_response") + or self._first_page_response is None + ): return False total_cached_rows = len(self._first_page_response.get(self._items_key, [])) @@ -1655,7 +1670,7 @@ def _is_almost_completely_cached(self): return False - def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): + def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client): """Returns True if the BigQuery Storage API can be used. Returns: @@ -1669,8 +1684,9 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): if self._table is None: return False - # The developer is manually paging through results if this is set. - if self.next_page_token is not None: + # The developer has already started paging through results if + # next_page_token is set. + if hasattr(self, "next_page_token") and self.next_page_token is not None: return False if self._is_almost_completely_cached(): @@ -1726,7 +1742,7 @@ def schema(self): @property def total_rows(self): - """int: The total number of rows in the table.""" + """int: The total number of rows in the table or query results.""" return self._total_rows def _maybe_warn_max_results( @@ -1752,7 +1768,7 @@ def _maybe_warn_max_results( def _to_page_iterable( self, bqstorage_download, tabledata_list_download, bqstorage_client=None ): - if not self._validate_bqstorage(bqstorage_client, False): + if not self._should_use_bqstorage(bqstorage_client, False): bqstorage_client = None result_pages = ( @@ -1882,7 +1898,7 @@ def to_arrow( self._maybe_warn_max_results(bqstorage_client) - if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): + if not self._should_use_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -2223,7 +2239,7 @@ def to_dataframe( self._maybe_warn_max_results(bqstorage_client) - if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): + if not self._should_use_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index fc6ea3882..69d33fe17 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import functools import mock import operator as op import unittest import pytest +import google.cloud.bigquery.table as bq_table + try: import pyarrow except ImportError: # pragma: NO COVER @@ -48,27 +51,45 @@ def _mock_client( rows=None, schema=None, num_dml_affected_rows=None, - default_query_job_config=None, dry_run_job=False, total_bytes_processed=0, + total_rows=None, + destination_table="test-project.test_dataset.test_table", ): from google.cloud.bigquery import client - if rows is None: + if total_rows is None: total_rows = 0 - else: - total_rows = len(rows) + if rows is not None: + total_rows = len(rows) + table = bq_table.TableReference.from_string(destination_table) mock_client = mock.create_autospec(client.Client) - mock_client.query.return_value = self._mock_job( + mock_job = self._mock_job( total_rows=total_rows, schema=schema, num_dml_affected_rows=num_dml_affected_rows, dry_run=dry_run_job, total_bytes_processed=total_bytes_processed, - rows=rows, + rows=self._mock_rows( + rows, + total_rows=total_rows, + schema=schema, + num_dml_affected_rows=num_dml_affected_rows, + table=table, + ), + ) + mock_client.get_job.return_value = mock_job + mock_client.query.return_value = mock_job + mock_client.query_and_wait.return_value = self._mock_rows( + rows, + total_rows=total_rows, + schema=schema, + num_dml_affected_rows=num_dml_affected_rows, + # Sometimes all the results will be available in the initial + # response, in which case may be no job and no destination table. + table=table if rows is not None and total_rows > len(rows) else None, ) - mock_client._default_query_job_config = default_query_job_config # Assure that the REST client gets used, not the BQ Storage client. mock_client._ensure_bqstorage_client.return_value = None @@ -106,9 +127,6 @@ def _mock_job( ): from google.cloud.bigquery import job - if rows is None: - rows = [] - mock_job = mock.create_autospec(job.QueryJob) mock_job.error_result = None mock_job.state = "DONE" @@ -136,6 +154,30 @@ def _mock_job( return mock_job + def _mock_rows( + self, rows, total_rows=0, schema=None, num_dml_affected_rows=None, table=None + ): + mock_rows = mock.create_autospec(bq_table.RowIterator, instance=True) + mock_rows.__iter__.return_value = rows + mock_rows._table = table + mock_rows._should_use_bqstorage = functools.partial( + bq_table.RowIterator._should_use_bqstorage, + mock_rows, + ) + mock_rows._is_almost_completely_cached = functools.partial( + bq_table.RowIterator._is_almost_completely_cached, + mock_rows, + ) + mock_rows.max_results = None + type(mock_rows).job_id = mock.PropertyMock(return_value="test-job-id") + type(mock_rows).location = mock.PropertyMock(return_value="test-location") + type(mock_rows).num_dml_affected_rows = mock.PropertyMock( + return_value=num_dml_affected_rows + ) + type(mock_rows).total_rows = mock.PropertyMock(return_value=total_rows) + type(mock_rows).schema = mock.PropertyMock(return_value=schema) + return mock_rows + def _mock_results(self, total_rows=0, schema=None, num_dml_affected_rows=None): from google.cloud.bigquery import query @@ -284,12 +326,15 @@ def test_fetchall_w_row(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): from google.cloud.bigquery import dbapi - from google.cloud.bigquery import table # use unordered data to also test any non-determenistic key order in dicts row_data = [ - table.Row([1.4, 1.1, 1.3, 1.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), - table.Row([2.4, 2.1, 2.3, 2.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), + bq_table.Row( + [1.4, 1.1, 1.3, 1.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0} + ), + bq_table.Row( + [2.4, 2.1, 2.3, 2.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0} + ), ] bqstorage_streamed_rows = [ { @@ -341,7 +386,12 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): def test_fetchall_w_bqstorage_client_fetch_no_rows(self): from google.cloud.bigquery import dbapi - mock_client = self._mock_client(rows=[]) + mock_client = self._mock_client( + rows=[], + # Assume there are many more pages of data to look at so that the + # BQ Storage API is necessary. + total_rows=1000, + ) mock_bqstorage_client = self._mock_bqstorage_client(stream_count=0) mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client @@ -365,14 +415,18 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): ) def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): from google.cloud.bigquery import dbapi - from google.cloud.bigquery import table - row_data = [table.Row([1.1, 1.2], {"foo": 0, "bar": 1})] + row_data = [bq_table.Row([1.1, 1.2], {"foo": 0, "bar": 1})] def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): return bqstorage_client - mock_client = self._mock_client(rows=row_data) + mock_client = self._mock_client( + rows=row_data, + # Assume there are many more pages of data to look at so that the + # BQ Storage API is necessary. + total_rows=1000, + ) mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, @@ -400,16 +454,21 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_no_arrow_compression(self): from google.cloud.bigquery import dbapi - from google.cloud.bigquery import table # Use unordered data to also test any non-determenistic key order in dicts. - row_data = [table.Row([1.2, 1.1], {"bar": 1, "foo": 0})] + row_data = [bq_table.Row([1.2, 1.1], {"bar": 1, "foo": 0})] bqstorage_streamed_rows = [{"bar": _to_pyarrow(1.2), "foo": _to_pyarrow(1.1)}] def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): return bqstorage_client - mock_client = self._mock_client(rows=row_data) + mock_client = self._mock_client( + rows=row_data, + # Assume there are many more pages of data to look at so that the + # BQ Storage API is necessary. + total_rows=1000, + destination_table="P.DS.T", + ) mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, @@ -459,12 +518,8 @@ def test_execute_custom_job_id(self): def test_execute_w_default_config(self): from google.cloud.bigquery.dbapi import connect - from google.cloud.bigquery import job - default_config = job.QueryJobConfig(use_legacy_sql=False, flatten_results=True) - client = self._mock_client( - rows=[], num_dml_affected_rows=0, default_query_job_config=default_config - ) + client = self._mock_client(rows=[], num_dml_affected_rows=0) connection = connect(client) cursor = connection.cursor() @@ -472,10 +527,7 @@ def test_execute_w_default_config(self): _, kwargs = client.query.call_args used_config = kwargs["job_config"] - expected_config = job.QueryJobConfig( - use_legacy_sql=False, flatten_results=True, query_parameters=[] - ) - self.assertEqual(used_config._properties, expected_config._properties) + self.assertIsNone(used_config) def test_execute_custom_job_config_wo_default_config(self): from google.cloud.bigquery.dbapi import connect @@ -495,10 +547,7 @@ def test_execute_custom_job_config_w_default_config(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery import job - default_config = job.QueryJobConfig(use_legacy_sql=False, flatten_results=True) - client = self._mock_client( - rows=[], num_dml_affected_rows=0, default_query_job_config=default_config - ) + client = self._mock_client(rows=[], num_dml_affected_rows=0) connection = connect(client) cursor = connection.cursor() config = job.QueryJobConfig(use_legacy_sql=True) @@ -509,7 +558,6 @@ def test_execute_custom_job_config_w_default_config(self): used_config = kwargs["job_config"] expected_config = job.QueryJobConfig( use_legacy_sql=True, # the config passed to execute() prevails - flatten_results=True, # from the default query_parameters=[], ) self.assertEqual(used_config._properties, expected_config._properties) @@ -576,7 +624,7 @@ def test_execute_w_query_dry_run(self): connection = dbapi.connect( self._mock_client( - rows=[("hello", "world", 1), ("howdy", "y'all", 2)], + rows=[], schema=[ SchemaField("a", "STRING", mode="NULLABLE"), SchemaField("b", "STRING", mode="REQUIRED"), @@ -594,7 +642,7 @@ def test_execute_w_query_dry_run(self): ) self.assertEqual(cursor.rowcount, 0) - self.assertIsNone(cursor.description) + self.assertIsNotNone(cursor.description) rows = cursor.fetchall() self.assertEqual(list(rows), []) @@ -602,16 +650,11 @@ def test_execute_raises_if_result_raises(self): import google.cloud.exceptions from google.cloud.bigquery import client - from google.cloud.bigquery import job from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import exceptions - job = mock.create_autospec(job.QueryJob) - job.dry_run = None - job.result.side_effect = google.cloud.exceptions.GoogleCloudError("") client = mock.create_autospec(client.Client) - client._default_query_job_config = None - client.query.return_value = job + client.query_and_wait.side_effect = google.cloud.exceptions.GoogleCloudError("") connection = connect(client) cursor = connection.cursor() @@ -677,6 +720,18 @@ def test_query_job_w_execute(self): cursor.execute("SELECT 1;") self.assertIsInstance(cursor.query_job, QueryJob) + def test_query_job_w_execute_no_job(self): + from google.cloud.bigquery import dbapi + + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + cursor.execute("SELECT 1;") + + # Simulate jobless execution. + type(cursor._query_rows).job_id = mock.PropertyMock(return_value=None) + + self.assertIsNone(cursor.query_job) + def test_query_job_w_executemany(self): from google.cloud.bigquery import dbapi, QueryJob diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 9b3d4fe84..4a85a0823 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2133,6 +2133,14 @@ def test_location_present(self): rows = self._make_one(location="asia-northeast1") self.assertEqual(rows.location, "asia-northeast1") + def test_num_dml_affected_rows_missing(self): + rows = self._make_one() + self.assertIsNone(rows.num_dml_affected_rows) + + def test_num_dml_affected_rows_present(self): + rows = self._make_one(num_dml_affected_rows=1234) + self.assertEqual(rows.num_dml_affected_rows, 1234) + def test_project_missing(self): rows = self._make_one() self.assertIsNone(rows.project) @@ -2334,11 +2342,11 @@ def test__is_almost_completely_cached_returns_true_with_no_rows_remaining(self): iterator = self._make_one(first_page_response=first_page) self.assertTrue(iterator._is_almost_completely_cached()) - def test__validate_bqstorage_returns_false_when_completely_cached(self): + def test__should_use_bqstorage_returns_false_when_completely_cached(self): first_page = {"rows": []} iterator = self._make_one(first_page_response=first_page) self.assertFalse( - iterator._validate_bqstorage( + iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) ) @@ -2346,32 +2354,32 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - def test__validate_bqstorage_returns_true_if_no_cached_results(self): + def test__should_use_bqstorage_returns_true_if_no_cached_results(self): iterator = self._make_one(first_page_response=None) # not cached - result = iterator._validate_bqstorage( + result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) self.assertTrue(result) - def test__validate_bqstorage_returns_false_if_page_token_set(self): + def test__should_use_bqstorage_returns_false_if_page_token_set(self): iterator = self._make_one( page_token="abc", first_page_response=None # not cached ) - result = iterator._validate_bqstorage( + result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) self.assertFalse(result) - def test__validate_bqstorage_returns_false_if_max_results_set(self): + def test__should_use_bqstorage_returns_false_if_max_results_set(self): iterator = self._make_one( max_results=10, first_page_response=None # not cached ) - result = iterator._validate_bqstorage( + result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) self.assertFalse(result) - def test__validate_bqstorage_returns_false_if_missing_dependency(self): + def test__should_use_bqstorage_returns_false_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached def fail_bqstorage_import(name, globals, locals, fromlist, level): @@ -2383,7 +2391,7 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) with no_bqstorage: - result = iterator._validate_bqstorage( + result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) @@ -2392,7 +2400,7 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): + def test__should_use_bqstorage_returns_false_w_warning_if_obsolete_version(self): iterator = self._make_one(first_page_response=None) # not cached patcher = mock.patch( @@ -2400,7 +2408,7 @@ def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: - result = iterator._validate_bqstorage( + result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) From a65aaa65f3eed54f3a8b3c2f21d84930a0c4ff56 Mon Sep 17 00:00:00 2001 From: Patrick Marx Date: Thu, 21 Dec 2023 07:05:41 -0800 Subject: [PATCH 210/536] Update README.rst (#1743) Move instructions off deprecated module. Co-authored-by: Chalmer Lowe --- README.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 46f35e716..f81adc4b9 100644 --- a/README.rst +++ b/README.rst @@ -117,7 +117,7 @@ the BigQuery client the following PyPI packages need to be installed: .. code-block:: console - pip install google-cloud-bigquery[opentelemetry] opentelemetry-exporter-google-cloud + pip install google-cloud-bigquery[opentelemetry] opentelemetry-exporter-gcp-trace After installation, OpenTelemetry can be used in the BigQuery client and in BigQuery jobs. First, however, an exporter must be @@ -128,12 +128,11 @@ example of this can be found here: from opentelemetry import trace from opentelemetry.sdk.trace import TracerProvider - from opentelemetry.sdk.trace.export import BatchExportSpanProcessor + from opentelemetry.sdk.trace.export import BatchSpanProcessor from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter + tracer_provider = TracerProvider() + tracer_provider = BatchSpanProcessor(CloudTraceSpanExporter()) trace.set_tracer_provider(TracerProvider()) - trace.get_tracer_provider().add_span_processor( - BatchExportSpanProcessor(CloudTraceSpanExporter()) - ) In this example all tracing data will be published to the Google `Cloud Trace`_ console. For more information on OpenTelemetry, please consult the `OpenTelemetry documentation`_. From 132c14bbddfb61ea8bc408bef5e958e21b5b819c Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 5 Jan 2024 12:03:25 -0500 Subject: [PATCH 211/536] fix: Due to upstream change in dataset, updates expected results (#1761) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Due to upstream change in dataset, updates expected results * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- tests/system/test_client.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 92894455a..d7e56f7ff 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1781,7 +1781,6 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): ) result_rows = [cursor.fetchone(), cursor.fetchone(), cursor.fetchone()] - field_name = operator.itemgetter(0) fetched_data = [sorted(row.items(), key=field_name) for row in result_rows] # Since DB API is not thread safe, only a single result stream should be @@ -1789,11 +1788,6 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): # in the sorted order. expected_data = [ - [ - ("by", "pg"), - ("id", 1), - ("timestamp", datetime.datetime(2006, 10, 9, 18, 21, 51, tzinfo=UTC)), - ], [ ("by", "phyllis"), ("id", 2), @@ -1804,6 +1798,11 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): ("id", 3), ("timestamp", datetime.datetime(2006, 10, 9, 18, 40, 33, tzinfo=UTC)), ], + [ + ("by", "onebeerdave"), + ("id", 4), + ("timestamp", datetime.datetime(2006, 10, 9, 18, 47, 42, tzinfo=UTC)), + ], ] self.assertEqual(fetched_data, expected_data) From 8585747058e6db49a8078ae44d8e10735cdc27f9 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 8 Jan 2024 11:00:43 -0800 Subject: [PATCH 212/536] feat: support RANGE in schema (#1746) * feat: support RANGE in schema * lint * fix python 3.7 error * remove unused test method * address comments * add system test * correct range json schema * json format * change system test to adjust to upstream table * fix systest * remove insert row with range * systest * add unit test * fix mypy error * error * address comments --- google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/schema.py | 73 ++++++++++++++++++++++++++- tests/data/schema.json | 8 +++ tests/unit/test_schema.py | 84 +++++++++++++++++++++++++++++++ 4 files changed, 166 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 72576e608..1ea056eb8 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -96,6 +96,7 @@ from google.cloud.bigquery.routine import RemoteFunctionOptions from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.schema import FieldElementType from google.cloud.bigquery.standard_sql import StandardSqlDataType from google.cloud.bigquery.standard_sql import StandardSqlField from google.cloud.bigquery.standard_sql import StandardSqlStructType @@ -158,6 +159,7 @@ "RemoteFunctionOptions", # Shared helpers "SchemaField", + "FieldElementType", "PolicyTagList", "UDFResource", "ExternalConfig", diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 20a1bc92f..f5b03cbef 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -16,7 +16,7 @@ import collections import enum -from typing import Any, Dict, Iterable, Optional, Union +from typing import Any, Dict, Iterable, Optional, Union, cast from google.cloud.bigquery import standard_sql from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -66,6 +66,46 @@ class _DefaultSentinel(enum.Enum): _DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE +class FieldElementType(object): + """Represents the type of a field element. + + Args: + element_type (str): The type of a field element. + """ + + def __init__(self, element_type: str): + self._properties = {} + self._properties["type"] = element_type.upper() + + @property + def element_type(self): + return self._properties.get("type") + + @classmethod + def from_api_repr(cls, api_repr: Optional[dict]) -> Optional["FieldElementType"]: + """Factory: construct a FieldElementType given its API representation. + + Args: + api_repr (Dict[str, str]): field element type as returned from + the API. + + Returns: + google.cloud.bigquery.FieldElementType: + Python object, as parsed from ``api_repr``. + """ + if not api_repr: + return None + return cls(api_repr["type"].upper()) + + def to_api_repr(self) -> dict: + """Construct the API resource representation of this field element type. + + Returns: + Dict[str, str]: Field element type represented as an API resource. + """ + return self._properties + + class SchemaField(object): """Describe a single field within a table schema. @@ -117,6 +157,12 @@ class SchemaField(object): - Struct or array composed with the above allowed functions, for example: "[CURRENT_DATE(), DATE '2020-01-01'"] + + range_element_type: FieldElementType, str, Optional + The subtype of the RANGE, if the type of this field is RANGE. If + the type is RANGE, this field is required. Possible values for the + field element type of a RANGE include `DATE`, `DATETIME` and + `TIMESTAMP`. """ def __init__( @@ -131,6 +177,7 @@ def __init__( precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, + range_element_type: Union[FieldElementType, str, None] = None, ): self._properties: Dict[str, Any] = { "name": name, @@ -152,6 +199,11 @@ def __init__( self._properties["policyTags"] = ( policy_tags.to_api_repr() if policy_tags is not None else None ) + if isinstance(range_element_type, str): + self._properties["rangeElementType"] = {"type": range_element_type} + if isinstance(range_element_type, FieldElementType): + self._properties["rangeElementType"] = range_element_type.to_api_repr() + self._fields = tuple(fields) @staticmethod @@ -186,6 +238,12 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: policy_tags = PolicyTagList.from_api_repr(policy_tags) + if api_repr.get("rangeElementType"): + range_element_type = cast(dict, api_repr.get("rangeElementType")) + element_type = range_element_type.get("type") + else: + element_type = None + return cls( field_type=field_type, fields=[cls.from_api_repr(f) for f in fields], @@ -197,6 +255,7 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": precision=cls.__get_int(api_repr, "precision"), scale=cls.__get_int(api_repr, "scale"), max_length=cls.__get_int(api_repr, "maxLength"), + range_element_type=element_type, ) @property @@ -252,6 +311,18 @@ def max_length(self): """Optional[int]: Maximum length for the STRING or BYTES field.""" return self._properties.get("maxLength") + @property + def range_element_type(self): + """Optional[FieldElementType]: The subtype of the RANGE, if the + type of this field is RANGE. + + Must be set when ``type`` is `"RANGE"`. Must be one of `"DATE"`, + `"DATETIME"` or `"TIMESTAMP"`. + """ + if self._properties.get("rangeElementType"): + ret = self._properties.get("rangeElementType") + return FieldElementType.from_api_repr(ret) + @property def fields(self): """Optional[tuple]: Subfields contained in this field. diff --git a/tests/data/schema.json b/tests/data/schema.json index 6a36e55e5..29542e82d 100644 --- a/tests/data/schema.json +++ b/tests/data/schema.json @@ -83,6 +83,14 @@ "mode" : "NULLABLE", "name" : "FavoriteNumber", "type" : "NUMERIC" + }, + { + "mode" : "NULLABLE", + "name" : "TimeRange", + "type" : "RANGE", + "rangeElementType": { + "type": "DATETIME" + } } ] } diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index c6593e1b4..26ec0dfef 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -97,6 +97,36 @@ def test_constructor_subfields(self): self.assertEqual(field.fields[0], sub_field1) self.assertEqual(field.fields[1], sub_field2) + def test_constructor_range(self): + from google.cloud.bigquery.schema import FieldElementType + + field = self._make_one( + "test", + "RANGE", + mode="REQUIRED", + description="Testing", + range_element_type=FieldElementType("DATETIME"), + ) + self.assertEqual(field.name, "test") + self.assertEqual(field.field_type, "RANGE") + self.assertEqual(field.mode, "REQUIRED") + self.assertEqual(field.description, "Testing") + self.assertEqual(field.range_element_type.element_type, "DATETIME") + + def test_constructor_range_str(self): + field = self._make_one( + "test", + "RANGE", + mode="REQUIRED", + description="Testing", + range_element_type="DATETIME", + ) + self.assertEqual(field.name, "test") + self.assertEqual(field.field_type, "RANGE") + self.assertEqual(field.mode, "REQUIRED") + self.assertEqual(field.description, "Testing") + self.assertEqual(field.range_element_type.element_type, "DATETIME") + def test_to_api_repr(self): from google.cloud.bigquery.schema import PolicyTagList @@ -160,6 +190,7 @@ def test_from_api_repr(self): self.assertEqual(field.fields[0].name, "bar") self.assertEqual(field.fields[0].field_type, "INTEGER") self.assertEqual(field.fields[0].mode, "NULLABLE") + self.assertEqual(field.range_element_type, None) def test_from_api_repr_policy(self): field = self._get_target_class().from_api_repr( @@ -178,6 +209,23 @@ def test_from_api_repr_policy(self): self.assertEqual(field.fields[0].field_type, "INTEGER") self.assertEqual(field.fields[0].mode, "NULLABLE") + def test_from_api_repr_range(self): + field = self._get_target_class().from_api_repr( + { + "mode": "nullable", + "description": "test_range", + "name": "foo", + "type": "range", + "rangeElementType": {"type": "DATETIME"}, + } + ) + self.assertEqual(field.name, "foo") + self.assertEqual(field.field_type, "RANGE") + self.assertEqual(field.mode, "NULLABLE") + self.assertEqual(field.description, "test_range") + self.assertEqual(len(field.fields), 0) + self.assertEqual(field.range_element_type.element_type, "DATETIME") + def test_from_api_repr_defaults(self): field = self._get_target_class().from_api_repr( {"name": "foo", "type": "record"} @@ -192,8 +240,10 @@ def test_from_api_repr_defaults(self): # _properties. self.assertIsNone(field.description) self.assertIsNone(field.policy_tags) + self.assertIsNone(field.range_element_type) self.assertNotIn("description", field._properties) self.assertNotIn("policyTags", field._properties) + self.assertNotIn("rangeElementType", field._properties) def test_name_property(self): name = "lemon-ness" @@ -566,6 +616,40 @@ def test___repr__evaluable_with_policy_tags(self): assert field == evaled_field +class TestFieldElementType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.schema import FieldElementType + + return FieldElementType + + def _make_one(self, *args): + return self._get_target_class()(*args) + + def test_constructor(self): + element_type = self._make_one("DATETIME") + self.assertEqual(element_type.element_type, "DATETIME") + self.assertEqual(element_type._properties["type"], "DATETIME") + + def test_to_api_repr(self): + element_type = self._make_one("DATETIME") + self.assertEqual(element_type.to_api_repr(), {"type": "DATETIME"}) + + def test_from_api_repr(self): + api_repr = {"type": "DATETIME"} + expected_element_type = self._make_one("DATETIME") + self.assertEqual( + expected_element_type.element_type, + self._get_target_class().from_api_repr(api_repr).element_type, + ) + + def test_from_api_repr_empty(self): + self.assertEqual(None, self._get_target_class().from_api_repr({})) + + def test_from_api_repr_none(self): + self.assertEqual(None, self._get_target_class().from_api_repr(None)) + + # TODO: dedup with the same class in test_table.py. class _SchemaBase(object): def _verify_field(self, field, r_field): From c0de6958e5761ad6ff532dd933b0f4387e18f1b9 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 9 Jan 2024 10:54:49 -0500 Subject: [PATCH 213/536] fix: updates types-protobuf version for mypy-samples nox session (#1764) --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 41492c7f0..66d68c04e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -219,7 +219,7 @@ def mypy_samples(session): session.install( "types-mock", "types-pytz", - "types-protobuf", + "types-protobuf!=4.24.0.20240106", # This version causes an error: 'Module "google.oauth2" has no attribute "service_account"' "types-python-dateutil", "types-requests", "types-setuptools", From 575f7fc8d38287ec93d31edc8a5655885417e651 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 15:39:34 -0600 Subject: [PATCH 214/536] chore(main): release 3.15.0 (#1752) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 21 +++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c921fda8..96ec9eceb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,27 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.15.0](https://github.com/googleapis/python-bigquery/compare/v3.14.1...v3.15.0) (2024-01-09) + + +### Features + +* Support JSON type in `insert_rows` and as a scalar query parameter ([#1757](https://github.com/googleapis/python-bigquery/issues/1757)) ([02a7d12](https://github.com/googleapis/python-bigquery/commit/02a7d129776b7da7da844ffa9c5cdf21811cd3af)) +* Support RANGE in schema ([#1746](https://github.com/googleapis/python-bigquery/issues/1746)) ([8585747](https://github.com/googleapis/python-bigquery/commit/8585747058e6db49a8078ae44d8e10735cdc27f9)) + + +### Bug Fixes + +* Deserializing JSON subfields within structs fails ([#1742](https://github.com/googleapis/python-bigquery/issues/1742)) ([0d93073](https://github.com/googleapis/python-bigquery/commit/0d930739c78b557db6cd48b38fe16eba93719c40)) +* Due to upstream change in dataset, updates expected results ([#1761](https://github.com/googleapis/python-bigquery/issues/1761)) ([132c14b](https://github.com/googleapis/python-bigquery/commit/132c14bbddfb61ea8bc408bef5e958e21b5b819c)) +* Load_table_from_dataframe for higher scale decimal ([#1703](https://github.com/googleapis/python-bigquery/issues/1703)) ([b9c8be0](https://github.com/googleapis/python-bigquery/commit/b9c8be0982c76187444300c414e0dda8b0ad105b)) +* Updates types-protobuf version for mypy-samples nox session ([#1764](https://github.com/googleapis/python-bigquery/issues/1764)) ([c0de695](https://github.com/googleapis/python-bigquery/commit/c0de6958e5761ad6ff532dd933b0f4387e18f1b9)) + + +### Performance Improvements + +* DB-API uses more efficient `query_and_wait` when no job ID is provided ([#1747](https://github.com/googleapis/python-bigquery/issues/1747)) ([d225a94](https://github.com/googleapis/python-bigquery/commit/d225a94e718a85877c495fbd32eca607b8919ac6)) + ## [3.14.1](https://github.com/googleapis/python-bigquery/compare/v3.14.0...v3.14.1) (2023-12-13) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 6073384c9..df08277f0 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.14.1" +__version__ = "3.15.0" From 08483fba675f3b87571787e1e4420134a8fc8177 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 10 Jan 2024 10:34:00 -0800 Subject: [PATCH 215/536] fix: add detailed message in job error (#1762) * fix: more detailed job error message * lint * fix mypy error * remove import ignore * Update google/cloud/bigquery/job/base.py Co-authored-by: Chalmer Lowe * Update google/cloud/bigquery/job/base.py Co-authored-by: Chalmer Lowe * variable name and unit test --------- Co-authored-by: Chalmer Lowe Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/job/base.py | 25 ++++++++++++++++++++++--- tests/unit/job/test_base.py | 21 +++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 97e0ea3bd..2641afea8 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -55,7 +55,7 @@ } -def _error_result_to_exception(error_result): +def _error_result_to_exception(error_result, errors=None): """Maps BigQuery error reasons to an exception. The reasons and their matching HTTP status codes are documented on @@ -66,6 +66,7 @@ def _error_result_to_exception(error_result): Args: error_result (Mapping[str, str]): The error result from BigQuery. + errors (Union[Iterable[str], None]): The detailed error messages. Returns: google.cloud.exceptions.GoogleAPICallError: The mapped exception. @@ -74,8 +75,24 @@ def _error_result_to_exception(error_result): status_code = _ERROR_REASON_TO_EXCEPTION.get( reason, http.client.INTERNAL_SERVER_ERROR ) + # Manually create error message to preserve both error_result and errors. + # Can be removed once b/310544564 and b/318889899 are resolved. + concatenated_errors = "" + if errors: + concatenated_errors = "; " + for err in errors: + concatenated_errors += ", ".join( + [f"{key}: {value}" for key, value in err.items()] + ) + concatenated_errors += "; " + + # strips off the last unneeded semicolon and space + concatenated_errors = concatenated_errors[:-2] + + error_message = error_result.get("message", "") + concatenated_errors + return exceptions.from_http_status( - status_code, error_result.get("message", ""), errors=[error_result] + status_code, error_message, errors=[error_result] ) @@ -886,7 +903,9 @@ def _set_future_result(self): return if self.error_result is not None: - exception = _error_result_to_exception(self.error_result) + exception = _error_result_to_exception( + self.error_result, self.errors or () + ) self.set_exception(exception) else: self.set_result(self) diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 5635d0e32..a61fd3198 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -47,6 +47,27 @@ def test_missing_reason(self): exception = self._call_fut(error_result) self.assertEqual(exception.code, http.client.INTERNAL_SERVER_ERROR) + def test_contatenate_errors(self): + # Added test for b/310544564 and b/318889899. + # Ensures that error messages from both error_result and errors are + # present in the exception raised. + + error_result = { + "reason": "invalid1", + "message": "error message 1", + } + errors = [ + {"reason": "invalid2", "message": "error message 2"}, + {"reason": "invalid3", "message": "error message 3"}, + ] + + exception = self._call_fut(error_result, errors) + self.assertEqual( + exception.message, + "error message 1; reason: invalid2, message: error message 2; " + "reason: invalid3, message: error message 3", + ) + class Test_JobReference(unittest.TestCase): JOB_ID = "job-id" From a167f9a95f0a8fbf0bdb4943d06f07c03768c132 Mon Sep 17 00:00:00 2001 From: Dmytro Karacheban Date: Thu, 11 Jan 2024 18:37:26 +0200 Subject: [PATCH 216/536] feat: Add `table_constraints` field to Table model (#1755) * feat: add `table_constraints` field to Table model * Change `raise` to `return` in __eq__ methods * Fix __eq__ for ColumnReference * Add column_references to ForeignKey __eq__ * Add missing coverage * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py * Update tests/unit/test_table.py --------- Co-authored-by: Chalmer Lowe Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/table.py | 128 +++++++++++++++ tests/unit/test_table.py | 280 +++++++++++++++++++++++++++++++++ 2 files changed, 408 insertions(+) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 0ae7851a1..b3be4ff90 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -390,6 +390,7 @@ class Table(_TableBase): "view_use_legacy_sql": "view", "view_query": "view", "require_partition_filter": "requirePartitionFilter", + "table_constraints": "tableConstraints", } def __init__(self, table_ref, schema=None) -> None: @@ -973,6 +974,16 @@ def clone_definition(self) -> Optional["CloneDefinition"]: clone_info = CloneDefinition(clone_info) return clone_info + @property + def table_constraints(self) -> Optional["TableConstraints"]: + """Tables Primary Key and Foreign Key information.""" + table_constraints = self._properties.get( + self._PROPERTY_TO_API_FIELD["table_constraints"] + ) + if table_constraints is not None: + table_constraints = TableConstraints.from_api_repr(table_constraints) + return table_constraints + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. @@ -2958,6 +2969,123 @@ def __repr__(self): return "TimePartitioning({})".format(",".join(key_vals)) +class PrimaryKey: + """Represents the primary key constraint on a table's columns. + + Args: + columns: The columns that are composed of the primary key constraint. + """ + + def __init__(self, columns: List[str]): + self.columns = columns + + def __eq__(self, other): + if not isinstance(other, PrimaryKey): + raise TypeError("The value provided is not a BigQuery PrimaryKey.") + return self.columns == other.columns + + +class ColumnReference: + """The pair of the foreign key column and primary key column. + + Args: + referencing_column: The column that composes the foreign key. + referenced_column: The column in the primary key that are referenced by the referencingColumn. + """ + + def __init__(self, referencing_column: str, referenced_column: str): + self.referencing_column = referencing_column + self.referenced_column = referenced_column + + def __eq__(self, other): + if not isinstance(other, ColumnReference): + raise TypeError("The value provided is not a BigQuery ColumnReference.") + return ( + self.referencing_column == other.referencing_column + and self.referenced_column == other.referenced_column + ) + + +class ForeignKey: + """Represents a foreign key constraint on a table's columns. + + Args: + name: Set only if the foreign key constraint is named. + referenced_table: The table that holds the primary key and is referenced by this foreign key. + column_references: The columns that compose the foreign key. + """ + + def __init__( + self, + name: str, + referenced_table: TableReference, + column_references: List[ColumnReference], + ): + self.name = name + self.referenced_table = referenced_table + self.column_references = column_references + + def __eq__(self, other): + if not isinstance(other, ForeignKey): + raise TypeError("The value provided is not a BigQuery ForeignKey.") + return ( + self.name == other.name + and self.referenced_table == other.referenced_table + and self.column_references == other.column_references + ) + + @classmethod + def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignKey": + """Create an instance from API representation.""" + return cls( + name=api_repr["name"], + referenced_table=TableReference.from_api_repr(api_repr["referencedTable"]), + column_references=[ + ColumnReference( + column_reference_resource["referencingColumn"], + column_reference_resource["referencedColumn"], + ) + for column_reference_resource in api_repr["columnReferences"] + ], + ) + + +class TableConstraints: + """The TableConstraints defines the primary key and foreign key. + + Args: + primary_key: + Represents a primary key constraint on a table's columns. Present only if the table + has a primary key. The primary key is not enforced. + foreign_keys: + Present only if the table has a foreign key. The foreign key is not enforced. + + """ + + def __init__( + self, + primary_key: Optional[PrimaryKey], + foreign_keys: Optional[List[ForeignKey]], + ): + self.primary_key = primary_key + self.foreign_keys = foreign_keys + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints": + """Create an instance from API representation.""" + primary_key = None + if "primaryKey" in resource: + primary_key = PrimaryKey(resource["primaryKey"]["columns"]) + + foreign_keys = None + if "foreignKeys" in resource: + foreign_keys = [ + ForeignKey.from_api_repr(foreign_key_resource) + for foreign_key_resource in resource["foreignKeys"] + ] + return cls(primary_key, foreign_keys) + + def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 4a85a0823..e4d0c66ab 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -603,6 +603,7 @@ def test_ctor(self): self.assertIsNone(table.encryption_configuration) self.assertIsNone(table.time_partitioning) self.assertIsNone(table.clustering_fields) + self.assertIsNone(table.table_constraints) def test_ctor_w_schema(self): from google.cloud.bigquery.schema import SchemaField @@ -901,6 +902,21 @@ def test_clone_definition_set(self): 2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC ) + def test_table_constraints_property_getter(self): + from google.cloud.bigquery.table import PrimaryKey, TableConstraints + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table._properties["tableConstraints"] = { + "primaryKey": {"columns": ["id"]}, + } + + table_constraints = table.table_constraints + + assert isinstance(table_constraints, TableConstraints) + assert table_constraints.primary_key == PrimaryKey(columns=["id"]) + def test_description_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -5393,6 +5409,270 @@ def test_set_expiration_w_none(self): assert time_partitioning._properties["expirationMs"] is None +class TestPrimaryKey(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import PrimaryKey + + return PrimaryKey + + @classmethod + def _make_one(cls, *args, **kwargs): + return cls._get_target_class()(*args, **kwargs) + + def test_constructor_explicit(self): + columns = ["id", "product_id"] + primary_key = self._make_one(columns) + + self.assertEqual(primary_key.columns, columns) + + def test__eq__columns_mismatch(self): + primary_key = self._make_one(columns=["id", "product_id"]) + other_primary_key = self._make_one(columns=["id"]) + + self.assertNotEqual(primary_key, other_primary_key) + + def test__eq__other_type(self): + primary_key = self._make_one(columns=["id", "product_id"]) + with self.assertRaises(TypeError): + primary_key == "This is not a Primary Key" + + +class TestColumnReference(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import ColumnReference + + return ColumnReference + + @classmethod + def _make_one(cls, *args, **kwargs): + return cls._get_target_class()(*args, **kwargs) + + def test_constructor_explicit(self): + referencing_column = "product_id" + referenced_column = "id" + column_reference = self._make_one(referencing_column, referenced_column) + + self.assertEqual(column_reference.referencing_column, referencing_column) + self.assertEqual(column_reference.referenced_column, referenced_column) + + def test__eq__referencing_column_mismatch(self): + column_reference = self._make_one( + referencing_column="product_id", + referenced_column="id", + ) + other_column_reference = self._make_one( + referencing_column="item_id", + referenced_column="id", + ) + + self.assertNotEqual(column_reference, other_column_reference) + + def test__eq__referenced_column_mismatch(self): + column_reference = self._make_one( + referencing_column="product_id", + referenced_column="id", + ) + other_column_reference = self._make_one( + referencing_column="product_id", + referenced_column="id_1", + ) + + self.assertNotEqual(column_reference, other_column_reference) + + def test__eq__other_type(self): + column_reference = self._make_one( + referencing_column="product_id", + referenced_column="id", + ) + with self.assertRaises(TypeError): + column_reference == "This is not a Column Reference" + + +class TestForeignKey(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import ForeignKey + + return ForeignKey + + @classmethod + def _make_one(cls, *args, **kwargs): + return cls._get_target_class()(*args, **kwargs) + + def test_constructor_explicit(self): + name = "my_fk" + referenced_table = TableReference.from_string("my-project.mydataset.mytable") + column_references = [] + foreign_key = self._make_one(name, referenced_table, column_references) + + self.assertEqual(foreign_key.name, name) + self.assertEqual(foreign_key.referenced_table, referenced_table) + self.assertEqual(foreign_key.column_references, column_references) + + def test__eq__name_mismatch(self): + referenced_table = TableReference.from_string("my-project.mydataset.mytable") + column_references = [] + foreign_key = self._make_one( + name="my_fk", + referenced_table=referenced_table, + column_references=column_references, + ) + other_foreign_key = self._make_one( + name="my_other_fk", + referenced_table=referenced_table, + column_references=column_references, + ) + + self.assertNotEqual(foreign_key, other_foreign_key) + + def test__eq__referenced_table_mismatch(self): + name = "my_fk" + column_references = [] + foreign_key = self._make_one( + name=name, + referenced_table=TableReference.from_string("my-project.mydataset.mytable"), + column_references=column_references, + ) + other_foreign_key = self._make_one( + name=name, + referenced_table=TableReference.from_string( + "my-project.mydataset.my-other-table" + ), + column_references=column_references, + ) + + self.assertNotEqual(foreign_key, other_foreign_key) + + def test__eq__column_references_mismatch(self): + from google.cloud.bigquery.table import ColumnReference + + name = "my_fk" + referenced_table = TableReference.from_string("my-project.mydataset.mytable") + foreign_key = self._make_one( + name=name, + referenced_table=referenced_table, + column_references=[], + ) + other_foreign_key = self._make_one( + name=name, + referenced_table=referenced_table, + column_references=[ + ColumnReference( + referencing_column="product_id", referenced_column="id" + ), + ], + ) + + self.assertNotEqual(foreign_key, other_foreign_key) + + def test__eq__other_type(self): + foreign_key = self._make_one( + name="my_fk", + referenced_table=TableReference.from_string("my-project.mydataset.mytable"), + column_references=[], + ) + with self.assertRaises(TypeError): + foreign_key == "This is not a Foreign Key" + + +class TestTableConstraint(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import TableConstraints + + return TableConstraints + + @classmethod + def _make_one(cls, *args, **kwargs): + return cls._get_target_class()(*args, **kwargs) + + def test_constructor_defaults(self): + instance = self._make_one(primary_key=None, foreign_keys=None) + self.assertIsNone(instance.primary_key) + self.assertIsNone(instance.foreign_keys) + + def test_from_api_repr_full_resource(self): + from google.cloud.bigquery.table import ( + ColumnReference, + ForeignKey, + TableReference, + ) + + resource = { + "primaryKey": { + "columns": ["id", "product_id"], + }, + "foreignKeys": [ + { + "name": "my_fk_name", + "referencedTable": { + "projectId": "my-project", + "datasetId": "your-dataset", + "tableId": "products", + }, + "columnReferences": [ + {"referencingColumn": "product_id", "referencedColumn": "id"}, + ], + } + ], + } + instance = self._get_target_class().from_api_repr(resource) + + self.assertIsNotNone(instance.primary_key) + self.assertEqual(instance.primary_key.columns, ["id", "product_id"]) + self.assertEqual( + instance.foreign_keys, + [ + ForeignKey( + name="my_fk_name", + referenced_table=TableReference.from_string( + "my-project.your-dataset.products" + ), + column_references=[ + ColumnReference( + referencing_column="product_id", referenced_column="id" + ), + ], + ), + ], + ) + + def test_from_api_repr_only_primary_key_resource(self): + resource = { + "primaryKey": { + "columns": ["id"], + }, + } + instance = self._get_target_class().from_api_repr(resource) + + self.assertIsNotNone(instance.primary_key) + self.assertEqual(instance.primary_key.columns, ["id"]) + self.assertIsNone(instance.foreign_keys) + + def test_from_api_repr_only_foreign_keys_resource(self): + resource = { + "foreignKeys": [ + { + "name": "my_fk_name", + "referencedTable": { + "projectId": "my-project", + "datasetId": "your-dataset", + "tableId": "products", + }, + "columnReferences": [ + {"referencingColumn": "product_id", "referencedColumn": "id"}, + ], + } + ] + } + instance = self._get_target_class().from_api_repr(resource) + + self.assertIsNone(instance.primary_key) + self.assertIsNotNone(instance.foreign_keys) + + @pytest.mark.skipif( bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) From 0fd7347ddb4ae1993f02b3bc109f64297437b3e2 Mon Sep 17 00:00:00 2001 From: Kira Date: Fri, 12 Jan 2024 11:34:15 -0500 Subject: [PATCH 217/536] feat: Support jsonExtension in LoadJobConfig (#1751) * feat: support jsonExtension in LoadJobConfig * reformatted with black * Updated doc string and added test for the encoding of jsonExtension * modified setter test to make sure property is set correctly --- google/cloud/bigquery/job/load.py | 13 +++++++++++++ tests/unit/job/test_load_config.py | 23 +++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 6b6c8bfd9..176435456 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -327,6 +327,19 @@ def ignore_unknown_values(self): def ignore_unknown_values(self, value): self._set_sub_prop("ignoreUnknownValues", value) + @property + def json_extension(self): + """Optional[str]: The extension to use for writing JSON data to BigQuery. Only supports GeoJSON currently. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.json_extension + + """ + return self._get_sub_prop("jsonExtension") + + @json_extension.setter + def json_extension(self, value): + self._set_sub_prop("jsonExtension", value) + @property def max_bad_records(self): """Optional[int]: Number of invalid rows to ignore. diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 4d25fa106..e1fa2641f 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -413,6 +413,29 @@ def test_ignore_unknown_values_setter(self): config.ignore_unknown_values = True self.assertTrue(config._properties["load"]["ignoreUnknownValues"]) + def test_json_extension_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.json_extension) + + def test_json_extension_hit(self): + config = self._get_target_class()() + config._properties["load"]["jsonExtension"] = "GEOJSON" + self.assertEqual(config.json_extension, "GEOJSON") + + def test_json_extension_setter(self): + config = self._get_target_class()() + self.assertFalse(config.json_extension) + config.json_extension = "GEOJSON" + self.assertTrue(config.json_extension) + self.assertEqual(config._properties["load"]["jsonExtension"], "GEOJSON") + + def test_to_api_repr_includes_json_extension(self): + config = self._get_target_class()() + config._properties["load"]["jsonExtension"] = "GEOJSON" + api_repr = config.to_api_repr() + self.assertIn("jsonExtension", api_repr["load"]) + self.assertEqual(api_repr["load"]["jsonExtension"], "GEOJSON") + def test_max_bad_records_missing(self): config = self._get_target_class()() self.assertIsNone(config.max_bad_records) From cf920f458bf2779b66656640ab730534bcf7b567 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Fri, 12 Jan 2024 13:41:03 -0500 Subject: [PATCH 218/536] chore(main): release 3.16.0 (#1765) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 13 +++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96ec9eceb..25c4ca1e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.16.0](https://github.com/googleapis/python-bigquery/compare/v3.15.0...v3.16.0) (2024-01-12) + + +### Features + +* Add `table_constraints` field to Table model ([#1755](https://github.com/googleapis/python-bigquery/issues/1755)) ([a167f9a](https://github.com/googleapis/python-bigquery/commit/a167f9a95f0a8fbf0bdb4943d06f07c03768c132)) +* Support jsonExtension in LoadJobConfig ([#1751](https://github.com/googleapis/python-bigquery/issues/1751)) ([0fd7347](https://github.com/googleapis/python-bigquery/commit/0fd7347ddb4ae1993f02b3bc109f64297437b3e2)) + + +### Bug Fixes + +* Add detailed message in job error ([#1762](https://github.com/googleapis/python-bigquery/issues/1762)) ([08483fb](https://github.com/googleapis/python-bigquery/commit/08483fba675f3b87571787e1e4420134a8fc8177)) + ## [3.15.0](https://github.com/googleapis/python-bigquery/compare/v3.14.1...v3.15.0) (2024-01-09) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index df08277f0..a3de40375 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.15.0" +__version__ = "3.16.0" From 0b5c1d597cdec3a05a16fb935595f773c5840bd4 Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 16 Jan 2024 11:09:40 -0800 Subject: [PATCH 219/536] feat: support universe resolution (#1774) * feat: support universe resolution This PR wires up consumption of the universe_domain client option for resolving the endpoint for constructing the BQ client. Testing universes is not yet something we want to in this repo, so validation was done out of band. * formatting and testing * conditionals for stale core * formatting * unused import --- google/cloud/bigquery/_helpers.py | 3 +++ google/cloud/bigquery/client.py | 9 +++++++++ tests/unit/test_client.py | 17 +++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 4cf6dddac..905d4aee1 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -55,6 +55,9 @@ _DEFAULT_HOST = "https://bigquery.googleapis.com" """Default host for JSON API.""" +_DEFAULT_UNIVERSE = "googleapis.com" +"""Default universe for the JSON API.""" + def _get_bigquery_host(): return os.environ.get(BIGQUERY_EMULATOR_HOST, _DEFAULT_HOST) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 182319646..b2ea130c4 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -78,6 +78,7 @@ from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._helpers import _get_bigquery_host from google.cloud.bigquery._helpers import _DEFAULT_HOST +from google.cloud.bigquery._helpers import _DEFAULT_UNIVERSE from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem @@ -252,6 +253,14 @@ def __init__( if client_options.api_endpoint: api_endpoint = client_options.api_endpoint kw_args["api_endpoint"] = api_endpoint + elif ( + hasattr(client_options, "universe_domain") + and client_options.universe_domain + and client_options.universe_domain is not _DEFAULT_UNIVERSE + ): + kw_args["api_endpoint"] = _DEFAULT_HOST.replace( + _DEFAULT_UNIVERSE, client_options.universe_domain + ) self._connection = Connection(self, **kw_args) self._location = location diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ad22e0ddb..56bdbad5e 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -201,6 +201,23 @@ def test_ctor_w_client_options_object(self): client._connection.API_BASE_URL, "https://www.foo-googleapis.com" ) + @pytest.mark.skipif( + packaging.version.parse(getattr(google.api_core, "__version__", "0.0.0")) + < packaging.version.Version("2.15.0"), + reason="universe_domain not supported with google-api-core < 2.15.0", + ) + def test_ctor_w_client_options_universe(self): + creds = _make_credentials() + http = object() + client_options = {"universe_domain": "foo.com"} + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + client_options=client_options, + ) + self.assertEqual(client._connection.API_BASE_URL, "https://bigquery.foo.com") + def test_ctor_w_location(self): from google.cloud.bigquery._http import Connection From 1271b18f14efb2d8c9bb3a5b35db2e949111cec5 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 16 Jan 2024 22:26:15 +0000 Subject: [PATCH 220/536] build(python): fix `docs` and `docfx` builds (#1779) Source-Link: https://togithub.com/googleapis/synthtool/commit/fac8444edd5f5526e804c306b766a271772a3e2f Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:5ea6d0ab82c956b50962f91d94e206d3921537ae5fe1549ec5326381d8905cfa --- .github/.OwlBot.lock.yaml | 6 +- .kokoro/requirements.txt | 6 +- docs/reference.rst | 115 ++++---------------------------------- noxfile.py | 24 +++++++- 4 files changed, 39 insertions(+), 112 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 773c1dfd2..d8a1bbca7 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2f155882785883336b4468d5218db737bb1d10c9cea7cb62219ad16fe248c03c -# created: 2023-11-29T14:54:29.548172703Z + digest: sha256:5ea6d0ab82c956b50962f91d94e206d3921537ae5fe1549ec5326381d8905cfa +# created: 2024-01-15T16:32:08.142785673Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index e5c1ffca9..bb3d6ca38 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -263,9 +263,9 @@ jeepney==0.8.0 \ # via # keyring # secretstorage -jinja2==3.1.2 \ - --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \ - --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 +jinja2==3.1.3 \ + --hash=sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa \ + --hash=sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90 # via gcp-releasetool keyring==24.2.0 \ --hash=sha256:4901caaf597bfd3bbd78c9a0c7c4c29fcd8310dab2cffefe749e916b6527acd6 \ diff --git a/docs/reference.rst b/docs/reference.rst index b886f1161..6c00df077 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -1,8 +1,6 @@ API Reference ~~~~~~~~~~~~~ -.. currentmodule:: google.cloud.bigquery - The main concepts with this API are: - :class:`~google.cloud.bigquery.client.Client` manages connections to the @@ -18,55 +16,12 @@ The main concepts with this API are: Client ====== -.. autosummary:: - :toctree: generated - - client.Client +.. automodule:: google.cloud.bigquery.client Job === -Job Configuration ------------------ - -.. autosummary:: - :toctree: generated - - job.QueryJobConfig - job.CopyJobConfig - job.LoadJobConfig - job.ExtractJobConfig - -Job Classes ------------ - -.. autosummary:: - :toctree: generated - - job.QueryJob - job.CopyJob - job.LoadJob - job.ExtractJob - -Job-Related Types ------------------ - -.. autosummary:: - :toctree: generated - - job.Compression - job.CreateDisposition - job.DestinationFormat - job.DmlStats - job.Encoding - job.OperationType - job.QueryPlanEntry - job.QueryPlanEntryStep - job.QueryPriority - job.ReservationUsage - job.SourceFormat - job.WriteDisposition - job.SchemaUpdateOption +.. automodule:: google.cloud.bigquery.job .. toctree:: :maxdepth: 2 @@ -77,63 +32,28 @@ Job-Related Types Dataset ======= -.. autosummary:: - :toctree: generated - - dataset.Dataset - dataset.DatasetListItem - dataset.DatasetReference - dataset.AccessEntry +.. automodule:: google.cloud.bigquery.dataset Table ===== -.. autosummary:: - :toctree: generated - - table.PartitionRange - table.RangePartitioning - table.Row - table.RowIterator - table.SnapshotDefinition - table.CloneDefinition - table.Table - table.TableListItem - table.TableReference - table.TimePartitioning - table.TimePartitioningType +.. automodule:: google.cloud.bigquery.table Model ===== -.. autosummary:: - :toctree: generated - - model.Model - model.ModelReference +.. automodule:: google.cloud.bigquery.model Routine ======= -.. autosummary:: - :toctree: generated - - routine.DeterminismLevel - routine.Routine - routine.RoutineArgument - routine.RoutineReference - routine.RoutineType +.. automodule:: google.cloud.bigquery.routine Schema ====== -.. autosummary:: - :toctree: generated - - schema.SchemaField - schema.PolicyTagList - +.. automodule:: google.cloud.bigquery.schema Query ===== @@ -147,25 +67,13 @@ Query Retries ======= -.. autosummary:: - :toctree: generated - - retry.DEFAULT_RETRY +.. automodule:: google.cloud.bigquery.retry External Configuration ====================== -.. autosummary:: - :toctree: generated - - external_config.ExternalSourceFormat - external_config.ExternalConfig - external_config.BigtableOptions - external_config.BigtableColumnFamily - external_config.BigtableColumn - external_config.CSVOptions - external_config.GoogleSheetsOptions +.. automodule:: google.cloud.bigquery.external_config .. toctree:: :maxdepth: 2 @@ -194,10 +102,7 @@ Enums Encryption Configuration ======================== -.. autosummary:: - :toctree: generated - - encryption_configuration.EncryptionConfiguration +.. automodule:: google.cloud.bigquery.encryption_configuration Additional Types diff --git a/noxfile.py b/noxfile.py index 66d68c04e..ae022232e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -418,7 +418,20 @@ def blacken(session): def docs(session): """Build the docs.""" - session.install("recommonmark", "sphinx==4.0.2", "sphinx_rtd_theme") + session.install( + # We need to pin to specific versions of the `sphinxcontrib-*` packages + # which still support sphinx 4.x. + # See https://github.com/googleapis/sphinx-docfx-yaml/issues/344 + # and https://github.com/googleapis/sphinx-docfx-yaml/issues/345. + "sphinxcontrib-applehelp==1.0.4", + "sphinxcontrib-devhelp==1.0.2", + "sphinxcontrib-htmlhelp==2.0.1", + "sphinxcontrib-qthelp==1.0.3", + "sphinxcontrib-serializinghtml==1.1.5", + "sphinx==4.5.0", + "alabaster", + "recommonmark", + ) session.install("google-cloud-storage") session.install("-e", ".[all]") @@ -443,6 +456,15 @@ def docfx(session): session.install("-e", ".") session.install( + # We need to pin to specific versions of the `sphinxcontrib-*` packages + # which still support sphinx 4.x. + # See https://github.com/googleapis/sphinx-docfx-yaml/issues/344 + # and https://github.com/googleapis/sphinx-docfx-yaml/issues/345. + "sphinxcontrib-applehelp==1.0.4", + "sphinxcontrib-devhelp==1.0.2", + "sphinxcontrib-htmlhelp==2.0.1", + "sphinxcontrib-qthelp==1.0.3", + "sphinxcontrib-serializinghtml==1.1.5", "gcp-sphinx-docfx-yaml", "alabaster", "recommonmark", From d90602de87e58b665cb974401a327a640805822f Mon Sep 17 00:00:00 2001 From: Stephanie A <129541811+DevStephanie@users.noreply.github.com> Date: Tue, 16 Jan 2024 17:04:16 -0600 Subject: [PATCH 221/536] docs: update `snippets.py` to use `query_and_wait` (#1773) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- docs/snippets.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/snippets.py b/docs/snippets.py index 72ac2a000..b4e78e36f 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -465,13 +465,12 @@ def test_client_query_total_rows(client, capsys): 'WHERE state = "TX" ' "LIMIT 100" ) - query_job = client.query( + results = client.query_and_wait( query, # Location must match that of the dataset(s) referenced in the query. location="US", - ) # API request - starts the query + ) # API request - starts the query and waits for results. - results = query_job.result() # Wait for query to complete. print("Got {} rows.".format(results.total_rows)) # [END bigquery_query_total_rows] @@ -551,7 +550,7 @@ def test_query_results_as_dataframe(client): LIMIT 10 """ - df = client.query(sql).to_dataframe() + df = client.query_and_wait(sql).to_dataframe() # [END bigquery_query_results_dataframe] assert isinstance(df, pandas.DataFrame) assert len(list(df)) == 2 # verify the number of columns From ffe80599429bef17681a37ec34e03488449ea812 Mon Sep 17 00:00:00 2001 From: Kira Date: Wed, 17 Jan 2024 15:08:00 -0500 Subject: [PATCH 222/536] chore: cleanup resources at startup (#1741) * chore: cleanup resources at startup time * reformmated with black for linter * changd how to call prefixer to clean up datasets, not tables * Removed formatting for uuid * Removed unneeded import of uuid * remove comment from dataset_access_test.py --- samples/snippets/authorized_view_tutorial_test.py | 10 +++------- samples/snippets/materialized_view_test.py | 3 ++- samples/snippets/natality_tutorial_test.py | 8 ++------ samples/snippets/quickstart_test.py | 6 ++---- samples/snippets/view_test.py | 3 ++- 5 files changed, 11 insertions(+), 19 deletions(-) diff --git a/samples/snippets/authorized_view_tutorial_test.py b/samples/snippets/authorized_view_tutorial_test.py index e2220fb54..04f6312d3 100644 --- a/samples/snippets/authorized_view_tutorial_test.py +++ b/samples/snippets/authorized_view_tutorial_test.py @@ -13,12 +13,12 @@ # limitations under the License. from typing import Iterator, List -import uuid from google.cloud import bigquery import pytest import authorized_view_tutorial # type: ignore +from conftest import prefixer # type: ignore @pytest.fixture(scope="module") @@ -38,12 +38,8 @@ def test_authorized_view_tutorial( client: bigquery.Client, datasets_to_delete: List[str] ) -> None: override_values = { - "source_dataset_id": "github_source_data_{}".format( - str(uuid.uuid4()).replace("-", "_") - ), - "shared_dataset_id": "shared_views_{}".format( - str(uuid.uuid4()).replace("-", "_") - ), + "source_dataset_id": f"{prefixer.create_prefix()}_authorized_view_tutorial", + "shared_dataset_id": f"{prefixer.create_prefix()}_authorized_view_tutorial_shared_views", } source_dataset_ref = "{}.{}".format( client.project, override_values["source_dataset_id"] diff --git a/samples/snippets/materialized_view_test.py b/samples/snippets/materialized_view_test.py index 59e08131e..1b464af6f 100644 --- a/samples/snippets/materialized_view_test.py +++ b/samples/snippets/materialized_view_test.py @@ -21,6 +21,7 @@ import pytest import materialized_view # type: ignore +from conftest import prefixer # type: ignore def temp_suffix() -> str: @@ -37,7 +38,7 @@ def bigquery_client_patch( @pytest.fixture(scope="module") def dataset_id(bigquery_client: bigquery.Client) -> Iterator[str]: - dataset_id = f"mvdataset_{temp_suffix()}" + dataset_id = f"{prefixer.create_prefix()}_materialized_view" bigquery_client.create_dataset(dataset_id) yield dataset_id bigquery_client.delete_dataset(dataset_id, delete_contents=True) diff --git a/samples/snippets/natality_tutorial_test.py b/samples/snippets/natality_tutorial_test.py index 7f24ca5cb..603d142f2 100644 --- a/samples/snippets/natality_tutorial_test.py +++ b/samples/snippets/natality_tutorial_test.py @@ -13,12 +13,12 @@ # limitations under the License. from typing import Iterator, List -import uuid from google.cloud import bigquery import pytest import natality_tutorial # type: ignore +from conftest import prefixer # type: ignore @pytest.fixture(scope="module") @@ -37,11 +37,7 @@ def datasets_to_delete(client: bigquery.Client) -> Iterator[List[str]]: def test_natality_tutorial( client: bigquery.Client, datasets_to_delete: List[str] ) -> None: - override_values = { - "dataset_id": "natality_regression_{}".format( - str(uuid.uuid4()).replace("-", "_") - ), - } + override_values = {"dataset_id": f"{prefixer.create_prefix()}_natality_tutorial"} datasets_to_delete.append(override_values["dataset_id"]) natality_tutorial.run_natality_tutorial(override_values) diff --git a/samples/snippets/quickstart_test.py b/samples/snippets/quickstart_test.py index 88a24618d..74a02a83a 100644 --- a/samples/snippets/quickstart_test.py +++ b/samples/snippets/quickstart_test.py @@ -13,12 +13,12 @@ # limitations under the License. from typing import Iterator, List -import uuid from google.cloud import bigquery import pytest import quickstart # type: ignore +from conftest import prefixer # type: ignore # Must match the dataset listed in quickstart.py (there's no easy way to # extract this). @@ -43,9 +43,7 @@ def test_quickstart( client: bigquery.Client, datasets_to_delete: List[str], ) -> None: - override_values = { - "dataset_id": "my_new_dataset_{}".format(str(uuid.uuid4()).replace("-", "_")), - } + override_values = {"dataset_id": f"{prefixer.create_prefix()}_quickstart"} datasets_to_delete.append(override_values["dataset_id"]) quickstart.run_quickstart(override_values) diff --git a/samples/snippets/view_test.py b/samples/snippets/view_test.py index 1e615db47..dfa1cdeee 100644 --- a/samples/snippets/view_test.py +++ b/samples/snippets/view_test.py @@ -20,6 +20,7 @@ import pytest import view # type: ignore +from conftest import prefixer # type: ignore def temp_suffix() -> str: @@ -53,7 +54,7 @@ def view_id(bigquery_client: bigquery.Client, view_dataset_id: str) -> Iterator[ def source_dataset_id( bigquery_client: bigquery.Client, project_id: str ) -> Iterator[str]: - dataset_id = f"{project_id}.view_{temp_suffix()}" + dataset_id = f"{prefixer.create_prefix()}_view" bigquery_client.create_dataset(dataset_id) yield dataset_id bigquery_client.delete_dataset(dataset_id, delete_contents=True) From d1161dddde41a7d35b30033ccbf6984a5de640bd Mon Sep 17 00:00:00 2001 From: Kira Date: Wed, 17 Jan 2024 15:53:20 -0500 Subject: [PATCH 223/536] docs: update multiple samples to change query to query_and_wait (#1784) * docs: update multiple samples for query_and_wait API * black * update rest of samples to use query_and_wait * changed query_jobs to results --- samples/client_query_add_column.py | 5 ++--- samples/client_query_destination_table_clustered.py | 5 +++-- samples/client_query_legacy_sql.py | 8 +++++--- samples/client_query_relax_column.py | 5 ++--- samples/client_query_w_struct_params.py | 6 ++++-- samples/download_public_data_sandbox.py | 4 +++- samples/snippets/authorized_view_tutorial.py | 6 ++---- samples/snippets/natality_tutorial.py | 3 +-- samples/snippets/simple_app.py | 6 ++---- samples/tests/conftest.py | 2 +- 10 files changed, 25 insertions(+), 25 deletions(-) diff --git a/samples/client_query_add_column.py b/samples/client_query_add_column.py index ec14087fb..6aae5fce4 100644 --- a/samples/client_query_add_column.py +++ b/samples/client_query_add_column.py @@ -36,14 +36,13 @@ def client_query_add_column(table_id: str) -> None: ) # Start the query, passing in the extra configuration. - query_job = client.query( + client.query_and_wait( # In this example, the existing table contains only the 'full_name' and # 'age' columns, while the results of this query will contain an # additional 'favorite_color' column. 'SELECT "Timmy" as full_name, 85 as age, "Blue" as favorite_color;', job_config=job_config, - ) # Make an API request. - query_job.result() # Wait for the job to complete. + ) # Make an API request and wait for job to complete. # Checks the updated length of the schema. table = client.get_table(table_id) # Make an API request. diff --git a/samples/client_query_destination_table_clustered.py b/samples/client_query_destination_table_clustered.py index de9fff2d0..19330500a 100644 --- a/samples/client_query_destination_table_clustered.py +++ b/samples/client_query_destination_table_clustered.py @@ -31,8 +31,9 @@ def client_query_destination_table_clustered(table_id: str) -> None: ) # Start the query, passing in the extra configuration. - query_job = client.query(sql, job_config=job_config) # Make an API request. - query_job.result() # Wait for the job to complete. + client.query_and_wait( + sql, job_config=job_config + ) # Make an API request and wait for job to complete. table = client.get_table(table_id) # Make an API request. if table.clustering_fields == cluster_fields: diff --git a/samples/client_query_legacy_sql.py b/samples/client_query_legacy_sql.py index 44917e4e0..1fb5b797a 100644 --- a/samples/client_query_legacy_sql.py +++ b/samples/client_query_legacy_sql.py @@ -29,10 +29,12 @@ def client_query_legacy_sql() -> None: # Set use_legacy_sql to True to use legacy SQL syntax. job_config = bigquery.QueryJobConfig(use_legacy_sql=True) - # Start the query, passing in the extra configuration. - query_job = client.query(query, job_config=job_config) # Make an API request. + # Start the query and waits for query job to complete, passing in the extra configuration. + results = client.query_and_wait( + query, job_config=job_config + ) # Make an API request. print("The query data:") - for row in query_job: + for row in results: print(row) # [END bigquery_query_legacy] diff --git a/samples/client_query_relax_column.py b/samples/client_query_relax_column.py index 22ecb33d1..26dce888f 100644 --- a/samples/client_query_relax_column.py +++ b/samples/client_query_relax_column.py @@ -39,13 +39,12 @@ def client_query_relax_column(table_id: str) -> None: ) # Start the query, passing in the extra configuration. - query_job = client.query( + client.query_and_wait( # In this example, the existing table contains 'full_name' and 'age' as # required columns, but the query results will omit the second column. 'SELECT "Beyonce" as full_name;', job_config=job_config, - ) # Make an API request. - query_job.result() # Wait for the job to complete. + ) # Make an API request and wait for job to complete # Checks the updated number of required fields. table = client.get_table(table_id) # Make an API request. diff --git a/samples/client_query_w_struct_params.py b/samples/client_query_w_struct_params.py index 6b68e78ed..cda2fcb43 100644 --- a/samples/client_query_w_struct_params.py +++ b/samples/client_query_w_struct_params.py @@ -30,8 +30,10 @@ def client_query_w_struct_params() -> None: ) ] ) - query_job = client.query(query, job_config=job_config) # Make an API request. + results = client.query_and_wait( + query, job_config=job_config + ) # Make an API request and waits for results. - for row in query_job: + for row in results: print(row.s) # [END bigquery_query_params_structs] diff --git a/samples/download_public_data_sandbox.py b/samples/download_public_data_sandbox.py index e165a31ce..909a7da05 100644 --- a/samples/download_public_data_sandbox.py +++ b/samples/download_public_data_sandbox.py @@ -27,7 +27,9 @@ def download_public_data_sandbox() -> None: query_string = "SELECT * FROM `bigquery-public-data.usa_names.usa_1910_current`" # Use the BigQuery Storage API to speed-up downloads of large tables. - dataframe = client.query(query_string).to_dataframe(create_bqstorage_client=True) + dataframe = client.query_and_wait(query_string).to_dataframe( + create_bqstorage_client=True + ) print(dataframe.info()) # [END bigquery_pandas_public_data_sandbox] diff --git a/samples/snippets/authorized_view_tutorial.py b/samples/snippets/authorized_view_tutorial.py index bfb61bc38..f52170bc6 100644 --- a/samples/snippets/authorized_view_tutorial.py +++ b/samples/snippets/authorized_view_tutorial.py @@ -62,15 +62,13 @@ def run_authorized_view_tutorial( FROM `bigquery-public-data.github_repos.commits` LIMIT 1000 """ - query_job = client.query( + client.query_and_wait( sql, # Location must match that of the dataset(s) referenced in the query # and of the destination table. location="US", job_config=job_config, - ) # API request - starts the query - - query_job.result() # Waits for the query to finish + ) # API request - starts the query and waits for query to finish # [END bigquery_avt_create_source_table] # Create a separate dataset to store your view diff --git a/samples/snippets/natality_tutorial.py b/samples/snippets/natality_tutorial.py index b330a3c21..df9fc15be 100644 --- a/samples/snippets/natality_tutorial.py +++ b/samples/snippets/natality_tutorial.py @@ -83,8 +83,7 @@ def run_natality_tutorial(override_values: Optional[Dict[str, str]] = None) -> N """ # Run the query. - query_job = client.query(query, job_config=job_config) - query_job.result() # Waits for the query to finish + client.query_and_wait(query, job_config=job_config) # Waits for the query to finish # [END bigquery_query_natality_tutorial] diff --git a/samples/snippets/simple_app.py b/samples/snippets/simple_app.py index 3d856d4bb..8281e1877 100644 --- a/samples/snippets/simple_app.py +++ b/samples/snippets/simple_app.py @@ -27,7 +27,7 @@ def query_stackoverflow() -> None: client = bigquery.Client() # [END bigquery_simple_app_client] # [START bigquery_simple_app_query] - query_job = client.query( + results = client.query_and_wait( """ SELECT CONCAT( @@ -38,9 +38,7 @@ def query_stackoverflow() -> None: WHERE tags like '%google-bigquery%' ORDER BY view_count DESC LIMIT 10""" - ) - - results = query_job.result() # Waits for job to complete. + ) # Waits for job to complete. # [END bigquery_simple_app_query] # [START bigquery_simple_app_print] diff --git a/samples/tests/conftest.py b/samples/tests/conftest.py index 99bd2e367..2b5b89c43 100644 --- a/samples/tests/conftest.py +++ b/samples/tests/conftest.py @@ -174,7 +174,7 @@ def model_id(client: bigquery.Client, dataset_id: str) -> str: model_id ) - client.query(sql).result() + client.query_and_wait(sql) return model_id From 955a4cd99e21cbca1b2f9c1dc6aa3fd8070cd61f Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Thu, 18 Jan 2024 12:28:28 -0600 Subject: [PATCH 224/536] docs: update the query with no cache sample to use query_and_wait API (#1770) Co-authored-by: Salem Boyland Co-authored-by: Kira --- samples/query_no_cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/query_no_cache.py b/samples/query_no_cache.py index 7501b7fc0..b942e5010 100644 --- a/samples/query_no_cache.py +++ b/samples/query_no_cache.py @@ -26,8 +26,8 @@ def query_no_cache() -> None: FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus; """ - query_job = client.query(sql, job_config=job_config) # Make an API request. + results = client.query_and_wait(sql, job_config=job_config) # Make an API request. - for row in query_job: + for row in results: print(row) # [END bigquery_query_no_cache] From 1f96439b3dbd27f11be5e2af84f290ec6094d0a4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 19 Jan 2024 17:35:26 -0600 Subject: [PATCH 225/536] docs: remove unused query code sample (#1769) * docs: remove unused query code sample This sample was moved in https://github.com/googleapis/python-bigquery/pull/1722/files#diff-2e8df14049580f42d6c73a3209838b96f3c9b185d2d7f2688683ae60bb2e7c43. Docs updated in internal change 597332356. * remove sample test too * update reference to query() sample in usage guides --------- Co-authored-by: Kira --- docs/usage/queries.rst | 4 +-- samples/client_query.py | 41 ------------------------------ samples/tests/test_client_query.py | 27 -------------------- 3 files changed, 2 insertions(+), 70 deletions(-) delete mode 100644 samples/client_query.py delete mode 100644 samples/tests/test_client_query.py diff --git a/docs/usage/queries.rst b/docs/usage/queries.rst index fc57e54de..56be8497e 100644 --- a/docs/usage/queries.rst +++ b/docs/usage/queries.rst @@ -5,9 +5,9 @@ Querying data ^^^^^^^^^^^^^ Run a query and wait for it to finish with the -:func:`~google.cloud.bigquery.client.Client.query` method: +:func:`~google.cloud.bigquery.client.Client.query_and_wait` method: -.. literalinclude:: ../samples/client_query.py +.. literalinclude:: ../samples/snippets/client_query.py :language: python :dedent: 4 :start-after: [START bigquery_query] diff --git a/samples/client_query.py b/samples/client_query.py deleted file mode 100644 index 80eac854e..000000000 --- a/samples/client_query.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -def client_query() -> None: - # TODO(swast): remove once docs in cloud.google.com have been updated to - # use samples/snippets/client_query.py - - # [START bigquery_query] - - from google.cloud import bigquery - - # Construct a BigQuery client object. - client = bigquery.Client() - - query = """ - SELECT name, SUM(number) as total_people - FROM `bigquery-public-data.usa_names.usa_1910_2013` - WHERE state = 'TX' - GROUP BY name, state - ORDER BY total_people DESC - LIMIT 20 - """ - query_job = client.query(query) # Make an API request. - - print("The query data:") - for row in query_job: - # Row values can be accessed by field name or index. - print("name={}, count={}".format(row[0], row["total_people"])) - # [END bigquery_query] diff --git a/samples/tests/test_client_query.py b/samples/tests/test_client_query.py deleted file mode 100644 index 5d4fb9c94..000000000 --- a/samples/tests/test_client_query.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import typing - -from .. import client_query - -if typing.TYPE_CHECKING: - import pytest - - -def test_client_query(capsys: "pytest.CaptureFixture[str]") -> None: - client_query.client_query() - out, err = capsys.readouterr() - assert "The query data:" in out - assert "name=James, count=272793" in out From 89f1299b3164b51fb0f29bc600a34ded59c10682 Mon Sep 17 00:00:00 2001 From: Stephanie A <129541811+DevStephanie@users.noreply.github.com> Date: Mon, 22 Jan 2024 15:18:16 -0600 Subject: [PATCH 226/536] docs: Updates `query` to `query and wait` in samples/desktopapp/user_credentials.py (#1787) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Updates file * Updates files * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Updates * edits --------- Co-authored-by: Owl Bot --- samples/desktopapp/user_credentials.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/desktopapp/user_credentials.py b/samples/desktopapp/user_credentials.py index 487a56c5f..68236d126 100644 --- a/samples/desktopapp/user_credentials.py +++ b/samples/desktopapp/user_credentials.py @@ -61,10 +61,10 @@ def main(project: str) -> None: WHERE name = 'William' GROUP BY name; """ - query_job = client.query(query_string) + results = client.query_and_wait(query_string) # Print the results. - for row in query_job.result(): # Wait for the job to complete. + for row in results: # Wait for the job to complete. print("{}: {}".format(row["name"], row["total"])) # [END bigquery_auth_user_query] From 4ba434287a0a25f027e3b63a80f8881a9b16723e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 23 Jan 2024 14:08:58 -0600 Subject: [PATCH 227/536] fix: `query_and_wait` now retains unknown query configuration `_properties` (#1793) * fix: `query_and_wait` now retains unknown query configuration `_properties` fix: raise `ValueError` in `query_and_wait` with wrong `job_config` type --- google/cloud/bigquery/_job_helpers.py | 24 +++++---- tests/unit/test__job_helpers.py | 75 +++++++++++++++++++++++---- 2 files changed, 79 insertions(+), 20 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 7356331b8..6debc377b 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -166,6 +166,14 @@ def do_query(): return future +def _validate_job_config(request_body: Dict[str, Any], invalid_key: str): + """Catch common mistakes, such as passing in a *JobConfig object of the + wrong type. + """ + if invalid_key in request_body: + raise ValueError(f"got unexpected key {repr(invalid_key)} in job_config") + + def _to_query_request( job_config: Optional[job.QueryJobConfig] = None, *, @@ -179,17 +187,15 @@ def _to_query_request( QueryRequest. If any configuration property is set that is not available in jobs.query, it will result in a server-side error. """ - request_body = {} - job_config_resource = job_config.to_api_repr() if job_config else {} - query_config_resource = job_config_resource.get("query", {}) + request_body = copy.copy(job_config.to_api_repr()) if job_config else {} - request_body.update(query_config_resource) + _validate_job_config(request_body, job.CopyJob._JOB_TYPE) + _validate_job_config(request_body, job.ExtractJob._JOB_TYPE) + _validate_job_config(request_body, job.LoadJob._JOB_TYPE) - # These keys are top level in job resource and query resource. - if "labels" in job_config_resource: - request_body["labels"] = job_config_resource["labels"] - if "dryRun" in job_config_resource: - request_body["dryRun"] = job_config_resource["dryRun"] + # Move query.* properties to top-level. + query_config_resource = request_body.pop("query", {}) + request_body.update(query_config_resource) # Default to standard SQL. request_body.setdefault("useLegacySql", False) diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index f2fe32d94..404a546ff 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -23,6 +23,9 @@ from google.cloud.bigquery.client import Client from google.cloud.bigquery import _job_helpers +from google.cloud.bigquery.job import copy_ as job_copy +from google.cloud.bigquery.job import extract as job_extract +from google.cloud.bigquery.job import load as job_load from google.cloud.bigquery.job import query as job_query from google.cloud.bigquery.query import ConnectionProperty, ScalarQueryParameter @@ -57,9 +60,34 @@ def make_query_response( @pytest.mark.parametrize( ("job_config", "expected"), ( - (None, make_query_request()), - (job_query.QueryJobConfig(), make_query_request()), - ( + pytest.param( + None, + make_query_request(), + id="job_config=None-default-request", + ), + pytest.param( + job_query.QueryJobConfig(), + make_query_request(), + id="job_config=QueryJobConfig()-default-request", + ), + pytest.param( + job_query.QueryJobConfig.from_api_repr( + { + "unknownTopLevelProperty": "some-test-value", + "query": { + "unknownQueryProperty": "some-other-value", + }, + }, + ), + make_query_request( + { + "unknownTopLevelProperty": "some-test-value", + "unknownQueryProperty": "some-other-value", + } + ), + id="job_config-with-unknown-properties-includes-all-properties-in-request", + ), + pytest.param( job_query.QueryJobConfig(default_dataset="my-project.my_dataset"), make_query_request( { @@ -69,17 +97,24 @@ def make_query_response( } } ), + id="job_config-with-default_dataset", ), - (job_query.QueryJobConfig(dry_run=True), make_query_request({"dryRun": True})), - ( + pytest.param( + job_query.QueryJobConfig(dry_run=True), + make_query_request({"dryRun": True}), + id="job_config-with-dry_run", + ), + pytest.param( job_query.QueryJobConfig(use_query_cache=False), make_query_request({"useQueryCache": False}), + id="job_config-with-use_query_cache", ), - ( + pytest.param( job_query.QueryJobConfig(use_legacy_sql=True), make_query_request({"useLegacySql": True}), + id="job_config-with-use_legacy_sql", ), - ( + pytest.param( job_query.QueryJobConfig( query_parameters=[ ScalarQueryParameter("named_param1", "STRING", "param-value"), @@ -103,8 +138,9 @@ def make_query_response( ], } ), + id="job_config-with-query_parameters-named", ), - ( + pytest.param( job_query.QueryJobConfig( query_parameters=[ ScalarQueryParameter(None, "STRING", "param-value"), @@ -126,8 +162,9 @@ def make_query_response( ], } ), + id="job_config-with-query_parameters-positional", ), - ( + pytest.param( job_query.QueryJobConfig( connection_properties=[ ConnectionProperty(key="time_zone", value="America/Chicago"), @@ -142,14 +179,17 @@ def make_query_response( ] } ), + id="job_config-with-connection_properties", ), - ( + pytest.param( job_query.QueryJobConfig(labels={"abc": "def"}), make_query_request({"labels": {"abc": "def"}}), + id="job_config-with-labels", ), - ( + pytest.param( job_query.QueryJobConfig(maximum_bytes_billed=987654), make_query_request({"maximumBytesBilled": "987654"}), + id="job_config-with-maximum_bytes_billed", ), ), ) @@ -159,6 +199,19 @@ def test__to_query_request(job_config, expected): assert result == expected +@pytest.mark.parametrize( + ("job_config", "invalid_key"), + ( + pytest.param(job_copy.CopyJobConfig(), "copy", id="copy"), + pytest.param(job_extract.ExtractJobConfig(), "extract", id="extract"), + pytest.param(job_load.LoadJobConfig(), "load", id="load"), + ), +) +def test__to_query_request_raises_for_invalid_config(job_config, invalid_key): + with pytest.raises(ValueError, match=f"{repr(invalid_key)} in job_config"): + _job_helpers._to_query_request(job_config, query="SELECT 1") + + def test__to_query_job_defaults(): mock_client = mock.create_autospec(Client) response = make_query_response( From 6559dde1568b2d07c1e0a142194f2b370efb8983 Mon Sep 17 00:00:00 2001 From: Gaurang Shah Date: Tue, 23 Jan 2024 19:03:36 -0500 Subject: [PATCH 228/536] feature: add query location for bigquery magic (#1771) Co-authored-by: Lingqing Gan --- google/cloud/bigquery/magics/magics.py | 11 +++++++++++ tests/unit/test_magics.py | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 2a3583c66..b7c685d9a 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -508,6 +508,15 @@ def _create_dataset_if_necessary(client, dataset_id): "Defaults to use tqdm_notebook. Install the ``tqdm`` package to use this feature." ), ) +@magic_arguments.argument( + "--location", + type=str, + default=None, + help=( + "Set the location to execute query." + "Defaults to location set in query setting in console." + ), +) def _cell_magic(line, query): """Underlying function for bigquery cell magic @@ -551,6 +560,7 @@ def _cell_magic(line, query): category=DeprecationWarning, ) use_bqstorage_api = not args.use_rest_api + location = args.location params = [] if params_option_value: @@ -579,6 +589,7 @@ def _cell_magic(line, query): default_query_job_config=context.default_query_job_config, client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), client_options=bigquery_client_options, + location=location, ) if context._connection: client._connection = context._connection diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index b03894095..1511cba9c 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -2053,3 +2053,21 @@ def test_bigquery_magic_create_dataset_fails(): ) assert close_transports.called + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_location(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock: + ip.run_cell_magic("bigquery", "--location=us-east1", "SELECT 17 AS num") + + client_options_used = run_query_mock.call_args_list[0][0][0] + assert client_options_used.location == "us-east1" From 17e9d06a1d1c2048ea9ec3c627a9afe654584a6b Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 13:39:09 -0600 Subject: [PATCH 229/536] chore(main): release 3.17.0 (#1780) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 22 ++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25c4ca1e5..bb916158d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,28 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.17.0](https://github.com/googleapis/python-bigquery/compare/v3.16.0...v3.17.0) (2024-01-24) + + +### Features + +* Support universe resolution ([#1774](https://github.com/googleapis/python-bigquery/issues/1774)) ([0b5c1d5](https://github.com/googleapis/python-bigquery/commit/0b5c1d597cdec3a05a16fb935595f773c5840bd4)) + + +### Bug Fixes + +* `query_and_wait` now retains unknown query configuration `_properties` ([#1793](https://github.com/googleapis/python-bigquery/issues/1793)) ([4ba4342](https://github.com/googleapis/python-bigquery/commit/4ba434287a0a25f027e3b63a80f8881a9b16723e)) +* Raise `ValueError` in `query_and_wait` with wrong `job_config` type ([4ba4342](https://github.com/googleapis/python-bigquery/commit/4ba434287a0a25f027e3b63a80f8881a9b16723e)) + + +### Documentation + +* Remove unused query code sample ([#1769](https://github.com/googleapis/python-bigquery/issues/1769)) ([1f96439](https://github.com/googleapis/python-bigquery/commit/1f96439b3dbd27f11be5e2af84f290ec6094d0a4)) +* Update `snippets.py` to use `query_and_wait` ([#1773](https://github.com/googleapis/python-bigquery/issues/1773)) ([d90602d](https://github.com/googleapis/python-bigquery/commit/d90602de87e58b665cb974401a327a640805822f)) +* Update multiple samples to change query to query_and_wait ([#1784](https://github.com/googleapis/python-bigquery/issues/1784)) ([d1161dd](https://github.com/googleapis/python-bigquery/commit/d1161dddde41a7d35b30033ccbf6984a5de640bd)) +* Update the query with no cache sample to use query_and_wait API ([#1770](https://github.com/googleapis/python-bigquery/issues/1770)) ([955a4cd](https://github.com/googleapis/python-bigquery/commit/955a4cd99e21cbca1b2f9c1dc6aa3fd8070cd61f)) +* Updates `query` to `query and wait` in samples/desktopapp/user_credentials.py ([#1787](https://github.com/googleapis/python-bigquery/issues/1787)) ([89f1299](https://github.com/googleapis/python-bigquery/commit/89f1299b3164b51fb0f29bc600a34ded59c10682)) + ## [3.16.0](https://github.com/googleapis/python-bigquery/compare/v3.15.0...v3.16.0) (2024-01-12) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index a3de40375..9f62912a4 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.16.0" +__version__ = "3.17.0" From b402a6df92e656aee10dd2c11c48f6ed93c74fd7 Mon Sep 17 00:00:00 2001 From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Date: Wed, 24 Jan 2024 12:41:27 -0800 Subject: [PATCH 230/536] fix: add pyarrow.large_strign to the _ARROW_SCALAR_IDS_TO_BQ map (#1796) Co-authored-by: Tim Swast --- google/cloud/bigquery/_pyarrow_helpers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py index 7266e5e02..946743eaf 100644 --- a/google/cloud/bigquery/_pyarrow_helpers.py +++ b/google/cloud/bigquery/_pyarrow_helpers.py @@ -91,6 +91,7 @@ def pyarrow_timestamp(): pyarrow.date64().id: "DATETIME", # because millisecond resolution pyarrow.binary().id: "BYTES", pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + pyarrow.large_string().id: "STRING", # The exact scale and precision don't matter, see below. pyarrow.decimal128(38, scale=9).id: "NUMERIC", } From 39f33b210ecbe9c2fd390825d29393c2d80257f5 Mon Sep 17 00:00:00 2001 From: Kira Date: Wed, 24 Jan 2024 13:59:45 -0800 Subject: [PATCH 231/536] fix: retry 'job exceeded rate limits' for DDL queries (#1794) * fix: retry 'job exceeded rate limits' for DDL queries * Fixed retry test logic to better align to library standards * added docstring for test * deleted extra coverage file * Update tests/unit/test_job_retry.py Co-authored-by: Tim Swast * requested changes to retry jobs test * slight change to assert statemet * added TODO statements and fixed default job retry * modify sleep time and path names --------- Co-authored-by: Tim Swast --- google/cloud/bigquery/retry.py | 2 +- tests/unit/test_job_retry.py | 80 ++++++++++++++++++++++++++++++++++ tests/unit/test_retry.py | 27 ++++++++++++ 3 files changed, 108 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index b01c0662c..01b127972 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -73,7 +73,7 @@ def _should_retry(exc): deadline on the retry object. """ -job_retry_reasons = "rateLimitExceeded", "backendError" +job_retry_reasons = "rateLimitExceeded", "backendError", "jobRateLimitExceeded" def _job_should_retry(exc): diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index 4fa96fcec..0e984c8fc 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -22,6 +22,10 @@ import google.api_core.retry import freezegun +from google.cloud.bigquery.client import Client +from google.cloud.bigquery import _job_helpers +from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + from .helpers import make_connection @@ -240,3 +244,79 @@ def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): ), ): job.result(job_retry=google.api_core.retry.Retry()) + + +def test_query_and_wait_retries_job_for_DDL_queries(): + """ + Specific test for retrying DDL queries with "jobRateLimitExceeded" error: + https://github.com/googleapis/python-bigquery/issues/1790 + """ + freezegun.freeze_time(auto_tick_seconds=1) + client = mock.create_autospec(Client) + client._call_api.__name__ = "_call_api" + client._call_api.__qualname__ = "Client._call_api" + client._call_api.__annotations__ = {} + client._call_api.__type_params__ = () + client._call_api.side_effect = ( + { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": False, + }, + google.api_core.exceptions.InternalServerError( + "job_retry me", errors=[{"reason": "jobRateLimitExceeded"}] + ), + google.api_core.exceptions.BadRequest( + "retry me", errors=[{"reason": "jobRateLimitExceeded"}] + ), + { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": True, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ], + }, + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + }, + ) + rows = _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + page_size=None, + max_results=None, + retry=DEFAULT_JOB_RETRY, + job_retry=DEFAULT_JOB_RETRY, + ) + assert len(list(rows)) == 4 + + # Relevant docs for the REST API path: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + # and https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults + query_request_path = "/projects/request-project/queries" + + calls = client._call_api.call_args_list + _, kwargs = calls[0] + assert kwargs["method"] == "POST" + assert kwargs["path"] == query_request_path + + # TODO: Add assertion statements for response paths after PR#1797 is fixed + + _, kwargs = calls[3] + assert kwargs["method"] == "POST" + assert kwargs["path"] == query_request_path diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index 1109b7ff2..2fcb84e21 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -129,3 +129,30 @@ def test_DEFAULT_JOB_RETRY_deadline(): # Make sure we can retry the job at least once. assert DEFAULT_JOB_RETRY._deadline > DEFAULT_RETRY._deadline + + +def test_DEFAULT_JOB_RETRY_job_rate_limit_exceeded_retry_predicate(): + """Tests the retry predicate specifically for jobRateLimitExceeded.""" + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + from google.api_core.exceptions import ClientError + + # Non-ClientError exceptions should never trigger a retry + assert not DEFAULT_JOB_RETRY._predicate(TypeError()) + + # ClientError without specific reason shouldn't trigger a retry + assert not DEFAULT_JOB_RETRY._predicate(ClientError("fail")) + + # ClientError with generic reason "idk" shouldn't trigger a retry + assert not DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="idk")]) + ) + + # ClientError with reason "jobRateLimitExceeded" should trigger a retry + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="jobRateLimitExceeded")]) + ) + + # Other retryable reasons should still work as expected + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="backendError")]) + ) From 6176fcc56d889ba86f114c372a2d0f9ccdc5f516 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 25 Jan 2024 10:35:02 -0600 Subject: [PATCH 232/536] chore(main): release 3.17.1 (#1798) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 8 ++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb916158d..382123253 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.17.1](https://github.com/googleapis/python-bigquery/compare/v3.17.0...v3.17.1) (2024-01-24) + + +### Bug Fixes + +* Add pyarrow.large_strign to the _ARROW_SCALAR_IDS_TO_BQ map ([#1796](https://github.com/googleapis/python-bigquery/issues/1796)) ([b402a6d](https://github.com/googleapis/python-bigquery/commit/b402a6df92e656aee10dd2c11c48f6ed93c74fd7)) +* Retry 'job exceeded rate limits' for DDL queries ([#1794](https://github.com/googleapis/python-bigquery/issues/1794)) ([39f33b2](https://github.com/googleapis/python-bigquery/commit/39f33b210ecbe9c2fd390825d29393c2d80257f5)) + ## [3.17.0](https://github.com/googleapis/python-bigquery/compare/v3.16.0...v3.17.0) (2024-01-24) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 9f62912a4..9b72eddf7 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.17.0" +__version__ = "3.17.1" From 68ebbe12d455ce8e9b1784fb11787c2fb842ef22 Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Thu, 25 Jan 2024 12:19:24 -0600 Subject: [PATCH 233/536] docs: update `client_query_destination_table.py` sample to use `query_and_wait` (#1783) * docs: update client_query_destination_table.py to use query_and_wait API * docs: update client_query_destination_table.py to use query_and_wait API --------- Co-authored-by: Salem Boyland Co-authored-by: Kira Co-authored-by: Tim Swast --- samples/client_query_destination_table.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samples/client_query_destination_table.py b/samples/client_query_destination_table.py index 486576fea..f6a622229 100644 --- a/samples/client_query_destination_table.py +++ b/samples/client_query_destination_table.py @@ -32,8 +32,9 @@ def client_query_destination_table(table_id: str) -> None: """ # Start the query, passing in the extra configuration. - query_job = client.query(sql, job_config=job_config) # Make an API request. - query_job.result() # Wait for the job to complete. + client.query_and_wait( + sql, job_config=job_config + ) # Make an API request and wait for the query to finish. print("Query results loaded to the table {}".format(table_id)) # [END bigquery_query_destination_table] From 410f71e6b6e755928e363ed89c1044e14b0db9cc Mon Sep 17 00:00:00 2001 From: Stephanie A <129541811+DevStephanie@users.noreply.github.com> Date: Thu, 25 Jan 2024 14:34:17 -0600 Subject: [PATCH 234/536] docs: update to use `query_and_wait` in `client_query_w_positional_params.py` (#1786) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Updates file * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Correcting RowIterator * Variable corrections --------- Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- samples/client_query_w_positional_params.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/samples/client_query_w_positional_params.py b/samples/client_query_w_positional_params.py index 9cdde69ca..24f3e0dbe 100644 --- a/samples/client_query_w_positional_params.py +++ b/samples/client_query_w_positional_params.py @@ -35,8 +35,10 @@ def client_query_w_positional_params() -> None: bigquery.ScalarQueryParameter(None, "INT64", 250), ] ) - query_job = client.query(query, job_config=job_config) # Make an API request. + results = client.query_and_wait( + query, job_config=job_config + ) # Make an API request. - for row in query_job: + for row in results: print("{}: \t{}".format(row.word, row.word_count)) # [END bigquery_query_params_positional] From ef89f9e58c22b3af5a7757b69daa030116012350 Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Thu, 25 Jan 2024 15:39:41 -0600 Subject: [PATCH 235/536] Docs: update the query destination table legacy file to use query_and_wait API (#1775) * docs: update query destination table legacy file to use query_and_wait API * docs: update the query_destination_table_legacy.py to use the query_and_wait API --------- Co-authored-by: Salem Boyland Co-authored-by: Kira Co-authored-by: Chalmer Lowe --- samples/client_query_destination_table_legacy.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samples/client_query_destination_table_legacy.py b/samples/client_query_destination_table_legacy.py index 37f50fdb4..26c3945ca 100644 --- a/samples/client_query_destination_table_legacy.py +++ b/samples/client_query_destination_table_legacy.py @@ -36,8 +36,9 @@ def client_query_destination_table_legacy(table_id: str) -> None: """ # Start the query, passing in the extra configuration. - query_job = client.query(sql, job_config=job_config) # Make an API request. - query_job.result() # Wait for the job to complete. + client.query_and_wait( + sql, job_config=job_config + ) # Make an API request and wait for the query to finish. print("Query results loaded to the table {}".format(table_id)) # [END bigquery_query_legacy_large_results] From dbf10dee51a7635e9b98658f205ded2de087a06f Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Thu, 25 Jan 2024 16:12:59 -0600 Subject: [PATCH 236/536] docs: update sample for query_to_arrow to use query_and_wait API (#1776) Co-authored-by: Salem Boyland Co-authored-by: Kira Co-authored-by: Chalmer Lowe --- samples/query_to_arrow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/query_to_arrow.py b/samples/query_to_arrow.py index f1afc7c94..d359bb79a 100644 --- a/samples/query_to_arrow.py +++ b/samples/query_to_arrow.py @@ -44,8 +44,8 @@ def query_to_arrow() -> "pyarrow.Table": FROM races r CROSS JOIN UNNEST(r.participants) as participant; """ - query_job = client.query(sql) - arrow_table = query_job.to_arrow() # Make an API request. + results = client.query_and_wait(sql) + arrow_table = results.to_arrow() # Make an API request. print( "Downloaded {} rows, {} columns.".format( From a7be88adf8a480ee61aa79789cb53df1b79bb091 Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Fri, 26 Jan 2024 01:18:11 -0600 Subject: [PATCH 237/536] docs: update query_external_sheets_permanent_table.py to use query_and_wait API (#1778) Co-authored-by: Salem Boyland Co-authored-by: Kira --- samples/query_external_sheets_permanent_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/query_external_sheets_permanent_table.py b/samples/query_external_sheets_permanent_table.py index f23f44259..0f8ddbae1 100644 --- a/samples/query_external_sheets_permanent_table.py +++ b/samples/query_external_sheets_permanent_table.py @@ -69,10 +69,10 @@ def query_external_sheets_permanent_table(dataset_id: str) -> None: # Example query to find states starting with "W". sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id) - query_job = client.query(sql) # Make an API request. + results = client.query_and_wait(sql) # Make an API request. # Wait for the query to complete. - w_states = list(query_job) + w_states = list(results) print( "There are {} states with names starting with W in the selected range.".format( len(w_states) From 81563b06298fe3a64be6a89b583c3d64758ca12a Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Fri, 26 Jan 2024 07:12:04 -0600 Subject: [PATCH 238/536] docs: update to use API (#1781) Co-authored-by: Salem Boyland Co-authored-by: Kira --- samples/query_external_gcs_temporary_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/query_external_gcs_temporary_table.py b/samples/query_external_gcs_temporary_table.py index d622ab1a3..e0bc8438f 100644 --- a/samples/query_external_gcs_temporary_table.py +++ b/samples/query_external_gcs_temporary_table.py @@ -38,8 +38,8 @@ def query_external_gcs_temporary_table() -> None: # Example query to find states starting with 'W'. sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id) - query_job = client.query(sql, job_config=job_config) # Make an API request. + results = client.query_and_wait(sql, job_config=job_config) # Make an API request. - w_states = list(query_job) # Wait for the job to complete. + w_states = list(results) # Wait for the job to complete. print("There are {} states with names starting with W.".format(len(w_states))) # [END bigquery_query_external_gcs_temp] From ba3694852c13c8a29fe0f9d923353e82acfd4278 Mon Sep 17 00:00:00 2001 From: Stephanie A <129541811+DevStephanie@users.noreply.github.com> Date: Fri, 26 Jan 2024 12:01:43 -0600 Subject: [PATCH 239/536] docs: update to use `query_and_wait` in `samples/client_query_w_timestamp_params.py` (#1785) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Updates file * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Correcting RowIterator * Correcting variable names --------- Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- samples/client_query_w_timestamp_params.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/samples/client_query_w_timestamp_params.py b/samples/client_query_w_timestamp_params.py index c1ade8782..1dbb1e9b6 100644 --- a/samples/client_query_w_timestamp_params.py +++ b/samples/client_query_w_timestamp_params.py @@ -32,8 +32,10 @@ def client_query_w_timestamp_params() -> None: ) ] ) - query_job = client.query(query, job_config=job_config) # Make an API request. + results = client.query_and_wait( + query, job_config=job_config + ) # Make an API request. - for row in query_job: + for row in results: print(row) # [END bigquery_query_params_timestamps] From 12985942942b8f205ecd261fcdf620df9a640460 Mon Sep 17 00:00:00 2001 From: Stephanie A <129541811+DevStephanie@users.noreply.github.com> Date: Mon, 29 Jan 2024 17:46:33 -0600 Subject: [PATCH 240/536] docs: update to_geodataframe to use query_and_wait functionality (#1800) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Updates file * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Correcting RowIterator * Correcting variable names * Correcting methods --------- Co-authored-by: Owl Bot Co-authored-by: Kira --- samples/geography/to_geodataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/to_geodataframe.py b/samples/geography/to_geodataframe.py index e36331f27..630d8d0bf 100644 --- a/samples/geography/to_geodataframe.py +++ b/samples/geography/to_geodataframe.py @@ -33,6 +33,6 @@ def get_austin_service_requests_as_geography() -> "pandas.DataFrame": LIMIT 10 """ - df = client.query(sql).to_geodataframe() + df = client.query_and_wait(sql).to_geodataframe() # [END bigquery_query_results_geodataframe] return df From 62490325f64e5d66303d9218992e28ac5f21cb3f Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Tue, 30 Jan 2024 10:02:42 -0800 Subject: [PATCH 241/536] fix: change load_table_from_json autodetect logic (#1804) --- google/cloud/bigquery/client.py | 18 ++- tests/system/test_client.py | 39 ++++++ tests/unit/test_client.py | 203 +++++++++++++++++++++++++++++++- 3 files changed, 255 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index b2ea130c4..4708e753b 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2833,8 +2833,22 @@ def load_table_from_json( new_job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON - if new_job_config.schema is None: - new_job_config.autodetect = True + # In specific conditions, we check if the table alread exists, and/or + # set the autodetect value for the user. For exact conditions, see table + # https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297 + if new_job_config.schema is None and new_job_config.autodetect is None: + if new_job_config.write_disposition in ( + job.WriteDisposition.WRITE_TRUNCATE, + job.WriteDisposition.WRITE_EMPTY, + ): + new_job_config.autodetect = True + else: + try: + self.get_table(destination) + except core_exceptions.NotFound: + new_job_config.autodetect = True + else: + new_job_config.autodetect = False if project is None: project = self.project diff --git a/tests/system/test_client.py b/tests/system/test_client.py index d7e56f7ff..74c152cf2 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -994,6 +994,45 @@ def test_load_table_from_json_schema_autodetect(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, 2) + # Autodetect makes best effort to infer the schema, but situations exist + # when the detected schema is wrong, and does not match existing schema. + # Thus the client sets autodetect = False when table exists and just uses + # the existing schema. This test case uses a special case where backend has + # no way to distinguish int from string. + def test_load_table_from_json_schema_autodetect_table_exists(self): + json_rows = [ + {"name": "123", "age": 18, "birthday": "2001-10-15", "is_awesome": False}, + {"name": "456", "age": 79, "birthday": "1940-03-10", "is_awesome": True}, + ] + + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_json_basic_use".format( + Config.CLIENT.project, dataset_id + ) + + # Use schema with NULLABLE fields, because schema autodetection + # defaults to field mode NULLABLE. + table_schema = ( + bigquery.SchemaField("name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"), + bigquery.SchemaField("birthday", "DATE", mode="NULLABLE"), + bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"), + ) + # create the table before loading so that the column order is predictable + table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # do not pass an explicit job config to trigger automatic schema detection + load_job = Config.CLIENT.load_table_from_json(json_rows, table_id) + load_job.result() + + table = Config.CLIENT.get_table(table) + self.assertEqual(tuple(table.schema), table_schema) + self.assertEqual(table.num_rows, 2) + def test_load_avro_from_uri_then_dump_table(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SourceFormat diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 56bdbad5e..42581edc1 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -8951,6 +8951,8 @@ def test_load_table_from_dataframe_w_higher_scale_decimal128_datatype(self): SchemaField("x", "BIGNUMERIC", "NULLABLE", None), ) + # With autodetect specified, we pass the value as is. For more info, see + # https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297 def test_load_table_from_json_basic_use(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -8962,12 +8964,28 @@ def test_load_table_from_json_basic_use(self): {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, ] + job_config = job.LoadJobConfig(autodetect=True) + load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - with load_patch as load_table_from_file: - client.load_table_from_json(json_rows, self.TABLE_REF) + # mock: remote table already exists + get_table_reference = { + "projectId": "project_id", + "datasetId": "test_dataset", + "tableId": "test_table", + } + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock(table_reference=get_table_reference), + ) + + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_json( + json_rows, self.TABLE_REF, job_config=job_config + ) load_table_from_file.assert_called_once_with( client, @@ -9066,6 +9084,174 @@ def test_load_table_from_json_w_invalid_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg + # When all following are true: + # (1) no schema provided; + # (2) no autodetect value provided; + # (3) writeDisposition == WRITE_APPEND or None; + # (4) table already exists, + # client sets autodetect == False + # For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297 + def test_load_table_from_json_wo_schema_wo_autodetect_write_append_w_table(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.job import WriteDisposition + + client = self._make_client() + + json_rows = [ + {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False}, + {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, + ] + + job_config = job.LoadJobConfig(write_disposition=WriteDisposition.WRITE_APPEND) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + # mock: remote table already exists + get_table_reference = { + "projectId": "project_id", + "datasetId": "test_dataset", + "tableId": "test_table", + } + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock(table_reference=get_table_reference), + ) + + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_json( + json_rows, self.TABLE_REF, job_config=job_config + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location=client.location, + project=client.project, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON + assert sent_config.schema is None + assert not sent_config.autodetect + + # When all following are true: + # (1) no schema provided; + # (2) no autodetect value provided; + # (3) writeDisposition == WRITE_APPEND or None; + # (4) table does NOT exist, + # client sets autodetect == True + # For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297 + def test_load_table_from_json_wo_schema_wo_autodetect_write_append_wo_table(self): + import google.api_core.exceptions as core_exceptions + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.job import WriteDisposition + + client = self._make_client() + + json_rows = [ + {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False}, + {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, + ] + + job_config = job.LoadJobConfig(write_disposition=WriteDisposition.WRITE_APPEND) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + # mock: remote table doesn't exist + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=core_exceptions.NotFound(""), + ) + + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_json( + json_rows, self.TABLE_REF, job_config=job_config + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location=client.location, + project=client.project, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON + assert sent_config.schema is None + assert sent_config.autodetect + + # When all following are true: + # (1) no schema provided; + # (2) no autodetect value provided; + # (3) writeDisposition == WRITE_TRUNCATE or WRITE_EMPTY; + # client sets autodetect == True + # For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297 + def test_load_table_from_json_wo_schema_wo_autodetect_others(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.job import WriteDisposition + + client = self._make_client() + + json_rows = [ + {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False}, + {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True}, + ] + + job_config = job.LoadJobConfig( + write_disposition=WriteDisposition.WRITE_TRUNCATE + ) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json( + json_rows, self.TABLE_REF, job_config=job_config + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location=client.location, + project=client.project, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON + assert sent_config.schema is None + assert sent_config.autodetect + def test_load_table_from_json_w_explicit_job_config_override(self): from google.cloud.bigquery import job from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -9190,8 +9376,19 @@ def test_load_table_from_json_unicode_emoji_data_case(self): load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) + # mock: remote table already exists + get_table_reference = { + "projectId": "project_id", + "datasetId": "test_dataset", + "tableId": "test_table", + } + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock(table_reference=get_table_reference), + ) - with load_patch as load_table_from_file: + with load_patch as load_table_from_file, get_table_patch: client.load_table_from_json(json_rows, self.TABLE_REF) load_table_from_file.assert_called_once_with( From fe3183e674b5cf6da9c745722f2ba8bfe84b50bd Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 6 Feb 2024 09:58:34 -0500 Subject: [PATCH 242/536] chore(main): release 3.17.2 (#1799) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 19 +++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 382123253..7bedb5cf6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.17.2](https://github.com/googleapis/python-bigquery/compare/v3.17.1...v3.17.2) (2024-01-30) + + +### Bug Fixes + +* Change load_table_from_json autodetect logic ([#1804](https://github.com/googleapis/python-bigquery/issues/1804)) ([6249032](https://github.com/googleapis/python-bigquery/commit/62490325f64e5d66303d9218992e28ac5f21cb3f)) + + +### Documentation + +* Update to use API ([#1781](https://github.com/googleapis/python-bigquery/issues/1781)) ([81563b0](https://github.com/googleapis/python-bigquery/commit/81563b06298fe3a64be6a89b583c3d64758ca12a)) +* Update `client_query_destination_table.py` sample to use `query_and_wait` ([#1783](https://github.com/googleapis/python-bigquery/issues/1783)) ([68ebbe1](https://github.com/googleapis/python-bigquery/commit/68ebbe12d455ce8e9b1784fb11787c2fb842ef22)) +* Update query_external_sheets_permanent_table.py to use query_and_wait API ([#1778](https://github.com/googleapis/python-bigquery/issues/1778)) ([a7be88a](https://github.com/googleapis/python-bigquery/commit/a7be88adf8a480ee61aa79789cb53df1b79bb091)) +* Update sample for query_to_arrow to use query_and_wait API ([#1776](https://github.com/googleapis/python-bigquery/issues/1776)) ([dbf10de](https://github.com/googleapis/python-bigquery/commit/dbf10dee51a7635e9b98658f205ded2de087a06f)) +* Update the query destination table legacy file to use query_and_wait API ([#1775](https://github.com/googleapis/python-bigquery/issues/1775)) ([ef89f9e](https://github.com/googleapis/python-bigquery/commit/ef89f9e58c22b3af5a7757b69daa030116012350)) +* Update to use `query_and_wait` in `client_query_w_positional_params.py` ([#1786](https://github.com/googleapis/python-bigquery/issues/1786)) ([410f71e](https://github.com/googleapis/python-bigquery/commit/410f71e6b6e755928e363ed89c1044e14b0db9cc)) +* Update to use `query_and_wait` in `samples/client_query_w_timestamp_params.py` ([#1785](https://github.com/googleapis/python-bigquery/issues/1785)) ([ba36948](https://github.com/googleapis/python-bigquery/commit/ba3694852c13c8a29fe0f9d923353e82acfd4278)) +* Update to_geodataframe to use query_and_wait functionality ([#1800](https://github.com/googleapis/python-bigquery/issues/1800)) ([1298594](https://github.com/googleapis/python-bigquery/commit/12985942942b8f205ecd261fcdf620df9a640460)) + ## [3.17.1](https://github.com/googleapis/python-bigquery/compare/v3.17.0...v3.17.1) (2024-01-24) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 9b72eddf7..771b77a38 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.17.1" +__version__ = "3.17.2" From 6163711cf5591b07e4facfb30902d5eae31b7606 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 7 Feb 2024 16:48:22 -0800 Subject: [PATCH 243/536] build(deps): bump cryptography from 41.0.6 to 42.0.0 in /synthtool/gcp/templates/python_library/.kokoro (#1811) Source-Link: https://github.com/googleapis/synthtool/commit/e13b22b1f660c80e4c3e735a9177d2f16c4b8bdc Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:97b671488ad548ef783a452a9e1276ac10f144d5ae56d98cc4bf77ba504082b4 Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- .github/.OwlBot.lock.yaml | 4 +-- .kokoro/requirements.txt | 57 ++++++++++++++++++++++----------------- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index d8a1bbca7..2aefd0e91 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5ea6d0ab82c956b50962f91d94e206d3921537ae5fe1549ec5326381d8905cfa -# created: 2024-01-15T16:32:08.142785673Z + digest: sha256:97b671488ad548ef783a452a9e1276ac10f144d5ae56d98cc4bf77ba504082b4 +# created: 2024-02-06T03:20:16.660474034Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index bb3d6ca38..8c11c9f3e 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -93,30 +93,39 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -cryptography==41.0.6 \ - --hash=sha256:068bc551698c234742c40049e46840843f3d98ad7ce265fd2bd4ec0d11306596 \ - --hash=sha256:0f27acb55a4e77b9be8d550d762b0513ef3fc658cd3eb15110ebbcbd626db12c \ - --hash=sha256:2132d5865eea673fe6712c2ed5fb4fa49dba10768bb4cc798345748380ee3660 \ - --hash=sha256:3288acccef021e3c3c10d58933f44e8602cf04dba96d9796d70d537bb2f4bbc4 \ - --hash=sha256:35f3f288e83c3f6f10752467c48919a7a94b7d88cc00b0668372a0d2ad4f8ead \ - --hash=sha256:398ae1fc711b5eb78e977daa3cbf47cec20f2c08c5da129b7a296055fbb22aed \ - --hash=sha256:422e3e31d63743855e43e5a6fcc8b4acab860f560f9321b0ee6269cc7ed70cc3 \ - --hash=sha256:48783b7e2bef51224020efb61b42704207dde583d7e371ef8fc2a5fb6c0aabc7 \ - --hash=sha256:4d03186af98b1c01a4eda396b137f29e4e3fb0173e30f885e27acec8823c1b09 \ - --hash=sha256:5daeb18e7886a358064a68dbcaf441c036cbdb7da52ae744e7b9207b04d3908c \ - --hash=sha256:60e746b11b937911dc70d164060d28d273e31853bb359e2b2033c9e93e6f3c43 \ - --hash=sha256:742ae5e9a2310e9dade7932f9576606836ed174da3c7d26bc3d3ab4bd49b9f65 \ - --hash=sha256:7e00fb556bda398b99b0da289ce7053639d33b572847181d6483ad89835115f6 \ - --hash=sha256:85abd057699b98fce40b41737afb234fef05c67e116f6f3650782c10862c43da \ - --hash=sha256:8efb2af8d4ba9dbc9c9dd8f04d19a7abb5b49eab1f3694e7b5a16a5fc2856f5c \ - --hash=sha256:ae236bb8760c1e55b7a39b6d4d32d2279bc6c7c8500b7d5a13b6fb9fc97be35b \ - --hash=sha256:afda76d84b053923c27ede5edc1ed7d53e3c9f475ebaf63c68e69f1403c405a8 \ - --hash=sha256:b27a7fd4229abef715e064269d98a7e2909ebf92eb6912a9603c7e14c181928c \ - --hash=sha256:b648fe2a45e426aaee684ddca2632f62ec4613ef362f4d681a9a6283d10e079d \ - --hash=sha256:c5a550dc7a3b50b116323e3d376241829fd326ac47bc195e04eb33a8170902a9 \ - --hash=sha256:da46e2b5df770070412c46f87bac0849b8d685c5f2679771de277a422c7d0b86 \ - --hash=sha256:f39812f70fc5c71a15aa3c97b2bbe213c3f2a460b79bd21c40d033bb34a9bf36 \ - --hash=sha256:ff369dd19e8fe0528b02e8df9f2aeb2479f89b1270d90f96a63500afe9af5cae +cryptography==42.0.0 \ + --hash=sha256:0a68bfcf57a6887818307600c3c0ebc3f62fbb6ccad2240aa21887cda1f8df1b \ + --hash=sha256:146e971e92a6dd042214b537a726c9750496128453146ab0ee8971a0299dc9bd \ + --hash=sha256:14e4b909373bc5bf1095311fa0f7fcabf2d1a160ca13f1e9e467be1ac4cbdf94 \ + --hash=sha256:206aaf42e031b93f86ad60f9f5d9da1b09164f25488238ac1dc488334eb5e221 \ + --hash=sha256:3005166a39b70c8b94455fdbe78d87a444da31ff70de3331cdec2c568cf25b7e \ + --hash=sha256:324721d93b998cb7367f1e6897370644751e5580ff9b370c0a50dc60a2003513 \ + --hash=sha256:33588310b5c886dfb87dba5f013b8d27df7ffd31dc753775342a1e5ab139e59d \ + --hash=sha256:35cf6ed4c38f054478a9df14f03c1169bb14bd98f0b1705751079b25e1cb58bc \ + --hash=sha256:3ca482ea80626048975360c8e62be3ceb0f11803180b73163acd24bf014133a0 \ + --hash=sha256:56ce0c106d5c3fec1038c3cca3d55ac320a5be1b44bf15116732d0bc716979a2 \ + --hash=sha256:5a217bca51f3b91971400890905a9323ad805838ca3fa1e202a01844f485ee87 \ + --hash=sha256:678cfa0d1e72ef41d48993a7be75a76b0725d29b820ff3cfd606a5b2b33fda01 \ + --hash=sha256:69fd009a325cad6fbfd5b04c711a4da563c6c4854fc4c9544bff3088387c77c0 \ + --hash=sha256:6cf9b76d6e93c62114bd19485e5cb003115c134cf9ce91f8ac924c44f8c8c3f4 \ + --hash=sha256:74f18a4c8ca04134d2052a140322002fef535c99cdbc2a6afc18a8024d5c9d5b \ + --hash=sha256:85f759ed59ffd1d0baad296e72780aa62ff8a71f94dc1ab340386a1207d0ea81 \ + --hash=sha256:87086eae86a700307b544625e3ba11cc600c3c0ef8ab97b0fda0705d6db3d4e3 \ + --hash=sha256:8814722cffcfd1fbd91edd9f3451b88a8f26a5fd41b28c1c9193949d1c689dc4 \ + --hash=sha256:8fedec73d590fd30c4e3f0d0f4bc961aeca8390c72f3eaa1a0874d180e868ddf \ + --hash=sha256:9515ea7f596c8092fdc9902627e51b23a75daa2c7815ed5aa8cf4f07469212ec \ + --hash=sha256:988b738f56c665366b1e4bfd9045c3efae89ee366ca3839cd5af53eaa1401bce \ + --hash=sha256:a2a8d873667e4fd2f34aedab02ba500b824692c6542e017075a2efc38f60a4c0 \ + --hash=sha256:bd7cf7a8d9f34cc67220f1195884151426ce616fdc8285df9054bfa10135925f \ + --hash=sha256:bdce70e562c69bb089523e75ef1d9625b7417c6297a76ac27b1b8b1eb51b7d0f \ + --hash=sha256:be14b31eb3a293fc6e6aa2807c8a3224c71426f7c4e3639ccf1a2f3ffd6df8c3 \ + --hash=sha256:be41b0c7366e5549265adf2145135dca107718fa44b6e418dc7499cfff6b4689 \ + --hash=sha256:c310767268d88803b653fffe6d6f2f17bb9d49ffceb8d70aed50ad45ea49ab08 \ + --hash=sha256:c58115384bdcfe9c7f644c72f10f6f42bed7cf59f7b52fe1bf7ae0a622b3a139 \ + --hash=sha256:c640b0ef54138fde761ec99a6c7dc4ce05e80420262c20fa239e694ca371d434 \ + --hash=sha256:ca20550bb590db16223eb9ccc5852335b48b8f597e2f6f0878bbfd9e7314eb17 \ + --hash=sha256:d97aae66b7de41cdf5b12087b5509e4e9805ed6f562406dfcf60e8481a9a28f8 \ + --hash=sha256:e9326ca78111e4c645f7e49cbce4ed2f3f85e17b61a563328c85a5208cf34440 # via # gcp-releasetool # secretstorage From 57be031f90e415a25958ccb4afd682916b0573b9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 8 Feb 2024 13:12:25 -0600 Subject: [PATCH 244/536] chore: skip `test_to_dataframe_iterable_w_bqstorage_max_results_warning` if google-cloud-bigquery-storage is not installed (#1814) --- tests/unit/test_table.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index e4d0c66ab..00a7f06e6 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -3285,6 +3285,9 @@ def test_to_dataframe_iterable_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): from google.cloud.bigquery import schema From ab0cf4cc03292f62b56a8813cfb7681daa87f872 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 12 Feb 2024 13:46:25 -0600 Subject: [PATCH 245/536] feat: support nullable boolean and Int64 dtypes in `insert_rows_from_dataframe` (#1816) --- google/cloud/bigquery/_pandas_helpers.py | 19 +++++++ tests/system/test_pandas.py | 13 ++++- tests/unit/test__pandas_helpers.py | 65 +++++++++++++++++------- 3 files changed, 79 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index bcc869f15..e97dda7e5 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -958,6 +958,25 @@ def dataframe_to_json_generator(dataframe): # considered a NaN, however. if isinstance(is_nan, bool) and is_nan: continue + + # Convert numpy types to corresponding Python types. + # https://stackoverflow.com/a/60441783/101923 + if isinstance(value, numpy.bool_): + value = bool(value) + elif isinstance( + value, + ( + numpy.int64, + numpy.int32, + numpy.int16, + numpy.int8, + numpy.uint64, + numpy.uint32, + numpy.uint16, + numpy.uint8, + ), + ): + value = int(value) output[column] = value yield output diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index e93f245c0..85c7b79e6 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -835,7 +835,9 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): schema = [ SF("float_col", "FLOAT", mode="REQUIRED"), SF("int_col", "INTEGER", mode="REQUIRED"), + SF("int64_col", "INTEGER", mode="NULLABLE"), SF("bool_col", "BOOLEAN", mode="REQUIRED"), + SF("boolean_col", "BOOLEAN", mode="NULLABLE"), SF("string_col", "STRING", mode="NULLABLE"), SF("date_col", "DATE", mode="NULLABLE"), SF("time_col", "TIME", mode="NULLABLE"), @@ -898,6 +900,15 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): dataframe["date_col"] = dataframe["date_col"].astype("dbdate") dataframe["time_col"] = dataframe["time_col"].astype("dbtime") + # Support nullable integer and boolean dtypes. + # https://github.com/googleapis/python-bigquery/issues/1815 + dataframe["int64_col"] = pandas.Series( + [-11, -22, pandas.NA, -44, -55, -66], dtype="Int64" + ) + dataframe["boolean_col"] = pandas.Series( + [True, False, True, pandas.NA, True, False], dtype="boolean" + ) + table_id = f"{bigquery_client.project}.{dataset_id}.test_insert_rows_from_dataframe" table_arg = bigquery.Table(table_id, schema=schema) table = helpers.retry_403(bigquery_client.create_table)(table_arg) @@ -910,7 +921,7 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): expected = [ # Pandas often represents NULL values as NaN. Convert to None for # easier comparison. - tuple(None if col != col else col for col in data_row) + tuple(None if pandas.isna(col) else col for col in data_row) for data_row in dataframe.itertuples(index=False) ] diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index ad40a6da6..7c83d3ec5 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -808,29 +808,60 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name( @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_json_generator(module_under_test): utcnow = datetime.datetime.utcnow() - df_data = collections.OrderedDict( - [ - ("a_series", [pandas.NA, 2, 3, 4]), - ("b_series", [0.1, float("NaN"), 0.3, 0.4]), - ("c_series", ["a", "b", pandas.NA, "d"]), - ("d_series", [utcnow, utcnow, utcnow, pandas.NaT]), - ("e_series", [True, False, True, None]), - ] - ) dataframe = pandas.DataFrame( - df_data, index=pandas.Index([4, 5, 6, 7], name="a_index") + { + "a_series": [1, 2, 3, 4], + "b_series": [0.1, float("NaN"), 0.3, 0.4], + "c_series": ["a", "b", pandas.NA, "d"], + "d_series": [utcnow, utcnow, utcnow, pandas.NaT], + "e_series": [True, False, True, None], + # Support nullable dtypes. + # https://github.com/googleapis/python-bigquery/issues/1815 + "boolean_series": pandas.Series( + [True, False, pandas.NA, False], dtype="boolean" + ), + "int64_series": pandas.Series([-1, pandas.NA, -3, -4], dtype="Int64"), + } ) - dataframe = dataframe.astype({"a_series": pandas.Int64Dtype()}) + # Index is not included, even if it is not the default and has a name. + dataframe = dataframe.rename(index=lambda idx: idx + 4) + dataframe.index.name = "a_index" - rows = module_under_test.dataframe_to_json_generator(dataframe) + rows = list(module_under_test.dataframe_to_json_generator(dataframe)) expected = [ - {"b_series": 0.1, "c_series": "a", "d_series": utcnow, "e_series": True}, - {"a_series": 2, "c_series": "b", "d_series": utcnow, "e_series": False}, - {"a_series": 3, "b_series": 0.3, "d_series": utcnow, "e_series": True}, - {"a_series": 4, "b_series": 0.4, "c_series": "d"}, + { + "a_series": 1, + "b_series": 0.1, + "c_series": "a", + "d_series": utcnow, + "e_series": True, + "boolean_series": True, + "int64_series": -1, + }, + { + "a_series": 2, + "c_series": "b", + "d_series": utcnow, + "e_series": False, + "boolean_series": False, + }, + { + "a_series": 3, + "b_series": 0.3, + "d_series": utcnow, + "e_series": True, + "int64_series": -3, + }, + { + "a_series": 4, + "b_series": 0.4, + "c_series": "d", + "boolean_series": False, + "int64_series": -4, + }, ] - assert list(rows) == expected + assert rows == expected @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") From 89dfcb6469d22e78003a70371a0938a6856e033c Mon Sep 17 00:00:00 2001 From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com> Date: Mon, 12 Feb 2024 17:39:04 -0600 Subject: [PATCH 246/536] docs: update `client_query_w_named_params.py` to use `query_and_wait` API (#1782) * docs: update client_query_w_named_params.py to use query_and_wait API * Update client_query_w_named_params.py lint --------- Co-authored-by: Salem Boyland Co-authored-by: Kira --- samples/client_query_w_named_params.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/samples/client_query_w_named_params.py b/samples/client_query_w_named_params.py index 6dd72d44f..b7e59a81a 100644 --- a/samples/client_query_w_named_params.py +++ b/samples/client_query_w_named_params.py @@ -33,8 +33,10 @@ def client_query_w_named_params() -> None: bigquery.ScalarQueryParameter("min_word_count", "INT64", 250), ] ) - query_job = client.query(query, job_config=job_config) # Make an API request. + results = client.query_and_wait( + query, job_config=job_config + ) # Make an API request. - for row in query_job: + for row in results: print("{}: \t{}".format(row.word, row.word_count)) # [END bigquery_query_params_named] From 30f304b20e954359f4bd8ba440c3bbd541dbc561 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 20 Feb 2024 12:50:00 -0800 Subject: [PATCH 247/536] build(deps): bump cryptography from 42.0.0 to 42.0.2 in .kokoro (#1821) Source-Link: https://github.com/googleapis/synthtool/commit/8d392a55db44b00b4a9b995318051e334eecdcf1 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:a0c4463fcfd9893fc172a3b3db2b6ac0c7b94ec6ad458c7dcea12d9693615ac3 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 +-- .kokoro/requirements.txt | 66 +++++++++++++++++++-------------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 2aefd0e91..51213ca00 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:97b671488ad548ef783a452a9e1276ac10f144d5ae56d98cc4bf77ba504082b4 -# created: 2024-02-06T03:20:16.660474034Z + digest: sha256:a0c4463fcfd9893fc172a3b3db2b6ac0c7b94ec6ad458c7dcea12d9693615ac3 +# created: 2024-02-17T12:21:23.177926195Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 8c11c9f3e..f80bdcd62 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -93,39 +93,39 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -cryptography==42.0.0 \ - --hash=sha256:0a68bfcf57a6887818307600c3c0ebc3f62fbb6ccad2240aa21887cda1f8df1b \ - --hash=sha256:146e971e92a6dd042214b537a726c9750496128453146ab0ee8971a0299dc9bd \ - --hash=sha256:14e4b909373bc5bf1095311fa0f7fcabf2d1a160ca13f1e9e467be1ac4cbdf94 \ - --hash=sha256:206aaf42e031b93f86ad60f9f5d9da1b09164f25488238ac1dc488334eb5e221 \ - --hash=sha256:3005166a39b70c8b94455fdbe78d87a444da31ff70de3331cdec2c568cf25b7e \ - --hash=sha256:324721d93b998cb7367f1e6897370644751e5580ff9b370c0a50dc60a2003513 \ - --hash=sha256:33588310b5c886dfb87dba5f013b8d27df7ffd31dc753775342a1e5ab139e59d \ - --hash=sha256:35cf6ed4c38f054478a9df14f03c1169bb14bd98f0b1705751079b25e1cb58bc \ - --hash=sha256:3ca482ea80626048975360c8e62be3ceb0f11803180b73163acd24bf014133a0 \ - --hash=sha256:56ce0c106d5c3fec1038c3cca3d55ac320a5be1b44bf15116732d0bc716979a2 \ - --hash=sha256:5a217bca51f3b91971400890905a9323ad805838ca3fa1e202a01844f485ee87 \ - --hash=sha256:678cfa0d1e72ef41d48993a7be75a76b0725d29b820ff3cfd606a5b2b33fda01 \ - --hash=sha256:69fd009a325cad6fbfd5b04c711a4da563c6c4854fc4c9544bff3088387c77c0 \ - --hash=sha256:6cf9b76d6e93c62114bd19485e5cb003115c134cf9ce91f8ac924c44f8c8c3f4 \ - --hash=sha256:74f18a4c8ca04134d2052a140322002fef535c99cdbc2a6afc18a8024d5c9d5b \ - --hash=sha256:85f759ed59ffd1d0baad296e72780aa62ff8a71f94dc1ab340386a1207d0ea81 \ - --hash=sha256:87086eae86a700307b544625e3ba11cc600c3c0ef8ab97b0fda0705d6db3d4e3 \ - --hash=sha256:8814722cffcfd1fbd91edd9f3451b88a8f26a5fd41b28c1c9193949d1c689dc4 \ - --hash=sha256:8fedec73d590fd30c4e3f0d0f4bc961aeca8390c72f3eaa1a0874d180e868ddf \ - --hash=sha256:9515ea7f596c8092fdc9902627e51b23a75daa2c7815ed5aa8cf4f07469212ec \ - --hash=sha256:988b738f56c665366b1e4bfd9045c3efae89ee366ca3839cd5af53eaa1401bce \ - --hash=sha256:a2a8d873667e4fd2f34aedab02ba500b824692c6542e017075a2efc38f60a4c0 \ - --hash=sha256:bd7cf7a8d9f34cc67220f1195884151426ce616fdc8285df9054bfa10135925f \ - --hash=sha256:bdce70e562c69bb089523e75ef1d9625b7417c6297a76ac27b1b8b1eb51b7d0f \ - --hash=sha256:be14b31eb3a293fc6e6aa2807c8a3224c71426f7c4e3639ccf1a2f3ffd6df8c3 \ - --hash=sha256:be41b0c7366e5549265adf2145135dca107718fa44b6e418dc7499cfff6b4689 \ - --hash=sha256:c310767268d88803b653fffe6d6f2f17bb9d49ffceb8d70aed50ad45ea49ab08 \ - --hash=sha256:c58115384bdcfe9c7f644c72f10f6f42bed7cf59f7b52fe1bf7ae0a622b3a139 \ - --hash=sha256:c640b0ef54138fde761ec99a6c7dc4ce05e80420262c20fa239e694ca371d434 \ - --hash=sha256:ca20550bb590db16223eb9ccc5852335b48b8f597e2f6f0878bbfd9e7314eb17 \ - --hash=sha256:d97aae66b7de41cdf5b12087b5509e4e9805ed6f562406dfcf60e8481a9a28f8 \ - --hash=sha256:e9326ca78111e4c645f7e49cbce4ed2f3f85e17b61a563328c85a5208cf34440 +cryptography==42.0.2 \ + --hash=sha256:087887e55e0b9c8724cf05361357875adb5c20dec27e5816b653492980d20380 \ + --hash=sha256:09a77e5b2e8ca732a19a90c5bca2d124621a1edb5438c5daa2d2738bfeb02589 \ + --hash=sha256:130c0f77022b2b9c99d8cebcdd834d81705f61c68e91ddd614ce74c657f8b3ea \ + --hash=sha256:141e2aa5ba100d3788c0ad7919b288f89d1fe015878b9659b307c9ef867d3a65 \ + --hash=sha256:28cb2c41f131a5758d6ba6a0504150d644054fd9f3203a1e8e8d7ac3aea7f73a \ + --hash=sha256:2f9f14185962e6a04ab32d1abe34eae8a9001569ee4edb64d2304bf0d65c53f3 \ + --hash=sha256:320948ab49883557a256eab46149df79435a22d2fefd6a66fe6946f1b9d9d008 \ + --hash=sha256:36d4b7c4be6411f58f60d9ce555a73df8406d484ba12a63549c88bd64f7967f1 \ + --hash=sha256:3b15c678f27d66d247132cbf13df2f75255627bcc9b6a570f7d2fd08e8c081d2 \ + --hash=sha256:3dbd37e14ce795b4af61b89b037d4bc157f2cb23e676fa16932185a04dfbf635 \ + --hash=sha256:4383b47f45b14459cab66048d384614019965ba6c1a1a141f11b5a551cace1b2 \ + --hash=sha256:44c95c0e96b3cb628e8452ec060413a49002a247b2b9938989e23a2c8291fc90 \ + --hash=sha256:4b063d3413f853e056161eb0c7724822a9740ad3caa24b8424d776cebf98e7ee \ + --hash=sha256:52ed9ebf8ac602385126c9a2fe951db36f2cb0c2538d22971487f89d0de4065a \ + --hash=sha256:55d1580e2d7e17f45d19d3b12098e352f3a37fe86d380bf45846ef257054b242 \ + --hash=sha256:5ef9bc3d046ce83c4bbf4c25e1e0547b9c441c01d30922d812e887dc5f125c12 \ + --hash=sha256:5fa82a26f92871eca593b53359c12ad7949772462f887c35edaf36f87953c0e2 \ + --hash=sha256:61321672b3ac7aade25c40449ccedbc6db72c7f5f0fdf34def5e2f8b51ca530d \ + --hash=sha256:701171f825dcab90969596ce2af253143b93b08f1a716d4b2a9d2db5084ef7be \ + --hash=sha256:841ec8af7a8491ac76ec5a9522226e287187a3107e12b7d686ad354bb78facee \ + --hash=sha256:8a06641fb07d4e8f6c7dda4fc3f8871d327803ab6542e33831c7ccfdcb4d0ad6 \ + --hash=sha256:8e88bb9eafbf6a4014d55fb222e7360eef53e613215085e65a13290577394529 \ + --hash=sha256:a00aee5d1b6c20620161984f8ab2ab69134466c51f58c052c11b076715e72929 \ + --hash=sha256:a047682d324ba56e61b7ea7c7299d51e61fd3bca7dad2ccc39b72bd0118d60a1 \ + --hash=sha256:a7ef8dd0bf2e1d0a27042b231a3baac6883cdd5557036f5e8df7139255feaac6 \ + --hash=sha256:ad28cff53f60d99a928dfcf1e861e0b2ceb2bc1f08a074fdd601b314e1cc9e0a \ + --hash=sha256:b9097a208875fc7bbeb1286d0125d90bdfed961f61f214d3f5be62cd4ed8a446 \ + --hash=sha256:b97fe7d7991c25e6a31e5d5e795986b18fbbb3107b873d5f3ae6dc9a103278e9 \ + --hash=sha256:e0ec52ba3c7f1b7d813cd52649a5b3ef1fc0d433219dc8c93827c57eab6cf888 \ + --hash=sha256:ea2c3ffb662fec8bbbfce5602e2c159ff097a4631d96235fcf0fb00e59e3ece4 \ + --hash=sha256:fa3dec4ba8fb6e662770b74f62f1a0c7d4e37e25b58b2bf2c1be4c95372b4a33 \ + --hash=sha256:fbeb725c9dc799a574518109336acccaf1303c30d45c075c665c0793c2f79a7f # via # gcp-releasetool # secretstorage From 2542bd3db3451cd8057ca0618526a7609de50cb5 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 21 Feb 2024 08:47:45 -0500 Subject: [PATCH 248/536] chore: fix minor typos (#1820) --- google/cloud/bigquery/_job_helpers.py | 2 +- google/cloud/bigquery/magics/magics.py | 2 +- google/cloud/bigquery/opentelemetry_tracing.py | 2 +- tests/system/test_query.py | 2 +- tests/unit/test__job_helpers.py | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 6debc377b..0692c9b65 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -54,7 +54,7 @@ # The purpose of _TIMEOUT_BUFFER_MILLIS is to allow the server-side timeout to -# happen before the client-side timeout. This is not strictly neccessary, as the +# happen before the client-side timeout. This is not strictly necessary, as the # client retries client-side timeouts, but the hope by making the server-side # timeout slightly shorter is that it can save the server from some unncessary # processing time. diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index b7c685d9a..8464c8792 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -288,7 +288,7 @@ def _handle_error(error, destination_var=None): Args: error (Exception): - An exception that ocurred during the query execution. + An exception that occurred during the query execution. destination_var (Optional[str]): The name of the IPython session variable to store the query job. """ diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index be02c1686..e2a05e4d0 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -90,7 +90,7 @@ def _get_final_span_attributes(attributes=None, client=None, job_ref=None): """Compiles attributes from: client, job_ref, user-provided attributes. Attributes from all of these sources are merged together. Note the - attributes are added sequentially based on perceived order of precendence: + attributes are added sequentially based on perceived order of precedence: i.e. attributes added last may overwrite attributes added earlier. Args: diff --git a/tests/system/test_query.py b/tests/system/test_query.py index b8e0c00da..82be40693 100644 --- a/tests/system/test_query.py +++ b/tests/system/test_query.py @@ -477,7 +477,7 @@ def test_query_error_w_api_method_default(bigquery_client: bigquery.Client): """Test that an exception is not thrown until fetching the results. For backwards compatibility, jobs.insert is the default API method. With - jobs.insert, a failed query job is "sucessfully" created. An exception is + jobs.insert, a failed query job is "successfully" created. An exception is thrown when fetching the results. """ diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 404a546ff..c30964c57 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -711,7 +711,7 @@ def test_query_and_wait_caches_completed_query_results_one_page(): {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, ], - # Even though totalRows > len(rows), we should use the presense of a + # Even though totalRows > len(rows), we should use the presence of a # next page token to decide if there are any more pages. "totalRows": 8, } @@ -828,7 +828,7 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, ], - # Even though totalRows <= len(rows), we should use the presense of a + # Even though totalRows <= len(rows), we should use the presence of a # next page token to decide if there are any more pages. "totalRows": 2, "pageToken": "page-2", @@ -981,7 +981,7 @@ def test_query_and_wait_incomplete_query(): {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, ], - # Even though totalRows <= len(rows), we should use the presense of a + # Even though totalRows <= len(rows), we should use the presence of a # next page token to decide if there are any more pages. "totalRows": 2, "pageToken": "page-2", From a208b6a2eee3dace87118c4da49786ce478c5fcf Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 27 Feb 2024 12:48:46 -0500 Subject: [PATCH 249/536] build(deps): bump cryptography from 42.0.2 to 42.0.4 in .kokoro (#1833) Source-Link: https://github.com/googleapis/synthtool/commit/d895aec3679ad22aa120481f746bf9f2f325f26f Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:98f3afd11308259de6e828e37376d18867fd321aba07826e29e4f8d9cab56bad Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 +-- .kokoro/requirements.txt | 66 +++++++++++++++++++-------------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 51213ca00..e4e943e02 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:a0c4463fcfd9893fc172a3b3db2b6ac0c7b94ec6ad458c7dcea12d9693615ac3 -# created: 2024-02-17T12:21:23.177926195Z + digest: sha256:98f3afd11308259de6e828e37376d18867fd321aba07826e29e4f8d9cab56bad +# created: 2024-02-27T15:56:18.442440378Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index f80bdcd62..bda8e38c4 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -93,39 +93,39 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -cryptography==42.0.2 \ - --hash=sha256:087887e55e0b9c8724cf05361357875adb5c20dec27e5816b653492980d20380 \ - --hash=sha256:09a77e5b2e8ca732a19a90c5bca2d124621a1edb5438c5daa2d2738bfeb02589 \ - --hash=sha256:130c0f77022b2b9c99d8cebcdd834d81705f61c68e91ddd614ce74c657f8b3ea \ - --hash=sha256:141e2aa5ba100d3788c0ad7919b288f89d1fe015878b9659b307c9ef867d3a65 \ - --hash=sha256:28cb2c41f131a5758d6ba6a0504150d644054fd9f3203a1e8e8d7ac3aea7f73a \ - --hash=sha256:2f9f14185962e6a04ab32d1abe34eae8a9001569ee4edb64d2304bf0d65c53f3 \ - --hash=sha256:320948ab49883557a256eab46149df79435a22d2fefd6a66fe6946f1b9d9d008 \ - --hash=sha256:36d4b7c4be6411f58f60d9ce555a73df8406d484ba12a63549c88bd64f7967f1 \ - --hash=sha256:3b15c678f27d66d247132cbf13df2f75255627bcc9b6a570f7d2fd08e8c081d2 \ - --hash=sha256:3dbd37e14ce795b4af61b89b037d4bc157f2cb23e676fa16932185a04dfbf635 \ - --hash=sha256:4383b47f45b14459cab66048d384614019965ba6c1a1a141f11b5a551cace1b2 \ - --hash=sha256:44c95c0e96b3cb628e8452ec060413a49002a247b2b9938989e23a2c8291fc90 \ - --hash=sha256:4b063d3413f853e056161eb0c7724822a9740ad3caa24b8424d776cebf98e7ee \ - --hash=sha256:52ed9ebf8ac602385126c9a2fe951db36f2cb0c2538d22971487f89d0de4065a \ - --hash=sha256:55d1580e2d7e17f45d19d3b12098e352f3a37fe86d380bf45846ef257054b242 \ - --hash=sha256:5ef9bc3d046ce83c4bbf4c25e1e0547b9c441c01d30922d812e887dc5f125c12 \ - --hash=sha256:5fa82a26f92871eca593b53359c12ad7949772462f887c35edaf36f87953c0e2 \ - --hash=sha256:61321672b3ac7aade25c40449ccedbc6db72c7f5f0fdf34def5e2f8b51ca530d \ - --hash=sha256:701171f825dcab90969596ce2af253143b93b08f1a716d4b2a9d2db5084ef7be \ - --hash=sha256:841ec8af7a8491ac76ec5a9522226e287187a3107e12b7d686ad354bb78facee \ - --hash=sha256:8a06641fb07d4e8f6c7dda4fc3f8871d327803ab6542e33831c7ccfdcb4d0ad6 \ - --hash=sha256:8e88bb9eafbf6a4014d55fb222e7360eef53e613215085e65a13290577394529 \ - --hash=sha256:a00aee5d1b6c20620161984f8ab2ab69134466c51f58c052c11b076715e72929 \ - --hash=sha256:a047682d324ba56e61b7ea7c7299d51e61fd3bca7dad2ccc39b72bd0118d60a1 \ - --hash=sha256:a7ef8dd0bf2e1d0a27042b231a3baac6883cdd5557036f5e8df7139255feaac6 \ - --hash=sha256:ad28cff53f60d99a928dfcf1e861e0b2ceb2bc1f08a074fdd601b314e1cc9e0a \ - --hash=sha256:b9097a208875fc7bbeb1286d0125d90bdfed961f61f214d3f5be62cd4ed8a446 \ - --hash=sha256:b97fe7d7991c25e6a31e5d5e795986b18fbbb3107b873d5f3ae6dc9a103278e9 \ - --hash=sha256:e0ec52ba3c7f1b7d813cd52649a5b3ef1fc0d433219dc8c93827c57eab6cf888 \ - --hash=sha256:ea2c3ffb662fec8bbbfce5602e2c159ff097a4631d96235fcf0fb00e59e3ece4 \ - --hash=sha256:fa3dec4ba8fb6e662770b74f62f1a0c7d4e37e25b58b2bf2c1be4c95372b4a33 \ - --hash=sha256:fbeb725c9dc799a574518109336acccaf1303c30d45c075c665c0793c2f79a7f +cryptography==42.0.4 \ + --hash=sha256:01911714117642a3f1792c7f376db572aadadbafcd8d75bb527166009c9f1d1b \ + --hash=sha256:0e89f7b84f421c56e7ff69f11c441ebda73b8a8e6488d322ef71746224c20fce \ + --hash=sha256:12d341bd42cdb7d4937b0cabbdf2a94f949413ac4504904d0cdbdce4a22cbf88 \ + --hash=sha256:15a1fb843c48b4a604663fa30af60818cd28f895572386e5f9b8a665874c26e7 \ + --hash=sha256:1cdcdbd117681c88d717437ada72bdd5be9de117f96e3f4d50dab3f59fd9ab20 \ + --hash=sha256:1df6fcbf60560d2113b5ed90f072dc0b108d64750d4cbd46a21ec882c7aefce9 \ + --hash=sha256:3c6048f217533d89f2f8f4f0fe3044bf0b2090453b7b73d0b77db47b80af8dff \ + --hash=sha256:3e970a2119507d0b104f0a8e281521ad28fc26f2820687b3436b8c9a5fcf20d1 \ + --hash=sha256:44a64043f743485925d3bcac548d05df0f9bb445c5fcca6681889c7c3ab12764 \ + --hash=sha256:4e36685cb634af55e0677d435d425043967ac2f3790ec652b2b88ad03b85c27b \ + --hash=sha256:5f8907fcf57392cd917892ae83708761c6ff3c37a8e835d7246ff0ad251d9298 \ + --hash=sha256:69b22ab6506a3fe483d67d1ed878e1602bdd5912a134e6202c1ec672233241c1 \ + --hash=sha256:6bfadd884e7280df24d26f2186e4e07556a05d37393b0f220a840b083dc6a824 \ + --hash=sha256:6d0fbe73728c44ca3a241eff9aefe6496ab2656d6e7a4ea2459865f2e8613257 \ + --hash=sha256:6ffb03d419edcab93b4b19c22ee80c007fb2d708429cecebf1dd3258956a563a \ + --hash=sha256:810bcf151caefc03e51a3d61e53335cd5c7316c0a105cc695f0959f2c638b129 \ + --hash=sha256:831a4b37accef30cccd34fcb916a5d7b5be3cbbe27268a02832c3e450aea39cb \ + --hash=sha256:887623fe0d70f48ab3f5e4dbf234986b1329a64c066d719432d0698522749929 \ + --hash=sha256:a0298bdc6e98ca21382afe914c642620370ce0470a01e1bef6dd9b5354c36854 \ + --hash=sha256:a1327f280c824ff7885bdeef8578f74690e9079267c1c8bd7dc5cc5aa065ae52 \ + --hash=sha256:c1f25b252d2c87088abc8bbc4f1ecbf7c919e05508a7e8628e6875c40bc70923 \ + --hash=sha256:c3a5cbc620e1e17009f30dd34cb0d85c987afd21c41a74352d1719be33380885 \ + --hash=sha256:ce8613beaffc7c14f091497346ef117c1798c202b01153a8cc7b8e2ebaaf41c0 \ + --hash=sha256:d2a27aca5597c8a71abbe10209184e1a8e91c1fd470b5070a2ea60cafec35bcd \ + --hash=sha256:dad9c385ba8ee025bb0d856714f71d7840020fe176ae0229de618f14dae7a6e2 \ + --hash=sha256:db4b65b02f59035037fde0998974d84244a64c3265bdef32a827ab9b63d61b18 \ + --hash=sha256:e09469a2cec88fb7b078e16d4adec594414397e8879a4341c6ace96013463d5b \ + --hash=sha256:e53dc41cda40b248ebc40b83b31516487f7db95ab8ceac1f042626bc43a2f992 \ + --hash=sha256:f1e85a178384bf19e36779d91ff35c7617c885da487d689b05c1366f9933ad74 \ + --hash=sha256:f47be41843200f7faec0683ad751e5ef11b9a56a220d57f300376cd8aba81660 \ + --hash=sha256:fb0cef872d8193e487fc6bdb08559c3aa41b659a7d9be48b2e10747f47863925 \ + --hash=sha256:ffc73996c4fca3d2b6c1c8c12bfd3ad00def8621da24f547626bf06441400449 # via # gcp-releasetool # secretstorage From d62cabbf115637ecbaf8cc378f39329a5ae74c26 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Tue, 27 Feb 2024 12:18:10 -0800 Subject: [PATCH 250/536] feat: support slot_ms in QueryPlanEntry (#1831) --- google/cloud/bigquery/job/query.py | 5 +++++ tests/unit/job/test_query_stats.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index ac0c51973..e45a46894 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -2263,6 +2263,11 @@ def steps(self): for step in self._properties.get("steps", []) ] + @property + def slot_ms(self): + """Optional[int]: Slot-milliseconds used by the stage.""" + return _helpers._int_or_none(self._properties.get("slotMs")) + class TimelineEntry(object): """TimelineEntry represents progress of a query job at a particular diff --git a/tests/unit/job/test_query_stats.py b/tests/unit/job/test_query_stats.py index bdd0fb627..61b278d43 100644 --- a/tests/unit/job/test_query_stats.py +++ b/tests/unit/job/test_query_stats.py @@ -261,6 +261,7 @@ class TestQueryPlanEntry(_Base): STATUS = "STATUS" SHUFFLE_OUTPUT_BYTES = 1024 SHUFFLE_OUTPUT_BYTES_SPILLED = 1 + SLOT_MS = 25 START_RFC3339_MICROS = "2018-04-01T00:00:00.000000Z" END_RFC3339_MICROS = "2018-04-01T00:00:04.000000Z" @@ -305,6 +306,7 @@ def test_from_api_repr_empty(self): self.assertIsNone(entry.shuffle_output_bytes) self.assertIsNone(entry.shuffle_output_bytes_spilled) self.assertEqual(entry.steps, []) + self.assertIsNone(entry.slot_ms) def test_from_api_repr_normal(self): from google.cloud.bigquery.job import QueryPlanEntryStep @@ -348,6 +350,7 @@ def test_from_api_repr_normal(self): "substeps": TestQueryPlanEntryStep.SUBSTEPS, } ], + "slotMs": self.SLOT_MS, } klass = self._get_target_class() @@ -366,6 +369,7 @@ def test_from_api_repr_normal(self): self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) self.assertEqual(entry.status, self.STATUS) self.assertEqual(entry.steps, steps) + self.assertEqual(entry.slot_ms, self.SLOT_MS) def test_start(self): from google.cloud._helpers import _RFC3339_MICROS From 4dfb920b106784e98f343b3e3fc8e8ff70c50560 Mon Sep 17 00:00:00 2001 From: Kira Date: Tue, 27 Feb 2024 14:03:44 -0800 Subject: [PATCH 251/536] fix: keyword rendering and docstring improvements (#1829) * fix: keyword rendering and docstring improvements * fix error * small lint fix --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/client.py | 174 +++++++++++++++----------------- 1 file changed, 84 insertions(+), 90 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 4708e753b..a871dc003 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1716,20 +1716,24 @@ def delete_job_metadata( :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. Args: - job_id: Job or job identifier. - - Keyword Arguments: - project: + job_id (Union[ \ + str, \ + LoadJob, \ + CopyJob, \ + ExtractJob, \ + QueryJob \ + ]): Job or job identifier. + project (Optional[str]): ID of the project which owns the job (defaults to the client's project). - location: + location (Optional[str]): Location where the job was run. Ignored if ``job_id`` is a job object. - retry: + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout: + timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - not_found_ok: + not_found_ok (Optional[bool]): Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the job. """ @@ -1970,12 +1974,10 @@ def create_job( timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. + Args: job_config (dict): configuration job representation returned from the API. - - Keyword Arguments: - retry (Optional[google.api_core.retry.Retry]): - How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -2066,10 +2068,14 @@ def get_job( https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get Args: - job_id: + job_id (Union[ \ + str, \ + job.LoadJob, \ + job.CopyJob, \ + job.ExtractJob, \ + job.QueryJob \ + ]): Job identifier. - - Keyword Arguments: project (Optional[str]): ID of the project which owns the job (defaults to the client's project). location (Optional[str]): @@ -2137,8 +2143,6 @@ def cancel_job( google.cloud.bigquery.job.ExtractJob, \ google.cloud.bigquery.job.QueryJob \ ]): Job identifier. - - Keyword Arguments: project (Optional[str]): ID of the project which owns the job (defaults to the client's project). location (Optional[str]): @@ -2340,8 +2344,6 @@ def load_table_from_uri( in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. - - Keyword Arguments: job_id (Optional[str]): Name of the job. job_id_prefix (Optional[str]): The user-provided prefix for a randomly generated job ID. @@ -2415,39 +2417,42 @@ def load_table_from_file( returns a :class:`~google.cloud.bigquery.job.LoadJob`. Args: - file_obj: + file_obj (IO[bytes]): A file handle opened in binary mode for reading. - destination: + destination (Union[Table, \ + TableReference, \ + TableListItem, \ + str \ + ]): Table into which data is to be loaded. If a string is passed in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. - - Keyword Arguments: - rewind: + rewind (Optional[bool]): If True, seek to the beginning of the file handle before - reading the file. - size: + reading the file. Defaults to False. + size (Optional[int]): The number of bytes to read from the file handle. If size is ``None`` or large, resumable upload will be used. Otherwise, multipart upload will be used. - num_retries: Number of upload retries. Defaults to 6. - job_id: Name of the job. - job_id_prefix: + num_retries (Optional[int]): Number of upload retries. Defaults to 6. + job_id (Optional[str]): Name of the job. + job_id_prefix (Optional[str]): The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - location: + location (Optional[str]): Location where to run the job. Must match the location of the destination table. - project: + project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. - job_config: + job_config (Optional[LoadJobConfig]): Extra configuration options for the job. - timeout: + timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. Depending on the retry strategy, a request may be repeated several times using the same timeout each time. + Defaults to None. Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. @@ -2535,9 +2540,13 @@ def load_table_from_dataframe( https://github.com/googleapis/python-bigquery/issues/19 Args: - dataframe: + dataframe (pandas.Dataframe): A :class:`~pandas.DataFrame` containing the data to load. - destination: + destination (Union[ \ + Table, \ + TableReference, \ + str \ + ]): The destination table to use for loading the data. If it is an existing table, the schema of the :class:`~pandas.DataFrame` must match the schema of the destination table. If the table @@ -2547,21 +2556,19 @@ def load_table_from_dataframe( If a string is passed in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. - - Keyword Arguments: - num_retries: Number of upload retries. - job_id: Name of the job. - job_id_prefix: + num_retries (Optional[int]): Number of upload retries. Defaults to 6. + job_id (Optional[str]): Name of the job. + job_id_prefix (Optional[str]): The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - location: + location (Optional[str]): Location where to run the job. Must match the location of the destination table. - project: + project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. - job_config: + job_config (Optional[LoadJobConfig]): Extra configuration options for the job. To override the default pandas data type conversions, supply @@ -2578,9 +2585,10 @@ def load_table_from_dataframe( :attr:`~google.cloud.bigquery.job.SourceFormat.CSV` and :attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are supported. - parquet_compression: + parquet_compression (Optional[str]): [Beta] The compression method to use if intermittently serializing ``dataframe`` to a parquet file. + Defaults to "snappy". The argument is directly passed as the ``compression`` argument to the underlying ``pyarrow.parquet.write_table()`` @@ -2591,10 +2599,11 @@ def load_table_from_dataframe( passed as the ``compression`` argument to the underlying ``DataFrame.to_parquet()`` method. https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet - timeout: + timeout (Optional[flaot]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. Depending on the retry strategy, a request may be repeated several times using the same timeout each time. + Defaults to None. Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. @@ -2784,32 +2793,36 @@ def load_table_from_json( client = bigquery.Client() client.load_table_from_file(data_as_file, ...) - destination: + destination (Union[ \ + Table, \ + TableReference, \ + TableListItem, \ + str \ + ]): Table into which data is to be loaded. If a string is passed in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. - - Keyword Arguments: - num_retries: Number of upload retries. - job_id: Name of the job. - job_id_prefix: + num_retries (Optional[int]): Number of upload retries. Defaults to 6. + job_id (Optional[str]): Name of the job. + job_id_prefix (Optional[str]): The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - location: + location (Optional[str]): Location where to run the job. Must match the location of the destination table. - project: + project (Optional[str]): Project ID of the project of where to run the job. Defaults to the client's project. - job_config: + job_config (Optional[LoadJobConfig]): Extra configuration options for the job. The ``source_format`` setting is always set to :attr:`~google.cloud.bigquery.job.SourceFormat.NEWLINE_DELIMITED_JSON`. - timeout: + timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. Depending on the retry strategy, a request may be repeated several times using the same timeout each time. + Defaults to None. Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. @@ -2885,23 +2898,19 @@ def _do_resumable_upload( """Perform a resumable upload. Args: - stream: A bytes IO object open for reading. - - metadata: The metadata associated with the upload. - - num_retries: + stream (IO[bytes]): A bytes IO object open for reading. + metadata (Mapping[str, str]): The metadata associated with the upload. + num_retries (int): Number of upload retries. (Deprecated: This argument will be removed in a future release.) - - timeout: + timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. Depending on the retry strategy, a request may be repeated several times using the same timeout each time. Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. - - project: + project (Optional[str]): Project ID of the project of where to run the upload. Defaults to the client's project. @@ -2929,23 +2938,19 @@ def _initiate_resumable_upload( """Initiate a resumable upload. Args: - stream: A bytes IO object open for reading. - - metadata: The metadata associated with the upload. - - num_retries: + stream (IO[bytes]): A bytes IO object open for reading. + metadata (Mapping[str, str]): The metadata associated with the upload. + num_retries (int): Number of upload retries. (Deprecated: This argument will be removed in a future release.) - - timeout: + timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. Depending on the retry strategy, a request may be repeated several times using the same timeout each time. Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. - - project: + project (Optional[str]): Project ID of the project of where to run the upload. Defaults to the client's project. @@ -3005,28 +3010,23 @@ def _do_multipart_upload( """Perform a multipart upload. Args: - stream: A bytes IO object open for reading. - - metadata: The metadata associated with the upload. - - size: + stream (IO[bytes]): A bytes IO object open for reading. + metadata (Mapping[str, str]): The metadata associated with the upload. + size (int): The number of bytes to be uploaded (which will be read from ``stream``). If not provided, the upload will be concluded once ``stream`` is exhausted (or :data:`None`). - - num_retries: + num_retries (int): Number of upload retries. (Deprecated: This argument will be removed in a future release.) - - timeout: + timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. Depending on the retry strategy, a request may be repeated several times using the same timeout each time. Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. - - project: + project (Optional[str]): Project ID of the project of where to run the upload. Defaults to the client's project. @@ -3118,8 +3118,6 @@ def copy_table( str, \ ]): Table into which data is to be copied. - - Keyword Arguments: job_id (Optional[str]): The ID of the job. job_id_prefix (Optional[str]): The user-provided prefix for a randomly generated job ID. @@ -3216,8 +3214,6 @@ def extract_table( URIs of Cloud Storage file(s) into which table data is to be extracted; in format ``gs:///``. - - Keyword Arguments: job_id (Optional[str]): The ID of the job. job_id_prefix (Optional[str]): The user-provided prefix for a randomly generated job ID. @@ -3306,8 +3302,6 @@ def query( query (str): SQL query to be executed. Defaults to the standard SQL dialect. Use the ``job_config`` parameter to change dialects. - - Keyword Arguments: job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): Extra configuration options for the job. To override any options that were previously set in From b099c32a83946a347560f6a71d08c3f263e56cb6 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 28 Feb 2024 16:42:45 -0500 Subject: [PATCH 252/536] docs(samples): updates to urllib3 constraint for Python 3.7 (#1834) * fix: updates to urllib3 constraint for Python 3.7 * refine urllib3 constraint * revises geo reqs and rolls back contraints * tweaking the geo sample requirements * more tweaks to geo reqs * more tweaks, testing arbitrary equality, 2.x version * tweak google-auth --- samples/geography/requirements.txt | 5 +++-- testing/constraints-3.7.txt | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d6cea7ec5..332911345 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geopandas===0.10.2; python_version == '3.7' geopandas==0.13.2; python_version == '3.8' geopandas==0.14.1; python_version >= '3.9' google-api-core==2.11.1 -google-auth==2.22.0 +google-auth==2.25.2 google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 google-cloud-core==2.3.3 @@ -45,4 +45,5 @@ Shapely==2.0.2 six==1.16.0 typing-extensions==4.7.1 typing-inspect==0.9.0 -urllib3==1.26.18 +urllib3===1.26.18; python_version == '3.7' +urllib3==2.2.1; python_version >= '3.8' \ No newline at end of file diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 2ea482e8b..9f71bf11a 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -27,4 +27,4 @@ python-dateutil==2.7.3 requests==2.21.0 Shapely==1.8.4 six==1.13.0 -tqdm==4.7.4 +tqdm==4.7.4 \ No newline at end of file From 073724c4d6ee512fb008211bef1bd6d3a1fbaa78 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 29 Feb 2024 16:01:12 +0100 Subject: [PATCH 253/536] chore(deps): update all dependencies (#1640) * chore(deps): update all dependencies * revert geopandas * See https://cffi.readthedocs.io/en/stable/whatsnew.html#v1-16-0 * See https://pandas.pydata.org/docs/dev/whatsnew/v2.1.0.html#backwards-incompatible-api-changes * See https://matplotlib.org/stable/api/prev_api_changes/api_changes_3.8.0.html#increase-to-minimum-supported-versions-of-dependencies * Remove libcst which is not used * See https://github.com/python/typing_extensions/releases/tag/4.8.0 --------- Co-authored-by: Anthonios Partheniou Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements-test.txt | 4 +- samples/desktopapp/requirements.txt | 4 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 47 ++++++++++++------------ samples/magics/requirements-test.txt | 4 +- samples/magics/requirements.txt | 11 +++--- samples/notebooks/requirements-test.txt | 4 +- samples/notebooks/requirements.txt | 14 ++++--- samples/snippets/requirements-test.txt | 4 +- samples/snippets/requirements.txt | 2 +- 10 files changed, 50 insertions(+), 46 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 514f09705..fc926cd7c 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.3.3 -pytest==7.4.0 +google-cloud-testutils==1.4.0 +pytest==7.4.3 mock==5.1.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index a5b3ad130..8d82d4930 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.11.4 -google-auth-oauthlib==1.0.0 +google-cloud-bigquery==3.14.1 +google-auth-oauthlib==1.2.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 6585a560a..7749d1f94 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.4.0 +pytest==7.4.3 mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 332911345..47e7cc56e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,49 +1,50 @@ attrs==23.1.0 -certifi==2023.7.22 -cffi==1.15.1 -charset-normalizer==3.2.0 -click==8.1.6 +certifi==2023.11.17 +cffi===1.15.1; python_version == '3.7' +cffi==1.16.0; python_version >= '3.8' +charset-normalizer==3.3.2 +click==8.1.7 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.1.1 +db-dtypes==1.2.0 Fiona==1.9.5 geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' -geopandas==0.13.2; python_version == '3.8' +geopandas===0.13.2; python_version == '3.8' geopandas==0.14.1; python_version >= '3.9' -google-api-core==2.11.1 +google-api-core==2.15.0 google-auth==2.25.2 -google-cloud-bigquery==3.11.4 -google-cloud-bigquery-storage==2.22.0 -google-cloud-core==2.3.3 +google-cloud-bigquery==3.14.1 +google-cloud-bigquery-storage==2.24.0 +google-cloud-core==2.4.1 google-crc32c==1.5.0 -google-resumable-media==2.5.0 -googleapis-common-protos==1.60.0 -grpcio==1.59.0 -idna==3.4 -libcst==1.0.1; python_version == '3.7' -libcst==1.1.0; python_version >= '3.8' +google-resumable-media==2.7.0 +googleapis-common-protos==1.62.0 +grpcio==1.60.0 +idna==3.6 munch==4.0.0 mypy-extensions==1.0.0 -packaging==23.1 +packaging==23.2 pandas===1.3.5; python_version == '3.7' -pandas==2.0.3; python_version >= '3.8' -proto-plus==1.22.3 +pandas===2.0.3; python_version == '3.8' +pandas==2.1.0; python_version >= '3.9' +proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==14.0.1; python_version >= '3.8' -pyasn1==0.5.0 +pyasn1==0.5.1 pyasn1-modules==0.3.0 pycparser==2.21 pyparsing==3.1.1 python-dateutil==2.8.2 -pytz==2023.3 +pytz==2023.3.post1 PyYAML==6.0.1 requests==2.31.0 rsa==4.9 Shapely==2.0.2 six==1.16.0 -typing-extensions==4.7.1 +typing-extensions===4.7.1; python_version == '3.7' +typing-extensions==4.9.0; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' -urllib3==2.2.1; python_version >= '3.8' \ No newline at end of file +urllib3==2.2.1; python_version >= '3.8' diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 514f09705..fc926cd7c 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.3.3 -pytest==7.4.0 +google-cloud-testutils==1.4.0 +pytest==7.4.3 mock==5.1.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index c8f6b2765..869d3b4d5 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,8 +1,9 @@ -db-dtypes==1.1.1 -google.cloud.bigquery==3.11.4 -google-cloud-bigquery-storage==2.22.0 +db-dtypes==1.2.0 +google.cloud.bigquery==3.14.1 +google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.14.0; python_version >= '3.9' +ipython==8.18.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' -pandas==2.0.3; python_version >= '3.8' +pandas===2.0.3; python_version == '3.8' +pandas==2.1.0; python_version >= '3.9' diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 514f09705..fc926cd7c 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.3.3 -pytest==7.4.0 +google-cloud-testutils==1.4.0 +pytest==7.4.3 mock==5.1.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 22c46297f..e8839e1fe 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,10 +1,12 @@ -db-dtypes==1.1.1 -google-cloud-bigquery==3.11.4 -google-cloud-bigquery-storage==2.22.0 +db-dtypes==1.2.0 +google-cloud-bigquery==3.14.1 +google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.14.0; python_version >= '3.9' +ipython==8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.2; python_version >= '3.8' +matplotlib===3.7.4; python_version == '3.8' +matplotlib==3.8.2; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' -pandas==2.0.3; python_version >= '3.8' +pandas===2.0.3; python_version == '3.8' +pandas==2.1.0; python_version >= '3.9' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 514f09705..fc926cd7c 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.3.3 -pytest==7.4.0 +google-cloud-testutils==1.4.0 +pytest==7.4.3 mock==5.1.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f49c7494f..365d584c7 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1 +1 @@ -google-cloud-bigquery==3.11.4 \ No newline at end of file +google-cloud-bigquery==3.14.1 \ No newline at end of file From e81a13caff4fa892945b2408a2074b06e35d8686 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 4 Mar 2024 11:03:09 -0600 Subject: [PATCH 254/536] chore(main): release 3.18.0 (#1817) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 19 +++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7bedb5cf6..350787512 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.18.0](https://github.com/googleapis/python-bigquery/compare/v3.17.2...v3.18.0) (2024-02-29) + + +### Features + +* Support nullable boolean and Int64 dtypes in `insert_rows_from_dataframe` ([#1816](https://github.com/googleapis/python-bigquery/issues/1816)) ([ab0cf4c](https://github.com/googleapis/python-bigquery/commit/ab0cf4cc03292f62b56a8813cfb7681daa87f872)) +* Support slot_ms in QueryPlanEntry ([#1831](https://github.com/googleapis/python-bigquery/issues/1831)) ([d62cabb](https://github.com/googleapis/python-bigquery/commit/d62cabbf115637ecbaf8cc378f39329a5ae74c26)) + + +### Bug Fixes + +* Keyword rendering and docstring improvements ([#1829](https://github.com/googleapis/python-bigquery/issues/1829)) ([4dfb920](https://github.com/googleapis/python-bigquery/commit/4dfb920b106784e98f343b3e3fc8e8ff70c50560)) + + +### Documentation + +* **samples:** Updates to urllib3 constraint for Python 3.7 ([#1834](https://github.com/googleapis/python-bigquery/issues/1834)) ([b099c32](https://github.com/googleapis/python-bigquery/commit/b099c32a83946a347560f6a71d08c3f263e56cb6)) +* Update `client_query_w_named_params.py` to use `query_and_wait` API ([#1782](https://github.com/googleapis/python-bigquery/issues/1782)) ([89dfcb6](https://github.com/googleapis/python-bigquery/commit/89dfcb6469d22e78003a70371a0938a6856e033c)) + ## [3.17.2](https://github.com/googleapis/python-bigquery/compare/v3.17.1...v3.17.2) (2024-01-30) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 771b77a38..89024cc08 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.17.2" +__version__ = "3.18.0" From b359a9a55936a759a36aa69c5e5b014685e1fca6 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 4 Mar 2024 11:06:07 -0800 Subject: [PATCH 255/536] feat: support RANGE query parameters (#1827) * feat: RANGE query parameters and unit tests * unit test * unit test coverage * lint * lint * lint * system test * fix system test * ajust init items order * fix typos and improve docstrings --- benchmark/benchmark.py | 2 +- google/cloud/bigquery/__init__.py | 4 + google/cloud/bigquery/query.py | 297 ++++++++++++++++ tests/system/test_query.py | 33 ++ tests/unit/test_query.py | 548 ++++++++++++++++++++++++++++++ 5 files changed, 883 insertions(+), 1 deletion(-) diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 30e294baa..d7dc78678 100644 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -231,7 +231,7 @@ def _is_datetime_min(time_str: str) -> bool: def _summary(run: dict) -> str: - """Coverts run dict to run summary string.""" + """Converts run dict to run summary string.""" no_val = "NODATA" output = ["QUERYTIME "] diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 1ea056eb8..caf81d9aa 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -83,6 +83,8 @@ from google.cloud.bigquery.query import ConnectionProperty from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import ScalarQueryParameterType +from google.cloud.bigquery.query import RangeQueryParameter +from google.cloud.bigquery.query import RangeQueryParameterType from google.cloud.bigquery.query import SqlParameterScalarTypes from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import StructQueryParameterType @@ -122,10 +124,12 @@ "ArrayQueryParameter", "ScalarQueryParameter", "StructQueryParameter", + "RangeQueryParameter", "ArrayQueryParameterType", "ScalarQueryParameterType", "SqlParameterScalarTypes", "StructQueryParameterType", + "RangeQueryParameterType", # Datasets "Dataset", "DatasetReference", diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index a06ece503..9c9402b74 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -30,6 +30,8 @@ Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date] ] +_RANGE_ELEMENT_TYPE_STR = {"TIMESTAMP", "DATETIME", "DATE"} + class ConnectionProperty: """A connection-level property to customize query behavior. @@ -362,6 +364,129 @@ def __repr__(self): return f"{self.__class__.__name__}({items}{name}{description})" +class RangeQueryParameterType(_AbstractQueryParameterType): + """Type representation for range query parameters. + + Args: + type_ (Union[ScalarQueryParameterType, str]): + Type of range element, must be one of 'TIMESTAMP', 'DATETIME', or + 'DATE'. + name (Optional[str]): + The name of the query parameter. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + description (Optional[str]): + The query parameter description. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + @classmethod + def _parse_range_element_type(self, type_): + """Helper method that parses the input range element type, which may + be a string, or a ScalarQueryParameterType object. + + Returns: + google.cloud.bigquery.query.ScalarQueryParameterType: Instance + """ + if isinstance(type_, str): + if type_ not in _RANGE_ELEMENT_TYPE_STR: + raise ValueError( + "If given as a string, range element type must be one of " + "'TIMESTAMP', 'DATE', or 'DATETIME'." + ) + return ScalarQueryParameterType(type_) + elif isinstance(type_, ScalarQueryParameterType): + if type_._type not in _RANGE_ELEMENT_TYPE_STR: + raise ValueError( + "If given as a ScalarQueryParameter object, range element " + "type must be one of 'TIMESTAMP', 'DATE', or 'DATETIME' " + "type." + ) + return type_ + else: + raise ValueError( + "range_type must be a string or ScalarQueryParameter object, " + "of 'TIMESTAMP', 'DATE', or 'DATETIME' type." + ) + + def __init__(self, type_, *, name=None, description=None): + self.type_ = self._parse_range_element_type(type_) + self.name = name + self.description = description + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.RangeQueryParameterType: Instance + """ + type_ = resource["rangeElementType"]["type"] + name = resource.get("name") + description = resource.get("description") + + return cls(type_, name=name, description=description) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + # Name and description are only used if the type is a field inside a struct + # type, but it's StructQueryParameterType's responsibilty to use these two + # attributes in the API representation when needed. Here we omit them. + return { + "type": "RANGE", + "rangeElementType": self.type_.to_api_repr(), + } + + def with_name(self, new_name: Union[str, None]): + """Return a copy of the instance with ``name`` set to ``new_name``. + + Args: + name (Union[str, None]): + The new name of the range query parameter type. If ``None``, + the existing name is cleared. + + Returns: + google.cloud.bigquery.query.RangeQueryParameterType: + A new instance with updated name. + """ + return type(self)(self.type_, name=new_name, description=self.description) + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + return f"{self.__class__.__name__}({self.type_!r}{name}{description})" + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + Tuple: The contents of this + :class:`~google.cloud.bigquery.query.RangeQueryParameterType`. + """ + type_ = self.type_.to_api_repr() + return (self.name, type_, self.description) + + def __eq__(self, other): + if not isinstance(other, RangeQueryParameterType): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + class _AbstractQueryParameter(object): """Base class for named / positional query parameters.""" @@ -811,6 +936,178 @@ def __repr__(self): return "StructQueryParameter{}".format(self._key()) +class RangeQueryParameter(_AbstractQueryParameter): + """Named / positional query parameters for range values. + + Args: + range_element_type (Union[str, RangeQueryParameterType]): + The type of range elements. It must be one of 'TIMESTAMP', + 'DATE', or 'DATETIME'. + + start (Optional[Union[ScalarQueryParameter, str]]): + The start of the range value. Must be the same type as + range_element_type. If not provided, it's interpreted as UNBOUNDED. + + end (Optional[Union[ScalarQueryParameter, str]]): + The end of the range value. Must be the same type as + range_element_type. If not provided, it's interpreted as UNBOUNDED. + + name (Optional[str]): + Parameter name, used via ``@foo`` syntax. If None, the + parameter can only be addressed via position (``?``). + """ + + @classmethod + def _parse_range_element_type(self, range_element_type): + if isinstance(range_element_type, str): + if range_element_type not in _RANGE_ELEMENT_TYPE_STR: + raise ValueError( + "If given as a string, range_element_type must be one of " + f"'TIMESTAMP', 'DATE', or 'DATETIME'. Got {range_element_type}." + ) + return RangeQueryParameterType(range_element_type) + elif isinstance(range_element_type, RangeQueryParameterType): + if range_element_type.type_._type not in _RANGE_ELEMENT_TYPE_STR: + raise ValueError( + "If given as a RangeQueryParameterType object, " + "range_element_type must be one of 'TIMESTAMP', 'DATE', " + "or 'DATETIME' type." + ) + return range_element_type + else: + raise ValueError( + "range_element_type must be a string or " + "RangeQueryParameterType object, of 'TIMESTAMP', 'DATE', " + "or 'DATETIME' type. Got " + f"{type(range_element_type)}:{range_element_type}" + ) + + @classmethod + def _serialize_range_element_value(self, value, type_): + if value is None or isinstance(value, str): + return value + else: + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) + if converter is not None: + return converter(value) # type: ignore + else: + raise ValueError( + f"Cannot convert range element value from type {type_}, " + "must be one of the strings 'TIMESTAMP', 'DATE' " + "'DATETIME' or a RangeQueryParameterType object." + ) + + def __init__( + self, + range_element_type, + start=None, + end=None, + name=None, + ): + self.name = name + self.range_element_type = self._parse_range_element_type(range_element_type) + print(self.range_element_type.type_._type) + self.start = start + self.end = end + + @classmethod + def positional( + cls, range_element_type, start=None, end=None + ) -> "RangeQueryParameter": + """Factory for positional parameters. + + Args: + range_element_type (Union[str, RangeQueryParameterType]): + The type of range elements. It must be one of `'TIMESTAMP'`, + `'DATE'`, or `'DATETIME'`. + + start (Optional[Union[ScalarQueryParameter, str]]): + The start of the range value. Must be the same type as + range_element_type. If not provided, it's interpreted as + UNBOUNDED. + + end (Optional[Union[ScalarQueryParameter, str]]): + The end of the range value. Must be the same type as + range_element_type. If not provided, it's interpreted as + UNBOUNDED. + + Returns: + google.cloud.bigquery.query.RangeQueryParameter: Instance without + name. + """ + return cls(range_element_type, start, end) + + @classmethod + def from_api_repr(cls, resource: dict) -> "RangeQueryParameter": + """Factory: construct parameter from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.RangeQueryParameter: Instance + """ + name = resource.get("name") + range_element_type = ( + resource.get("parameterType", {}).get("rangeElementType", {}).get("type") + ) + range_value = resource.get("parameterValue", {}).get("rangeValue", {}) + start = range_value.get("start", {}).get("value") + end = range_value.get("end", {}).get("value") + + return cls(range_element_type, start=start, end=end, name=name) + + def to_api_repr(self) -> dict: + """Construct JSON API representation for the parameter. + + Returns: + Dict: JSON mapping + """ + range_element_type = self.range_element_type.to_api_repr() + type_ = self.range_element_type.type_._type + start = self._serialize_range_element_value(self.start, type_) + end = self._serialize_range_element_value(self.end, type_) + resource = { + "parameterType": range_element_type, + "parameterValue": { + "rangeValue": { + "start": {"value": start}, + "end": {"value": end}, + }, + }, + } + + # distinguish between name not provided vs. name being empty string + if self.name is not None: + resource["name"] = self.name + + return resource + + def _key(self): + """A tuple key that uniquely describes this field. + + Used to compute this instance's hashcode and evaluate equality. + + Returns: + Tuple: The contents of this + :class:`~google.cloud.bigquery.query.RangeQueryParameter`. + """ + + range_element_type = self.range_element_type.to_api_repr() + return (self.name, range_element_type, self.start, self.end) + + def __eq__(self, other): + if not isinstance(other, RangeQueryParameter): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return "RangeQueryParameter{}".format(self._key()) + + class SqlParameterScalarTypes: """Supported scalar SQL query parameter types as type objects.""" diff --git a/tests/system/test_query.py b/tests/system/test_query.py index 82be40693..0494272d9 100644 --- a/tests/system/test_query.py +++ b/tests/system/test_query.py @@ -26,6 +26,7 @@ from google.cloud.bigquery.query import ScalarQueryParameterType from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import StructQueryParameterType +from google.cloud.bigquery.query import RangeQueryParameter @pytest.fixture(params=["INSERT", "QUERY"]) @@ -422,6 +423,38 @@ def test_query_statistics(bigquery_client, query_api_method): ) ], ), + ( + "SELECT @range_date", + "[2016-12-05, UNBOUNDED)", + [ + RangeQueryParameter( + name="range_date", + range_element_type="DATE", + start=datetime.date(2016, 12, 5), + ) + ], + ), + ( + "SELECT @range_datetime", + "[2016-12-05T00:00:00, UNBOUNDED)", + [ + RangeQueryParameter( + name="range_datetime", + range_element_type="DATETIME", + start=datetime.datetime(2016, 12, 5), + ) + ], + ), + ( + "SELECT @range_unbounded", + "[UNBOUNDED, UNBOUNDED)", + [ + RangeQueryParameter( + name="range_unbounded", + range_element_type="DATETIME", + ) + ], + ), ), ) def test_query_parameters( diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 1704abac7..f511bf28d 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -376,6 +376,100 @@ def test_repr_all_optional_attrs(self): self.assertEqual(repr(param_type), expected) +class Test_RangeQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import RangeQueryParameterType + + return RangeQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_str(self): + param_type = self._make_one("DATE", name="foo", description="bar") + self.assertEqual(param_type.type_._type, "DATE") + self.assertEqual(param_type.name, "foo") + self.assertEqual(param_type.description, "bar") + + def test_ctor_type(self): + from google.cloud.bigquery import ScalarQueryParameterType + + scalar_type = ScalarQueryParameterType("DATE") + param_type = self._make_one(scalar_type, name="foo", description="bar") + self.assertEqual(param_type.type_._type, "DATE") + self.assertEqual(param_type.name, "foo") + self.assertEqual(param_type.description, "bar") + + def test_ctor_unsupported_type_str(self): + with self.assertRaises(ValueError): + self._make_one("TIME") + + def test_ctor_unsupported_type_type(self): + from google.cloud.bigquery import ScalarQueryParameterType + + scalar_type = ScalarQueryParameterType("TIME") + with self.assertRaises(ValueError): + self._make_one(scalar_type) + + def test_ctor_wrong_type(self): + with self.assertRaises(ValueError): + self._make_one(None) + + def test_from_api_repr(self): + RESOURCE = { + "type": "RANGE", + "rangeElementType": {"type": "DATE"}, + } + + klass = self._get_target_class() + result = klass.from_api_repr(RESOURCE) + self.assertEqual(result.type_._type, "DATE") + self.assertIsNone(result.name) + self.assertIsNone(result.description) + + def test_to_api_repr(self): + EXPECTED = { + "type": "RANGE", + "rangeElementType": {"type": "DATE"}, + } + param_type = self._make_one("DATE", name="foo", description="bar") + result = param_type.to_api_repr() + self.assertEqual(result, EXPECTED) + + def test__repr__(self): + param_type = self._make_one("DATE", name="foo", description="bar") + param_repr = "RangeQueryParameterType(ScalarQueryParameterType('DATE'), name='foo', description='bar')" + self.assertEqual(repr(param_type), param_repr) + + def test__eq__(self): + param_type1 = self._make_one("DATE", name="foo", description="bar") + self.assertEqual(param_type1, param_type1) + self.assertNotEqual(param_type1, object()) + + alias = self._make_one("DATE", name="foo", description="bar") + self.assertIsNot(param_type1, alias) + self.assertEqual(param_type1, alias) + + wrong_type = self._make_one("DATETIME", name="foo", description="bar") + self.assertNotEqual(param_type1, wrong_type) + + wrong_name = self._make_one("DATETIME", name="foo2", description="bar") + self.assertNotEqual(param_type1, wrong_name) + + wrong_description = self._make_one("DATETIME", name="foo", description="bar2") + self.assertNotEqual(param_type1, wrong_description) + + def test_with_name(self): + param_type1 = self._make_one("DATE", name="foo", description="bar") + param_type2 = param_type1.with_name("foo2") + + self.assertIsNot(param_type1, param_type2) + self.assertEqual(param_type2.type_._type, "DATE") + self.assertEqual(param_type2.name, "foo2") + self.assertEqual(param_type2.description, "bar") + + class Test__AbstractQueryParameter(unittest.TestCase): @staticmethod def _get_target_class(): @@ -663,6 +757,460 @@ def test___repr__(self): self.assertEqual(repr(field1), expected) +class Test_RangeQueryParameter(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import RangeQueryParameter + + return RangeQueryParameter + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATE") + param = self._make_one( + range_element_type="DATE", start="2016-08-11", name="foo" + ) + self.assertEqual(param.name, "foo") + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, "2016-08-11") + self.assertIs(param.end, None) + + def test_ctor_w_datetime_query_parameter_type_str(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATETIME") + start_datetime = datetime.datetime(year=2020, month=12, day=31, hour=12) + end_datetime = datetime.datetime(year=2021, month=12, day=31, hour=12) + param = self._make_one( + range_element_type="DATETIME", + start=start_datetime, + end=end_datetime, + name="foo", + ) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, start_datetime) + self.assertEqual(param.end, end_datetime) + self.assertEqual(param.name, "foo") + + def test_ctor_w_datetime_query_parameter_type_type(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATETIME") + param = self._make_one(range_element_type=range_element_type) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, None) + self.assertEqual(param.end, None) + self.assertEqual(param.name, None) + + def test_ctor_w_timestamp_query_parameter_typ_str(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="TIMESTAMP") + start_datetime = datetime.datetime(year=2020, month=12, day=31, hour=12) + end_datetime = datetime.datetime(year=2021, month=12, day=31, hour=12) + param = self._make_one( + range_element_type="TIMESTAMP", + start=start_datetime, + end=end_datetime, + name="foo", + ) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, start_datetime) + self.assertEqual(param.end, end_datetime) + self.assertEqual(param.name, "foo") + + def test_ctor_w_timestamp_query_parameter_type_type(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="TIMESTAMP") + param = self._make_one(range_element_type=range_element_type) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, None) + self.assertEqual(param.end, None) + self.assertEqual(param.name, None) + + def test_ctor_w_date_query_parameter_type_str(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATE") + start_date = datetime.date(year=2020, month=12, day=31) + end_date = datetime.date(year=2021, month=12, day=31) + param = self._make_one( + range_element_type="DATE", + start=start_date, + end=end_date, + name="foo", + ) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, start_date) + self.assertEqual(param.end, end_date) + self.assertEqual(param.name, "foo") + + def test_ctor_w_date_query_parameter_type_type(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATE") + param = self._make_one(range_element_type=range_element_type) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, None) + self.assertEqual(param.end, None) + self.assertEqual(param.name, None) + + def test_ctor_w_name_empty_str(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATE") + param = self._make_one( + range_element_type="DATE", + name="", + ) + self.assertEqual(param.range_element_type, range_element_type) + self.assertIs(param.start, None) + self.assertIs(param.end, None) + self.assertEqual(param.name, "") + + def test_ctor_wo_value(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATETIME") + param = self._make_one(range_element_type="DATETIME", name="foo") + self.assertEqual(param.range_element_type, range_element_type) + self.assertIs(param.start, None) + self.assertIs(param.end, None) + self.assertEqual(param.name, "foo") + + def test_ctor_w_unsupported_query_parameter_type_str(self): + with self.assertRaises(ValueError): + self._make_one(range_element_type="TIME", name="foo") + + def test_ctor_w_unsupported_query_parameter_type_type(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATE") + range_element_type.type_._type = "TIME" + with self.assertRaises(ValueError): + self._make_one(range_element_type=range_element_type, name="foo") + + def test_ctor_w_unsupported_query_parameter_type_input(self): + with self.assertRaises(ValueError): + self._make_one(range_element_type=None, name="foo") + + def test_positional(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + range_element_type = RangeQueryParameterType(type_="DATE") + klass = self._get_target_class() + param = klass.positional( + range_element_type="DATE", start="2016-08-11", end="2016-08-12" + ) + self.assertIs(param.name, None) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, "2016-08-11") + self.assertEqual(param.end, "2016-08-12") + + def test_from_api_repr_w_name(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + RESOURCE = { + "name": "foo", + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": "2020-12-31"}} + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + range_element_type = RangeQueryParameterType(type_="DATE") + self.assertEqual(param.name, "foo") + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, None) + self.assertEqual(param.end, "2020-12-31") + + def test_from_api_repr_wo_name(self): + from google.cloud.bigquery.query import RangeQueryParameterType + + RESOURCE = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": "2020-12-31"}} + }, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + range_element_type = RangeQueryParameterType(type_="DATE") + self.assertEqual(param.name, None) + self.assertEqual(param.range_element_type, range_element_type) + self.assertEqual(param.start, None) + self.assertEqual(param.end, "2020-12-31") + + def test_from_api_repr_wo_value(self): + # Back-end may not send back values for None params. See #9027 + from google.cloud.bigquery.query import RangeQueryParameterType + + RESOURCE = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + } + range_element_type = RangeQueryParameterType(type_="DATE") + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertIs(param.name, None) + self.assertEqual(param.range_element_type, range_element_type) + self.assertIs(param.start, None) + self.assertIs(param.end, None) + + def test_to_api_repr_w_name(self): + EXPECTED = { + "name": "foo", + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": "2016-08-11"}} + }, + } + param = self._make_one(range_element_type="DATE", end="2016-08-11", name="foo") + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_name(self): + EXPECTED = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": "2016-08-11"}} + }, + } + klass = self._get_target_class() + param = klass.positional(range_element_type="DATE", end="2016-08-11") + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_date_date(self): + today = datetime.date.today() + today_str = today.strftime("%Y-%m-%d") + EXPECTED = { + "name": "foo", + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": today_str}} + }, + } + param = self._make_one(range_element_type="DATE", end=today, name="foo") + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_datetime_str(self): + EXPECTED = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATETIME", + }, + }, + "parameterValue": { + "rangeValue": { + "start": {"value": None}, + "end": {"value": "2020-01-01T12:00:00.000000"}, + } + }, + } + klass = self._get_target_class() + end_datetime = datetime.datetime(year=2020, month=1, day=1, hour=12) + param = klass.positional(range_element_type="DATETIME", end=end_datetime) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_datetime_datetime(self): + from google.cloud.bigquery._helpers import _RFC3339_MICROS_NO_ZULU + + now = datetime.datetime.utcnow() + now_str = now.strftime(_RFC3339_MICROS_NO_ZULU) + EXPECTED = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATETIME", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": now_str}} + }, + } + klass = self._get_target_class() + param = klass.positional(range_element_type="DATETIME", end=now) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_timestamp_str(self): + EXPECTED = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "TIMESTAMP", + }, + }, + "parameterValue": { + "rangeValue": { + "start": {"value": None}, + "end": {"value": "2020-01-01 12:00:00+00:00"}, + } + }, + } + klass = self._get_target_class() + end_timestamp = datetime.datetime(year=2020, month=1, day=1, hour=12) + param = klass.positional(range_element_type="TIMESTAMP", end=end_timestamp) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_w_timestamp_timestamp(self): + from google.cloud._helpers import UTC # type: ignore + + now = datetime.datetime.utcnow() + now = now.astimezone(UTC) + now_str = str(now) + EXPECTED = { + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "TIMESTAMP", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": now_str}} + }, + } + klass = self._get_target_class() + param = klass.positional(range_element_type="TIMESTAMP", end=now) + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_wo_values(self): + EXPECTED = { + "name": "foo", + "parameterType": { + "type": "RANGE", + "rangeElementType": { + "type": "DATE", + }, + }, + "parameterValue": { + "rangeValue": {"start": {"value": None}, "end": {"value": None}} + }, + } + param = self._make_one(range_element_type="DATE", name="foo") + self.assertEqual(param.to_api_repr(), EXPECTED) + + def test_to_api_repr_unsupported_value_type(self): + with self.assertRaisesRegex( + ValueError, "Cannot convert range element value from type" + ): + range_param = self._make_one( + range_element_type="DATE", start=datetime.date.today() + ) + range_param.range_element_type.type_._type = "LONG" + range_param.to_api_repr() + + def test___eq__(self): + param = self._make_one( + range_element_type="DATE", start="2016-08-11", name="foo" + ) + self.assertEqual(param, param) + self.assertNotEqual(param, object()) + alias = self._make_one( + range_element_type="DATE", start="2016-08-11", name="bar" + ) + self.assertNotEqual(param, alias) + wrong_type = self._make_one( + range_element_type="DATETIME", + start="2020-12-31 12:00:00.000000", + name="foo", + ) + self.assertNotEqual(param, wrong_type) + wrong_val = self._make_one( + range_element_type="DATE", start="2016-08-12", name="foo" + ) + self.assertNotEqual(param, wrong_val) + + def test___eq___wrong_type(self): + param = self._make_one( + range_element_type="DATE", start="2016-08-11", name="foo" + ) + other = object() + self.assertNotEqual(param, other) + self.assertEqual(param, mock.ANY) + + def test___eq___name_mismatch(self): + param = self._make_one( + range_element_type="DATE", start="2016-08-11", name="foo" + ) + other = self._make_one( + range_element_type="DATE", start="2016-08-11", name="bar" + ) + self.assertNotEqual(param, other) + + def test___eq___field_type_mismatch(self): + param = self._make_one(range_element_type="DATE") + other = self._make_one(range_element_type="DATETIME") + self.assertNotEqual(param, other) + + def test___eq___value_mismatch(self): + param = self._make_one(range_element_type="DATE", start="2016-08-11") + other = self._make_one(range_element_type="DATE", start="2016-08-12") + self.assertNotEqual(param, other) + + def test___eq___hit(self): + param = self._make_one(range_element_type="DATE", start="2016-08-12") + other = self._make_one(range_element_type="DATE", start="2016-08-12") + self.assertEqual(param, other) + + def test___ne___wrong_type(self): + param = self._make_one(range_element_type="DATE") + other = object() + self.assertNotEqual(param, other) + self.assertEqual(param, mock.ANY) + + def test___ne___same_value(self): + param1 = self._make_one(range_element_type="DATE") + param2 = self._make_one(range_element_type="DATE") + # unittest ``assertEqual`` uses ``==`` not ``!=``. + comparison_val = param1 != param2 + self.assertFalse(comparison_val) + + def test___ne___different_values(self): + param1 = self._make_one(range_element_type="DATE", start="2016-08-12") + param2 = self._make_one(range_element_type="DATE") + self.assertNotEqual(param1, param2) + + def test___repr__(self): + param1 = self._make_one(range_element_type="DATE", start="2016-08-12") + expected = "RangeQueryParameter(None, {'type': 'RANGE', 'rangeElementType': {'type': 'DATE'}}, '2016-08-12', None)" + self.assertEqual(repr(param1), expected) + + def _make_subparam(name, type_, value): from google.cloud.bigquery.query import ScalarQueryParameter From 53c2cbf98d2961f553747514de273bcd5c117f0e Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 5 Mar 2024 09:47:31 -0800 Subject: [PATCH 256/536] fix: augment universe_domain handling (#1837) * fix: augment universe_domain handling This PR revisits the universe resolution for the BQ client, and handles new requirements like env-based specification and validation. * lint * skipif core too old * deps * add import * no-cover in test helper * lint * ignore google.auth typing * capitalization * change to raise in test code * reviewer feedback * var fix --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/_helpers.py | 56 ++++++++++++++++++++++ google/cloud/bigquery/client.py | 21 ++++---- tests/unit/helpers.py | 14 ++++++ tests/unit/test__helpers.py | 80 ++++++++++++++++++++++++++++++- 4 files changed, 162 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 905d4aee1..ec4ac9970 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -30,6 +30,8 @@ from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes +from google.auth import credentials as ga_credentials # type: ignore +from google.api_core import client_options as client_options_lib _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" @@ -55,9 +57,63 @@ _DEFAULT_HOST = "https://bigquery.googleapis.com" """Default host for JSON API.""" +_DEFAULT_HOST_TEMPLATE = "https://bigquery.{UNIVERSE_DOMAIN}" +""" Templatized endpoint format. """ + _DEFAULT_UNIVERSE = "googleapis.com" """Default universe for the JSON API.""" +_UNIVERSE_DOMAIN_ENV = "GOOGLE_CLOUD_UNIVERSE_DOMAIN" +"""Environment variable for setting universe domain.""" + + +def _get_client_universe( + client_options: Optional[Union[client_options_lib.ClientOptions, dict]] +) -> str: + """Retrieves the specified universe setting. + + Args: + client_options: specified client options. + Returns: + str: resolved universe setting. + + """ + if isinstance(client_options, dict): + client_options = client_options_lib.from_dict(client_options) + universe = _DEFAULT_UNIVERSE + if hasattr(client_options, "universe_domain"): + options_universe = getattr(client_options, "universe_domain") + if options_universe is not None and len(options_universe) > 0: + universe = options_universe + else: + env_universe = os.getenv(_UNIVERSE_DOMAIN_ENV) + if isinstance(env_universe, str) and len(env_universe) > 0: + universe = env_universe + return universe + + +def _validate_universe(client_universe: str, credentials: ga_credentials.Credentials): + """Validates that client provided universe and universe embedded in credentials match. + + Args: + client_universe (str): The universe domain configured via the client options. + credentials (ga_credentials.Credentials): The credentials being used in the client. + + Raises: + ValueError: when client_universe does not match the universe in credentials. + """ + if hasattr(credentials, "universe_domain"): + cred_universe = getattr(credentials, "universe_domain") + if isinstance(cred_universe, str): + if client_universe != cred_universe: + raise ValueError( + "The configured universe domain " + f"({client_universe}) does not match the universe domain " + f"found in the credentials ({cred_universe}). " + "If you haven't configured the universe domain explicitly, " + f"`{_DEFAULT_UNIVERSE}` is the default." + ) + def _get_bigquery_host(): return os.environ.get(BIGQUERY_EMULATOR_HOST, _DEFAULT_HOST) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index a871dc003..cb4daa897 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -78,7 +78,10 @@ from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._helpers import _get_bigquery_host from google.cloud.bigquery._helpers import _DEFAULT_HOST +from google.cloud.bigquery._helpers import _DEFAULT_HOST_TEMPLATE from google.cloud.bigquery._helpers import _DEFAULT_UNIVERSE +from google.cloud.bigquery._helpers import _validate_universe +from google.cloud.bigquery._helpers import _get_client_universe from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem @@ -245,6 +248,7 @@ def __init__( kw_args = {"client_info": client_info} bq_host = _get_bigquery_host() kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None + client_universe = None if client_options: if isinstance(client_options, dict): client_options = google.api_core.client_options.from_dict( @@ -253,14 +257,15 @@ def __init__( if client_options.api_endpoint: api_endpoint = client_options.api_endpoint kw_args["api_endpoint"] = api_endpoint - elif ( - hasattr(client_options, "universe_domain") - and client_options.universe_domain - and client_options.universe_domain is not _DEFAULT_UNIVERSE - ): - kw_args["api_endpoint"] = _DEFAULT_HOST.replace( - _DEFAULT_UNIVERSE, client_options.universe_domain - ) + else: + client_universe = _get_client_universe(client_options) + if client_universe != _DEFAULT_UNIVERSE: + kw_args["api_endpoint"] = _DEFAULT_HOST_TEMPLATE.replace( + "{UNIVERSE_DOMAIN}", client_universe + ) + # Ensure credentials and universe are not in conflict. + if hasattr(self, "_credentials") and client_universe is not None: + _validate_universe(client_universe, self._credentials) self._connection = Connection(self, **kw_args) self._location = location diff --git a/tests/unit/helpers.py b/tests/unit/helpers.py index 67aeaca35..bc92c0df6 100644 --- a/tests/unit/helpers.py +++ b/tests/unit/helpers.py @@ -43,6 +43,20 @@ def make_client(project="PROJECT", **kw): return google.cloud.bigquery.client.Client(project, credentials, **kw) +def make_creds(creds_universe: None): + from google.auth import credentials + + class TestingCreds(credentials.Credentials): + def refresh(self, request): # pragma: NO COVER + raise NotImplementedError + + @property + def universe_domain(self): + return creds_universe + + return TestingCreds() + + def make_dataset_reference_string(project, ds_id): return f"{project}.{ds_id}" diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 87ab46669..019d2e7bd 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -17,8 +17,86 @@ import decimal import json import unittest - +import os import mock +import pytest +import packaging +import google.api_core + + +@pytest.mark.skipif( + packaging.version.parse(getattr(google.api_core, "__version__", "0.0.0")) + < packaging.version.Version("2.15.0"), + reason="universe_domain not supported with google-api-core < 2.15.0", +) +class Test_get_client_universe(unittest.TestCase): + def test_with_none(self): + from google.cloud.bigquery._helpers import _get_client_universe + + self.assertEqual("googleapis.com", _get_client_universe(None)) + + def test_with_dict(self): + from google.cloud.bigquery._helpers import _get_client_universe + + options = {"universe_domain": "foo.com"} + self.assertEqual("foo.com", _get_client_universe(options)) + + def test_with_dict_empty(self): + from google.cloud.bigquery._helpers import _get_client_universe + + options = {"universe_domain": ""} + self.assertEqual("googleapis.com", _get_client_universe(options)) + + def test_with_client_options(self): + from google.cloud.bigquery._helpers import _get_client_universe + from google.api_core import client_options + + options = client_options.from_dict({"universe_domain": "foo.com"}) + self.assertEqual("foo.com", _get_client_universe(options)) + + @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": "foo.com"}) + def test_with_environ(self): + from google.cloud.bigquery._helpers import _get_client_universe + + self.assertEqual("foo.com", _get_client_universe(None)) + + @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": ""}) + def test_with_environ_empty(self): + from google.cloud.bigquery._helpers import _get_client_universe + + self.assertEqual("googleapis.com", _get_client_universe(None)) + + +class Test_validate_universe(unittest.TestCase): + def test_with_none(self): + from google.cloud.bigquery._helpers import _validate_universe + + # should not raise + _validate_universe("googleapis.com", None) + + def test_with_no_universe_creds(self): + from google.cloud.bigquery._helpers import _validate_universe + from .helpers import make_creds + + creds = make_creds(None) + # should not raise + _validate_universe("googleapis.com", creds) + + def test_with_matched_universe_creds(self): + from google.cloud.bigquery._helpers import _validate_universe + from .helpers import make_creds + + creds = make_creds("googleapis.com") + # should not raise + _validate_universe("googleapis.com", creds) + + def test_with_mismatched_universe_creds(self): + from google.cloud.bigquery._helpers import _validate_universe + from .helpers import make_creds + + creds = make_creds("foo.com") + with self.assertRaises(ValueError): + _validate_universe("googleapis.com", creds) class Test_not_null(unittest.TestCase): From 86a45c989836b34dca456bac014352e55d6f86c0 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 6 Mar 2024 17:04:06 -0800 Subject: [PATCH 257/536] feat: support range sql (#1807) * feat: support range sql * add unit tests * add system test * lint and remove debug code * lint and remove debug code * remove added blank line * add comment for legacy type --- google/cloud/bigquery/enums.py | 2 ++ google/cloud/bigquery/standard_sql.py | 36 ++++++++++++++++++- tests/system/test_client.py | 38 ++++++++++++++++++++ tests/unit/test_standard_sql_types.py | 52 +++++++++++++++++++++++++++ 4 files changed, 127 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 553853630..d75037ad1 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -254,6 +254,7 @@ def _generate_next_value_(name, start, count, last_values): JSON = enum.auto() ARRAY = enum.auto() STRUCT = enum.auto() + RANGE = enum.auto() class EntityTypes(str, enum.Enum): @@ -292,6 +293,7 @@ class SqlTypeNames(str, enum.Enum): TIME = "TIME" DATETIME = "DATETIME" INTERVAL = "INTERVAL" # NOTE: not available in legacy types + RANGE = "RANGE" # NOTE: not available in legacy types class WriteDisposition(object): diff --git a/google/cloud/bigquery/standard_sql.py b/google/cloud/bigquery/standard_sql.py index e0f22b2de..68332eb80 100644 --- a/google/cloud/bigquery/standard_sql.py +++ b/google/cloud/bigquery/standard_sql.py @@ -43,6 +43,7 @@ class StandardSqlDataType: ] } } + RANGE: {type_kind="RANGE", range_element_type="DATETIME"} Args: type_kind: @@ -52,6 +53,8 @@ class StandardSqlDataType: The type of the array's elements, if type_kind is ARRAY. struct_type: The fields of this struct, in order, if type_kind is STRUCT. + range_element_type: + The type of the range's elements, if type_kind is RANGE. """ def __init__( @@ -61,12 +64,14 @@ def __init__( ] = StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, array_element_type: Optional["StandardSqlDataType"] = None, struct_type: Optional["StandardSqlStructType"] = None, + range_element_type: Optional["StandardSqlDataType"] = None, ): self._properties: Dict[str, Any] = {} self.type_kind = type_kind self.array_element_type = array_element_type self.struct_type = struct_type + self.range_element_type = range_element_type @property def type_kind(self) -> Optional[StandardSqlTypeNames]: @@ -127,6 +132,28 @@ def struct_type(self, value: Optional["StandardSqlStructType"]): else: self._properties["structType"] = struct_type + @property + def range_element_type(self) -> Optional["StandardSqlDataType"]: + """The type of the range's elements, if type_kind = "RANGE". Must be + one of DATETIME, DATE, or TIMESTAMP.""" + range_element_info = self._properties.get("rangeElementType") + + if range_element_info is None: + return None + + result = StandardSqlDataType() + result._properties = range_element_info # We do not use a copy on purpose. + return result + + @range_element_type.setter + def range_element_type(self, value: Optional["StandardSqlDataType"]): + range_element_type = None if value is None else value.to_api_repr() + + if range_element_type is None: + self._properties.pop("rangeElementType", None) + else: + self._properties["rangeElementType"] = range_element_type + def to_api_repr(self) -> Dict[str, Any]: """Construct the API resource representation of this SQL data type.""" return copy.deepcopy(self._properties) @@ -155,7 +182,13 @@ def from_api_repr(cls, resource: Dict[str, Any]): if struct_info: struct_type = StandardSqlStructType.from_api_repr(struct_info) - return cls(type_kind, array_element_type, struct_type) + range_element_type = None + if type_kind == StandardSqlTypeNames.RANGE: + range_element_info = resource.get("rangeElementType") + if range_element_info: + range_element_type = cls.from_api_repr(range_element_info) + + return cls(type_kind, array_element_type, struct_type, range_element_type) def __eq__(self, other): if not isinstance(other, StandardSqlDataType): @@ -165,6 +198,7 @@ def __eq__(self, other): self.type_kind == other.type_kind and self.array_element_type == other.array_element_type and self.struct_type == other.struct_type + and self.range_element_type == other.range_element_type ) def __str__(self): diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 74c152cf2..04740de8a 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2193,6 +2193,44 @@ def test_create_routine(self): assert len(rows) == 1 assert rows[0].max_value == 100.0 + def test_create_routine_with_range(self): + routine_name = "routine_range" + dataset = self.temp_dataset(_make_dataset_id("routine_range")) + + routine = bigquery.Routine( + dataset.routine(routine_name), + type_="SCALAR_FUNCTION", + language="SQL", + body="RANGE_START(x)", + arguments=[ + bigquery.RoutineArgument( + name="x", + data_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.RANGE, + range_element_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.DATE + ), + ), + ) + ], + return_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.DATE + ), + ) + + query_string = ( + "SELECT `{}`(RANGE '[2016-08-12, UNBOUNDED)') as range_start;".format( + str(routine.reference) + ) + ) + + routine = helpers.retry_403(Config.CLIENT.create_routine)(routine) + query_job = helpers.retry_403(Config.CLIENT.query)(query_string) + rows = list(query_job.result()) + + assert len(rows) == 1 + assert rows[0].range_start == datetime.date(2016, 8, 12) + def test_create_tvf_routine(self): from google.cloud.bigquery import ( Routine, diff --git a/tests/unit/test_standard_sql_types.py b/tests/unit/test_standard_sql_types.py index 0ba0e0cfd..3ed912b5a 100644 --- a/tests/unit/test_standard_sql_types.py +++ b/tests/unit/test_standard_sql_types.py @@ -129,6 +129,28 @@ def test_to_api_repr_struct_type_w_field_types(self): } assert result == expected + def test_to_api_repr_range_type_element_type_missing(self): + instance = self._make_one( + bq.StandardSqlTypeNames.RANGE, range_element_type=None + ) + + result = instance.to_api_repr() + + assert result == {"typeKind": "RANGE"} + + def test_to_api_repr_range_type_w_element_type(self): + range_element_type = self._make_one(type_kind=bq.StandardSqlTypeNames.DATE) + instance = self._make_one( + bq.StandardSqlTypeNames.RANGE, range_element_type=range_element_type + ) + + result = instance.to_api_repr() + + assert result == { + "typeKind": "RANGE", + "rangeElementType": {"typeKind": "DATE"}, + } + def test_from_api_repr_empty_resource(self): klass = self._get_target_class() result = klass.from_api_repr(resource={}) @@ -276,6 +298,31 @@ def test_from_api_repr_struct_type_incomplete_field_info(self): ) assert result == expected + def test_from_api_repr_range_type_full(self): + klass = self._get_target_class() + resource = {"typeKind": "RANGE", "rangeElementType": {"typeKind": "DATE"}} + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.RANGE, + range_element_type=klass(type_kind=bq.StandardSqlTypeNames.DATE), + ) + assert result == expected + + def test_from_api_repr_range_type_missing_element_type(self): + klass = self._get_target_class() + resource = {"typeKind": "RANGE"} + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.RANGE, + range_element_type=None, + struct_type=None, + ) + assert result == expected + def test__eq__another_type(self): instance = self._make_one() @@ -321,6 +368,11 @@ def test__eq__similar_instance(self): bq.StandardSqlStructType(fields=[bq.StandardSqlField(name="foo")]), bq.StandardSqlStructType(fields=[bq.StandardSqlField(name="bar")]), ), + ( + "range_element_type", + bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.DATE), + bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.DATETIME), + ), ), ) def test__eq__attribute_differs(self, attr_name, value, value2): From b8189929b6008f7780214822062f8ed05d8d2a01 Mon Sep 17 00:00:00 2001 From: shollyman Date: Wed, 6 Mar 2024 17:42:18 -0800 Subject: [PATCH 258/536] fix: supplementary fix to env-based universe resolution (#1844) * fix: supplementary fix to env-based universe resolution There's a corner case where conversion from dict to a ClientOptions will return a universe_domain value as None that wasn't covered by initial testing. This updates the resolution code and adds tests to exercise the new path. * formatting --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/_helpers.py | 11 +++++++---- tests/unit/test__helpers.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index ec4ac9970..7198b60c2 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -81,10 +81,13 @@ def _get_client_universe( if isinstance(client_options, dict): client_options = client_options_lib.from_dict(client_options) universe = _DEFAULT_UNIVERSE - if hasattr(client_options, "universe_domain"): - options_universe = getattr(client_options, "universe_domain") - if options_universe is not None and len(options_universe) > 0: - universe = options_universe + options_universe = getattr(client_options, "universe_domain", None) + if ( + options_universe + and isinstance(options_universe, str) + and len(options_universe) > 0 + ): + universe = options_universe else: env_universe = os.getenv(_UNIVERSE_DOMAIN_ENV) if isinstance(env_universe, str) and len(env_universe) > 0: diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 019d2e7bd..7e8d815d2 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -60,6 +60,21 @@ def test_with_environ(self): self.assertEqual("foo.com", _get_client_universe(None)) + @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": "foo.com"}) + def test_with_environ_and_dict(self): + from google.cloud.bigquery._helpers import _get_client_universe + + options = ({"credentials_file": "file.json"},) + self.assertEqual("foo.com", _get_client_universe(options)) + + @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": "foo.com"}) + def test_with_environ_and_empty_options(self): + from google.cloud.bigquery._helpers import _get_client_universe + from google.api_core import client_options + + options = client_options.from_dict({}) + self.assertEqual("foo.com", _get_client_universe(options)) + @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": ""}) def test_with_environ_empty(self): from google.cloud.bigquery._helpers import _get_client_universe From 08762fbf3289622c2b948da919ae49567d0378d5 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 7 Mar 2024 15:10:32 -0500 Subject: [PATCH 259/536] chore: use mock from unittest (#1823) * chore: use mock from unittest * correct user_credentials_test.py * add try except for Python 3.7 * fixes linting * adjustments to testing suite to account for dependencies * updates to mypy_samples.py * linting for noxfile.py --------- Co-authored-by: Chalmer Lowe --- noxfile.py | 8 ++++---- samples/desktopapp/user_credentials_test.py | 11 +++++++++-- samples/tests/conftest.py | 2 +- testing/constraints-3.7.txt | 13 +++++++++---- tests/unit/conftest.py | 3 ++- tests/unit/helpers.py | 7 ++++--- tests/unit/job/test_base.py | 2 +- tests/unit/job/test_copy.py | 2 +- tests/unit/job/test_extract.py | 2 +- tests/unit/job/test_load.py | 3 +-- tests/unit/job/test_query.py | 16 ++++++++-------- tests/unit/job/test_query_pandas.py | 2 +- tests/unit/test__helpers.py | 5 +++-- tests/unit/test__http.py | 2 +- tests/unit/test__pandas_helpers.py | 7 +++---- tests/unit/test__versions_helpers.py | 4 ++-- tests/unit/test_client.py | 13 ++++++------- tests/unit/test_create_dataset.py | 3 ++- tests/unit/test_dataset.py | 2 +- tests/unit/test_dbapi_connection.py | 3 +-- tests/unit/test_dbapi_cursor.py | 2 +- tests/unit/test_encryption_configuration.py | 2 +- tests/unit/test_job_retry.py | 2 +- tests/unit/test_list_datasets.py | 3 ++- tests/unit/test_list_jobs.py | 2 +- tests/unit/test_list_projects.py | 3 ++- tests/unit/test_magics.py | 6 +++--- tests/unit/test_opentelemetry_tracing.py | 3 +-- tests/unit/test_query.py | 3 +-- tests/unit/test_retry.py | 2 +- tests/unit/test_schema.py | 2 +- tests/unit/test_table.py | 2 +- 32 files changed, 77 insertions(+), 65 deletions(-) diff --git a/noxfile.py b/noxfile.py index ae022232e..c31d098b8 100644 --- a/noxfile.py +++ b/noxfile.py @@ -72,7 +72,6 @@ def default(session, install_extras=True): # Install all test dependencies, then install local packages in-place. session.install( - "mock", "pytest", "google-cloud-testutils", "pytest-cov", @@ -89,6 +88,8 @@ def default(session, install_extras=True): install_target = "." session.install("-e", install_target, "-c", constraints_path) + session.run("python", "-m", "pip", "freeze") + # Run py.test against the unit tests. session.run( "py.test", @@ -176,7 +177,7 @@ def system(session): # Install all test dependencies, then install local packages in place. session.install( - "mock", "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path + "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path ) if os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE", "") == "true": # mTLS test requires pyopenssl and latest google-cloud-storage @@ -249,7 +250,7 @@ def snippets(session): ) # Install all test dependencies, then install local packages in place. - session.install("mock", "pytest", "google-cloud-testutils", "-c", constraints_path) + session.install("pytest", "google-cloud-testutils", "-c", constraints_path) session.install("google-cloud-storage", "-c", constraints_path) session.install("grpcio", "-c", constraints_path) @@ -336,7 +337,6 @@ def prerelease_deps(session): "google-cloud-datacatalog", "google-cloud-storage", "google-cloud-testutils", - "mock", "psutil", "pytest", "pytest-cov", diff --git a/samples/desktopapp/user_credentials_test.py b/samples/desktopapp/user_credentials_test.py index baa9e33f1..252b843c4 100644 --- a/samples/desktopapp/user_credentials_test.py +++ b/samples/desktopapp/user_credentials_test.py @@ -13,17 +13,24 @@ # limitations under the License. import os +import sys from typing import Iterator, Union +from unittest import mock import google.auth -import mock import pytest from .user_credentials import main # type: ignore PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] -MockType = Union[mock.mock.MagicMock, mock.mock.AsyncMock] + +if sys.version_info >= (3, 8): + # Python 3.8+ has an AsyncMock attribute in unittest.mock, but 3.7 does not + MockType = Union[mock.MagicMock, mock.AsyncMock] +else: + # Other definitions and imports + MockType = Union[mock.MagicMock] @pytest.fixture diff --git a/samples/tests/conftest.py b/samples/tests/conftest.py index 2b5b89c43..91603bef2 100644 --- a/samples/tests/conftest.py +++ b/samples/tests/conftest.py @@ -14,10 +14,10 @@ import datetime from typing import Iterator, List +from unittest import mock import uuid import google.auth -import mock import pytest from google.cloud import bigquery diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 9f71bf11a..28787adb7 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -7,11 +7,16 @@ # Then this file should have foo==1.14.0 db-dtypes==0.3.0 geopandas==0.9.0 -google-api-core==1.31.5 -google-cloud-bigquery-storage==2.6.0 -google-cloud-core==1.6.0 -google-resumable-media==0.6.0 +google-api-core==2.17.1 +google-auth==2.28.1 +google-cloud-bigquery-storage==2.24.0 +google-cloud-core==2.4.1 +google-cloud-testutils==1.4.0 +google-crc32c==1.5.0 +google-resumable-media==2.7.0 +googleapis-common-protos==1.62.0 grpcio==1.47.0 +grpcio-status==1.47.0 ipywidgets==7.7.1 ipython==7.23.1 ipykernel==6.0.0 diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index c2ae78eaa..ebe2d2a7a 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import mock +from unittest import mock + import pytest from .helpers import make_client diff --git a/tests/unit/helpers.py b/tests/unit/helpers.py index bc92c0df6..c5414138e 100644 --- a/tests/unit/helpers.py +++ b/tests/unit/helpers.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock + +import pytest + import google.cloud.bigquery.client import google.cloud.bigquery.dataset -import mock -import pytest def make_connection(*responses): import google.cloud.bigquery._http - import mock from google.cloud.exceptions import NotFound mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index a61fd3198..186729529 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -15,11 +15,11 @@ import copy import http import unittest +from unittest import mock from google.api_core import exceptions import google.api_core.retry from google.api_core.future import polling -import mock import pytest from ..helpers import make_connection diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py index a3b5c70e3..e1bb20db2 100644 --- a/tests/unit/job/test_copy.py +++ b/tests/unit/job/test_copy.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import mock +from unittest import mock from ..helpers import make_connection diff --git a/tests/unit/job/test_extract.py b/tests/unit/job/test_extract.py index 8bada51af..76ee72f28 100644 --- a/tests/unit/job/test_extract.py +++ b/tests/unit/job/test_extract.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import mock +from unittest import mock from ..helpers import make_connection diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index c6bbaa2fb..976fec914 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -13,8 +13,7 @@ # limitations under the License. import copy - -import mock +from unittest import mock from ..helpers import make_connection diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 776234b5b..37ac7ba5e 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -17,11 +17,11 @@ import http import textwrap import types +from unittest import mock import freezegun from google.api_core import exceptions import google.api_core.retry -import mock import requests from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS @@ -382,11 +382,11 @@ def test__done_or_raise_w_timeout(self): job._done_or_raise(timeout=42) fake_get_results.assert_called_once() - call_args = fake_get_results.call_args - self.assertEqual(call_args.kwargs.get("timeout"), 42) + call_args = fake_get_results.call_args[0][1] + self.assertEqual(call_args.timeout, 600.0) - call_args = fake_reload.call_args - self.assertEqual(call_args.kwargs.get("timeout"), 42) + call_args = fake_reload.call_args[1] + self.assertEqual(call_args["timeout"], 42) def test__done_or_raise_w_timeout_and_longer_internal_api_timeout(self): client = _make_client(project=self.PROJECT) @@ -404,11 +404,11 @@ def test__done_or_raise_w_timeout_and_longer_internal_api_timeout(self): expected_timeout = 5.5 fake_get_results.assert_called_once() - call_args = fake_get_results.call_args - self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + call_args = fake_get_results.call_args[0][1] + self.assertAlmostEqual(call_args.timeout, 600.0) call_args = fake_reload.call_args - self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + self.assertAlmostEqual(call_args[1].get("timeout"), expected_timeout) def test__done_or_raise_w_query_results_error_reload_ok(self): client = _make_client(project=self.PROJECT) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 6189830ff..1473ef283 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -15,8 +15,8 @@ import concurrent.futures import copy import json +from unittest import mock -import mock import pytest diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 7e8d815d2..320c57737 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -16,11 +16,12 @@ import datetime import decimal import json -import unittest import os -import mock import pytest import packaging +import unittest +from unittest import mock + import google.api_core diff --git a/tests/unit/test__http.py b/tests/unit/test__http.py index 09f6d29d7..fd7ecdc42 100644 --- a/tests/unit/test__http.py +++ b/tests/unit/test__http.py @@ -13,8 +13,8 @@ # limitations under the License. import unittest +from unittest import mock -import mock import requests diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 7c83d3ec5..abee39065 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -18,6 +18,7 @@ import functools import operator import queue +from unittest import mock import warnings try: @@ -25,8 +26,6 @@ except ImportError: import importlib_metadata as metadata -import mock - try: import pandas import pandas.api.types @@ -1200,7 +1199,7 @@ def test_dataframe_to_parquet_compression_method(module_under_test): call_args = fake_write_table.call_args assert call_args is not None - assert call_args.kwargs.get("compression") == "ZSTD" + assert call_args[1].get("compression") == "ZSTD" @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1635,7 +1634,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): schema.SchemaField("field01", "STRING", mode="REQUIRED"), schema.SchemaField("field02", "BOOL", mode="NULLABLE"), ] - schema_arg = fake_to_arrow.call_args.args[1] + schema_arg = fake_to_arrow.call_args[0][1] assert schema_arg == expected_schema_arg diff --git a/tests/unit/test__versions_helpers.py b/tests/unit/test__versions_helpers.py index afe170e7a..8fa099627 100644 --- a/tests/unit/test__versions_helpers.py +++ b/tests/unit/test__versions_helpers.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest +from unittest import mock -import mock +import pytest try: import pyarrow # type: ignore diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 42581edc1..be8bef03c 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -24,9 +24,9 @@ import json import operator import unittest +from unittest import mock import warnings -import mock import requests import packaging import pytest @@ -8733,9 +8733,9 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): parquet_compression="LZ4", ) - call_args = fake_to_parquet.call_args + call_args = fake_to_parquet.call_args[1] assert call_args is not None - assert call_args.kwargs.get("parquet_compression") == "LZ4" + assert call_args.get("parquet_compression") == "LZ4" @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -9498,12 +9498,11 @@ def test__do_resumable_upload_custom_project(self): timeout=mock.ANY, ) - # Check the project ID used in the call to initiate resumable upload. initiation_url = next( ( - call.args[1] + call[0][1] for call in transport.request.call_args_list - if call.args[0] == "POST" and "uploadType=resumable" in call.args[1] + if call[0][0] == "POST" and "uploadType=resumable" in call[0][1] ), None, ) # pragma: NO COVER @@ -9525,7 +9524,7 @@ def test__do_resumable_upload_custom_timeout(self): # The timeout should be applied to all underlying calls. for call_args in transport.request.call_args_list: - assert call_args.kwargs.get("timeout") == 3.14 + assert call_args[1].get("timeout") == 3.14 def test__do_multipart_upload(self): transport = self._make_transport([self._make_response(http.client.OK)]) diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index 8374e6e0a..a2491a812 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock + from google.cloud.bigquery.dataset import Dataset, DatasetReference from .helpers import make_connection, dataset_polymorphic, make_client import google.cloud.bigquery.dataset from google.cloud.bigquery.retry import DEFAULT_TIMEOUT -import mock import pytest diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 423349a51..c0164bc73 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -13,8 +13,8 @@ # limitations under the License. import unittest +from unittest import mock -import mock from google.cloud.bigquery.routine.routine import Routine, RoutineReference import pytest from google.cloud.bigquery.dataset import ( diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index 67777f923..88378ec98 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -14,8 +14,7 @@ import gc import unittest - -import mock +from unittest import mock try: from google.cloud import bigquery_storage diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 69d33fe17..e9fd2e3dd 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -13,9 +13,9 @@ # limitations under the License. import functools -import mock import operator as op import unittest +from unittest import mock import pytest diff --git a/tests/unit/test_encryption_configuration.py b/tests/unit/test_encryption_configuration.py index f432a903b..cdd944a8f 100644 --- a/tests/unit/test_encryption_configuration.py +++ b/tests/unit/test_encryption_configuration.py @@ -13,7 +13,7 @@ # limitations under the License. import unittest -import mock +from unittest import mock class TestEncryptionConfiguration(unittest.TestCase): diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index 0e984c8fc..d7049c5ca 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -14,8 +14,8 @@ import datetime import re +from unittest import mock -import mock import pytest import google.api_core.exceptions diff --git a/tests/unit/test_list_datasets.py b/tests/unit/test_list_datasets.py index 6f0b55c5e..4ef99fd86 100644 --- a/tests/unit/test_list_datasets.py +++ b/tests/unit/test_list_datasets.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import mock +from unittest import mock + import pytest from google.cloud.bigquery.retry import DEFAULT_TIMEOUT diff --git a/tests/unit/test_list_jobs.py b/tests/unit/test_list_jobs.py index 1db6b5668..edb85af0a 100644 --- a/tests/unit/test_list_jobs.py +++ b/tests/unit/test_list_jobs.py @@ -13,8 +13,8 @@ # limitations under the License. import datetime +from unittest import mock -import mock import pytest from google.cloud.bigquery.retry import DEFAULT_TIMEOUT diff --git a/tests/unit/test_list_projects.py b/tests/unit/test_list_projects.py index 190612b44..5260e5246 100644 --- a/tests/unit/test_list_projects.py +++ b/tests/unit/test_list_projects.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import mock +from unittest import mock + import pytest from google.cloud.bigquery.retry import DEFAULT_TIMEOUT diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 1511cba9c..4b1aaf14d 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -15,11 +15,11 @@ import copy import re from concurrent import futures +from unittest import mock import warnings from google.api_core import exceptions import google.auth.credentials -import mock import pytest from tests.unit.helpers import make_connection from test_utils.imports import maybe_fail_import @@ -513,7 +513,7 @@ def test_bigquery_magic_default_connection_user_agent(): with conn_patch as conn, run_query_patch, default_patch: ip.run_cell_magic("bigquery", "", "SELECT 17 as num") - client_info_arg = conn.call_args.kwargs.get("client_info") + client_info_arg = conn.call_args[1].get("client_info") assert client_info_arg is not None assert client_info_arg.user_agent == "ipython-" + IPython.__version__ @@ -663,7 +663,7 @@ def warning_match(warning): assert len(expected_warnings) == 1 assert len(bqstorage_mock.call_args_list) == 1 - kwargs = bqstorage_mock.call_args_list[0].kwargs + kwargs = bqstorage_mock.call_args_list[0][1] assert kwargs.get("credentials") is mock_credentials client_info = kwargs.get("client_info") assert client_info is not None diff --git a/tests/unit/test_opentelemetry_tracing.py b/tests/unit/test_opentelemetry_tracing.py index 4cc58713c..e96e18c6b 100644 --- a/tests/unit/test_opentelemetry_tracing.py +++ b/tests/unit/test_opentelemetry_tracing.py @@ -15,8 +15,7 @@ import datetime import importlib import sys - -import mock +from unittest import mock try: import opentelemetry diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index f511bf28d..7c36eb75b 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -15,8 +15,7 @@ import datetime import decimal import unittest - -import mock +from unittest import mock class Test_UDFResource(unittest.TestCase): diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index 2fcb84e21..6e533c849 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -13,8 +13,8 @@ # limitations under the License. import unittest +from unittest import mock -import mock import requests.exceptions diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 26ec0dfef..b17cd0281 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -16,8 +16,8 @@ from google.cloud.bigquery.standard_sql import StandardSqlStructType from google.cloud.bigquery.schema import PolicyTagList import unittest +from unittest import mock -import mock import pytest diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 00a7f06e6..0d549120f 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -19,9 +19,9 @@ import time import types import unittest +from unittest import mock import warnings -import mock import pytest try: From 438776321f788e62f2d2f7e74f8ae2825740b1ae Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 7 Mar 2024 14:30:13 -0800 Subject: [PATCH 260/536] testing: update CI configurations (#1846) * testing: remove unnecessary prerelease targets This PR does two things: * remove unneeded prerelease-deps configs for removed and nonexisting CI targets * fixes the continuous prerelease-deps-3.12 config --- .../{prerelease-deps-3.11.cfg => prerelease-deps-3.12.cfg} | 2 +- .kokoro/continuous/prerelease-deps-3.8.cfg | 7 ------- .kokoro/continuous/prerelease-deps.cfg | 7 ------- .kokoro/presubmit/prerelease-deps-3.8.cfg | 7 ------- .kokoro/presubmit/prerelease-deps.cfg | 7 ------- 5 files changed, 1 insertion(+), 29 deletions(-) rename .kokoro/continuous/{prerelease-deps-3.11.cfg => prerelease-deps-3.12.cfg} (77%) delete mode 100644 .kokoro/continuous/prerelease-deps-3.8.cfg delete mode 100644 .kokoro/continuous/prerelease-deps.cfg delete mode 100644 .kokoro/presubmit/prerelease-deps-3.8.cfg delete mode 100644 .kokoro/presubmit/prerelease-deps.cfg diff --git a/.kokoro/continuous/prerelease-deps-3.11.cfg b/.kokoro/continuous/prerelease-deps-3.12.cfg similarity index 77% rename from .kokoro/continuous/prerelease-deps-3.11.cfg rename to .kokoro/continuous/prerelease-deps-3.12.cfg index 1e19f1239..ece962a17 100644 --- a/.kokoro/continuous/prerelease-deps-3.11.cfg +++ b/.kokoro/continuous/prerelease-deps-3.12.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "prerelease_deps-3.11" + value: "prerelease_deps-3.12" } diff --git a/.kokoro/continuous/prerelease-deps-3.8.cfg b/.kokoro/continuous/prerelease-deps-3.8.cfg deleted file mode 100644 index fabe3e347..000000000 --- a/.kokoro/continuous/prerelease-deps-3.8.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.8" -} diff --git a/.kokoro/continuous/prerelease-deps.cfg b/.kokoro/continuous/prerelease-deps.cfg deleted file mode 100644 index 3595fb43f..000000000 --- a/.kokoro/continuous/prerelease-deps.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps" -} diff --git a/.kokoro/presubmit/prerelease-deps-3.8.cfg b/.kokoro/presubmit/prerelease-deps-3.8.cfg deleted file mode 100644 index fabe3e347..000000000 --- a/.kokoro/presubmit/prerelease-deps-3.8.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.8" -} diff --git a/.kokoro/presubmit/prerelease-deps.cfg b/.kokoro/presubmit/prerelease-deps.cfg deleted file mode 100644 index 3595fb43f..000000000 --- a/.kokoro/presubmit/prerelease-deps.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps" -} From 713ce2c2f6ce9931f67cbbcd63ad436ad336ad26 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 8 Mar 2024 08:34:40 -0500 Subject: [PATCH 261/536] fix: add google-auth as a direct dependency (#1809) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: add google-auth as a direct dependency * update constraints * fix(deps): Require `google-api-core>=1.34.1, >=2.11.0` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Lingqing Gan Co-authored-by: Chalmer Lowe Co-authored-by: Owl Bot --- .kokoro/continuous/prerelease-deps.cfg | 7 +++++++ .kokoro/presubmit/prerelease-deps.cfg | 7 +++++++ setup.py | 3 ++- 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 .kokoro/continuous/prerelease-deps.cfg create mode 100644 .kokoro/presubmit/prerelease-deps.cfg diff --git a/.kokoro/continuous/prerelease-deps.cfg b/.kokoro/continuous/prerelease-deps.cfg new file mode 100644 index 000000000..3595fb43f --- /dev/null +++ b/.kokoro/continuous/prerelease-deps.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} diff --git a/.kokoro/presubmit/prerelease-deps.cfg b/.kokoro/presubmit/prerelease-deps.cfg new file mode 100644 index 000000000..3595fb43f --- /dev/null +++ b/.kokoro/presubmit/prerelease-deps.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} diff --git a/setup.py b/setup.py index 9fbc91ecb..5a35f4136 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,8 @@ # NOTE: Maintainers, please do not require google-api-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 - "google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", + "google-api-core[grpc] >= 1.34.1, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,!=2.10.*", + "google-auth >= 2.14.1, <3.0.0dev", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 From 6dff50f4fbc5aeb644383a4050dd5ffc05015ffe Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 8 Mar 2024 15:43:02 -0800 Subject: [PATCH 262/536] fix: supplementary fix to env-based universe resolution (#1847) * fix: promote env-based universe into client option parsing * lint * add client test * import --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/client.py | 25 ++++++++++++------------- tests/unit/test_client.py | 12 ++++++++++++ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index cb4daa897..408e7e49c 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -249,20 +249,19 @@ def __init__( bq_host = _get_bigquery_host() kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None client_universe = None - if client_options: - if isinstance(client_options, dict): - client_options = google.api_core.client_options.from_dict( - client_options + if client_options is None: + client_options = {} + if isinstance(client_options, dict): + client_options = google.api_core.client_options.from_dict(client_options) + if client_options.api_endpoint: + api_endpoint = client_options.api_endpoint + kw_args["api_endpoint"] = api_endpoint + else: + client_universe = _get_client_universe(client_options) + if client_universe != _DEFAULT_UNIVERSE: + kw_args["api_endpoint"] = _DEFAULT_HOST_TEMPLATE.replace( + "{UNIVERSE_DOMAIN}", client_universe ) - if client_options.api_endpoint: - api_endpoint = client_options.api_endpoint - kw_args["api_endpoint"] = api_endpoint - else: - client_universe = _get_client_universe(client_options) - if client_universe != _DEFAULT_UNIVERSE: - kw_args["api_endpoint"] = _DEFAULT_HOST_TEMPLATE.replace( - "{UNIVERSE_DOMAIN}", client_universe - ) # Ensure credentials and universe are not in conflict. if hasattr(self, "_credentials") and client_universe is not None: _validate_universe(client_universe, self._credentials) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index be8bef03c..d20712a8a 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -23,6 +23,7 @@ import itertools import json import operator +import os import unittest from unittest import mock import warnings @@ -171,6 +172,17 @@ def test_ctor_w_empty_client_options(self): client._connection.API_BASE_URL, client._connection.DEFAULT_API_ENDPOINT ) + @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": "foo.com"}) + def test_ctor_w_only_env_universe(self): + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + ) + self.assertEqual(client._connection.API_BASE_URL, "https://bigquery.foo.com") + def test_ctor_w_client_options_dict(self): creds = _make_credentials() http = object() From 4a1ff52ade535e349521b2877bcc762d280bd31b Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 11 Mar 2024 16:56:52 +0100 Subject: [PATCH 263/536] chore(deps): update all dependencies (#1835) * chore(deps): update all dependencies * Update samples/magics/requirements-test.txt * Update samples/notebooks/requirements.txt * Update samples/magics/requirements.txt update. * Update samples/desktopapp/requirements-test.txt * Update samples/geography/requirements-test.txt * Update samples/notebooks/requirements-test.txt * Update samples/snippets/requirements-test.txt --------- Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements-test.txt | 3 ++- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements-test.txt | 3 ++- samples/geography/requirements.txt | 34 ++++++++++++------------ samples/magics/requirements-test.txt | 3 ++- samples/magics/requirements.txt | 4 +-- samples/notebooks/requirements-test.txt | 3 ++- samples/notebooks/requirements.txt | 6 ++--- samples/snippets/requirements-test.txt | 3 ++- samples/snippets/requirements.txt | 2 +- 10 files changed, 34 insertions(+), 29 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index fc926cd7c..99d27b06a 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.3 +pytest==7.4.4; python_version == '3.7' +pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 8d82d4930..78074bbca 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.14.1 +google-cloud-bigquery==3.18.0 google-auth-oauthlib==1.2.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 7749d1f94..a91fa2d55 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,3 @@ -pytest==7.4.3 +pytest==7.4.4; python_version == '3.7' +pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 47e7cc56e..c85bf06d0 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ -attrs==23.1.0 -certifi==2023.11.17 +attrs==23.2.0 +certifi==2024.2.2 cffi===1.15.1; python_version == '3.7' cffi==1.16.0; python_version >= '3.8' charset-normalizer==3.3.2 @@ -8,43 +8,43 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.2.0 -Fiona==1.9.5 +Fiona==1.9.6 geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' -geopandas==0.14.1; python_version >= '3.9' -google-api-core==2.15.0 -google-auth==2.25.2 -google-cloud-bigquery==3.14.1 +geopandas==0.14.3; python_version >= '3.9' +google-api-core==2.17.1 +google-auth==2.28.2 +google-cloud-bigquery==3.18.0 google-cloud-bigquery-storage==2.24.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.0 -googleapis-common-protos==1.62.0 -grpcio==1.60.0 +googleapis-common-protos==1.63.0 +grpcio==1.62.1 idna==3.6 munch==4.0.0 mypy-extensions==1.0.0 -packaging==23.2 +packaging==24.0 pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.1.0; python_version >= '3.9' +pandas==2.2.1; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==14.0.1; python_version >= '3.8' +pyarrow==15.0.1; python_version >= '3.8' pyasn1==0.5.1 pyasn1-modules==0.3.0 pycparser==2.21 -pyparsing==3.1.1 -python-dateutil==2.8.2 -pytz==2023.3.post1 +pyparsing==3.1.2 +python-dateutil==2.9.0.post0 +pytz==2024.1 PyYAML==6.0.1 requests==2.31.0 rsa==4.9 -Shapely==2.0.2 +Shapely==2.0.3 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.9.0; python_version >= '3.8' +typing-extensions==4.10.0; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' urllib3==2.2.1; python_version >= '3.8' diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index fc926cd7c..99d27b06a 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.3 +pytest==7.4.4; python_version == '3.7' +pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 869d3b4d5..ea987358f 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,9 +1,9 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.14.1 +google.cloud.bigquery==3.18.0 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.18.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.1.0; python_version >= '3.9' +pandas==2.2.1; python_version >= '3.9' diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index fc926cd7c..99d27b06a 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.3 +pytest==7.4.4; python_version == '3.7' +pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index e8839e1fe..5ce95818e 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,12 +1,12 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.14.1 +google-cloud-bigquery==3.18.0 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.8.2; python_version >= '3.9' +matplotlib==3.8.3; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.1.0; python_version >= '3.9' +pandas==2.2.1; python_version >= '3.9' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index fc926cd7c..99d27b06a 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.3 +pytest==7.4.4; python_version == '3.7' +pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 365d584c7..fc0a2ef36 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1 +1 @@ -google-cloud-bigquery==3.14.1 \ No newline at end of file +google-cloud-bigquery==3.18.0 \ No newline at end of file From 641a712766bf68d2fa94467577845b5d07e7b1eb Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 11 Mar 2024 12:30:59 -0400 Subject: [PATCH 264/536] chore(main): release 3.19.0 (#1840) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 17 +++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 350787512..4cb0e1d20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,23 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.19.0](https://github.com/googleapis/python-bigquery/compare/v3.18.0...v3.19.0) (2024-03-11) + + +### Features + +* Support RANGE query parameters ([#1827](https://github.com/googleapis/python-bigquery/issues/1827)) ([b359a9a](https://github.com/googleapis/python-bigquery/commit/b359a9a55936a759a36aa69c5e5b014685e1fca6)) +* Support range sql ([#1807](https://github.com/googleapis/python-bigquery/issues/1807)) ([86a45c9](https://github.com/googleapis/python-bigquery/commit/86a45c989836b34dca456bac014352e55d6f86c0)) + + +### Bug Fixes + +* Add google-auth as a direct dependency ([713ce2c](https://github.com/googleapis/python-bigquery/commit/713ce2c2f6ce9931f67cbbcd63ad436ad336ad26)) +* Augment universe_domain handling ([#1837](https://github.com/googleapis/python-bigquery/issues/1837)) ([53c2cbf](https://github.com/googleapis/python-bigquery/commit/53c2cbf98d2961f553747514de273bcd5c117f0e)) +* **deps:** Require google-api-core>=1.34.1, >=2.11.0 ([713ce2c](https://github.com/googleapis/python-bigquery/commit/713ce2c2f6ce9931f67cbbcd63ad436ad336ad26)) +* Supplementary fix to env-based universe resolution ([#1844](https://github.com/googleapis/python-bigquery/issues/1844)) ([b818992](https://github.com/googleapis/python-bigquery/commit/b8189929b6008f7780214822062f8ed05d8d2a01)) +* Supplementary fix to env-based universe resolution ([#1847](https://github.com/googleapis/python-bigquery/issues/1847)) ([6dff50f](https://github.com/googleapis/python-bigquery/commit/6dff50f4fbc5aeb644383a4050dd5ffc05015ffe)) + ## [3.18.0](https://github.com/googleapis/python-bigquery/compare/v3.17.2...v3.18.0) (2024-02-29) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 89024cc08..27f24bd19 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.18.0" +__version__ = "3.19.0" From 2660dbd4821a89a1e20e3e1541504a409f1979aa Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 11 Mar 2024 10:04:17 -0700 Subject: [PATCH 265/536] fix: correct type checking (#1848) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correct the way we check whether `self._done_timeout` is an instance of `object` class or not. Fixes #1838 🦕 --- google/cloud/bigquery/job/query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index e45a46894..83d2751ce 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1409,9 +1409,9 @@ def _reload_query_results( # Python_API_core, as part of a major rewrite of the deadline, timeout, # retry process sets the timeout value as a Python object(). # Our system does not natively handle that and instead expects - # either none or a numeric value. If passed a Python object, convert to + # either None or a numeric value. If passed a Python object, convert to # None. - if isinstance(self._done_timeout, object): # pragma: NO COVER + if type(self._done_timeout) is object: # pragma: NO COVER self._done_timeout = None if self._done_timeout is not None: # pragma: NO COVER From 04f23780b66c4487333dbc592410a6930c9308ae Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 12 Mar 2024 13:33:12 +0100 Subject: [PATCH 266/536] chore(deps): update all dependencies (#1849) * chore(deps): update all dependencies * Update samples/desktopapp/requirements-test.txt * Update samples/geography/requirements-test.txt * Update samples/magics/requirements-test.txt * Update samples/magics/requirements.txt * Update samples/notebooks/requirements-test.txt * Update samples/notebooks/requirements.txt * Update samples/snippets/requirements-test.txt --------- Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements-test.txt | 2 +- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 2 +- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 4 ++-- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 99d27b06a..9142d4905 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.4; python_version == '3.7' +pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 78074bbca..8561934dc 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.18.0 +google-cloud-bigquery==3.19.0 google-auth-oauthlib==1.2.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index a91fa2d55..f052969d3 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==7.4.4; python_version == '3.7' +pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c85bf06d0..b474e252c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -15,7 +15,7 @@ geopandas===0.13.2; python_version == '3.8' geopandas==0.14.3; python_version >= '3.9' google-api-core==2.17.1 google-auth==2.28.2 -google-cloud-bigquery==3.18.0 +google-cloud-bigquery==3.19.0 google-cloud-bigquery-storage==2.24.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 99d27b06a..9142d4905 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.4; python_version == '3.7' +pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index ea987358f..9179db067 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,9 +1,9 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.18.0 +google.cloud.bigquery==3.19.0 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.18.1; python_version >= '3.9' +ipython===8.18.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.1; python_version >= '3.9' diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 99d27b06a..9142d4905 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.4; python_version == '3.7' +pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 5ce95818e..8f2e93620 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.18.0 +google-cloud-bigquery==3.19.0 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.18.1; python_version >= '3.9' +ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' matplotlib==3.8.3; python_version >= '3.9' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 99d27b06a..9142d4905 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 -pytest==7.4.4; python_version == '3.7' +pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index fc0a2ef36..b3347499f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1 +1 @@ -google-cloud-bigquery==3.18.0 \ No newline at end of file +google-cloud-bigquery==3.19.0 \ No newline at end of file From dc932415a0ee96dc5ae3601d0e757c70712291a0 Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 12 Mar 2024 09:01:49 -0700 Subject: [PATCH 267/536] testing: unhook prerelease-deps-3.12 from presubmit (#1851) Testing for prerelease-deps is done within continuous. Co-authored-by: Chalmer Lowe --- .kokoro/presubmit/prerelease-deps-3.12.cfg | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 .kokoro/presubmit/prerelease-deps-3.12.cfg diff --git a/.kokoro/presubmit/prerelease-deps-3.12.cfg b/.kokoro/presubmit/prerelease-deps-3.12.cfg deleted file mode 100644 index ece962a17..000000000 --- a/.kokoro/presubmit/prerelease-deps-3.12.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps-3.12" -} From 71393e0a40a64911700d67b5bf527ec44e35e360 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 15 Mar 2024 10:32:03 -0700 Subject: [PATCH 268/536] chore(python): add requirements for docs build (#1858) Source-Link: https://github.com/googleapis/synthtool/commit/85c23b6bc4352c1b0674848eaeb4e48645aeda6b Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:3741fd1f5f5150378563c76afa06bcc12777b5fe54c5ee01115218f83872134f Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 +-- .kokoro/build.sh | 7 ----- .kokoro/docker/docs/Dockerfile | 4 +++ .kokoro/docker/docs/requirements.in | 1 + .kokoro/docker/docs/requirements.txt | 38 ++++++++++++++++++++++++++++ 5 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 .kokoro/docker/docs/requirements.in create mode 100644 .kokoro/docker/docs/requirements.txt diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index e4e943e02..5d9542b1c 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:98f3afd11308259de6e828e37376d18867fd321aba07826e29e4f8d9cab56bad -# created: 2024-02-27T15:56:18.442440378Z + digest: sha256:3741fd1f5f5150378563c76afa06bcc12777b5fe54c5ee01115218f83872134f +# created: 2024-03-15T16:26:15.743347415Z diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 0cb0d0dd0..f38bda804 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -33,13 +33,6 @@ export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json # Setup project id. export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") -# Remove old nox -python3 -m pip uninstall --yes --quiet nox-automation - -# Install nox -python3 -m pip install --upgrade --quiet nox -python3 -m nox --version - # If this is a continuous build, send the test log to the FlakyBot. # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile index 8e39a2cc4..bdaf39fe2 100644 --- a/.kokoro/docker/docs/Dockerfile +++ b/.kokoro/docker/docs/Dockerfile @@ -80,4 +80,8 @@ RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ # Test pip RUN python3 -m pip +# Install build requirements +COPY requirements.txt /requirements.txt +RUN python3 -m pip install --require-hashes -r requirements.txt + CMD ["python3.8"] diff --git a/.kokoro/docker/docs/requirements.in b/.kokoro/docker/docs/requirements.in new file mode 100644 index 000000000..816817c67 --- /dev/null +++ b/.kokoro/docker/docs/requirements.in @@ -0,0 +1 @@ +nox diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt new file mode 100644 index 000000000..0e5d70f20 --- /dev/null +++ b/.kokoro/docker/docs/requirements.txt @@ -0,0 +1,38 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --allow-unsafe --generate-hashes requirements.in +# +argcomplete==3.2.3 \ + --hash=sha256:bf7900329262e481be5a15f56f19736b376df6f82ed27576fa893652c5de6c23 \ + --hash=sha256:c12355e0494c76a2a7b73e3a59b09024ca0ba1e279fb9ed6c1b82d5b74b6a70c + # via nox +colorlog==6.8.2 \ + --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ + --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 + # via nox +distlib==0.3.8 \ + --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ + --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 + # via virtualenv +filelock==3.13.1 \ + --hash=sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e \ + --hash=sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c + # via virtualenv +nox==2024.3.2 \ + --hash=sha256:e53514173ac0b98dd47585096a55572fe504fecede58ced708979184d05440be \ + --hash=sha256:f521ae08a15adbf5e11f16cb34e8d0e6ea521e0b92868f684e91677deb974553 + # via -r requirements.in +packaging==24.0 \ + --hash=sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 \ + --hash=sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9 + # via nox +platformdirs==4.2.0 \ + --hash=sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068 \ + --hash=sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768 + # via virtualenv +virtualenv==20.25.1 \ + --hash=sha256:961c026ac520bac5f69acb8ea063e8a4f071bcc9457b9c1f28f6b085c511583a \ + --hash=sha256:e08e13ecdca7a0bd53798f356d5831434afa5b07b93f0abdf0797b7a06ffe197 + # via nox From c1b4dfff71f0c64078cb104479eaabf94b20d176 Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 15 Mar 2024 15:07:31 -0700 Subject: [PATCH 269/536] testing: reduce python versions in unit testing (#1857) * testing: evaluate reducing versions under unit test * align unit and system versions under test * opt 3.7 back in * widen range of versions --------- Co-authored-by: Lingqing Gan --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index c31d098b8..9445f4f74 100644 --- a/noxfile.py +++ b/noxfile.py @@ -38,7 +38,7 @@ DEFAULT_PYTHON_VERSION = "3.8" SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.11", "3.12"] -UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.12"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() # 'docfx' is excluded since it only needs to run in 'docs-presubmit' From bb59f734cfcff4912b95dde1f79a48d5d8978bfe Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 15 Mar 2024 16:12:39 -0700 Subject: [PATCH 270/536] chore(python): update dependencies in /.kokoro (#1859) Source-Link: https://github.com/googleapis/synthtool/commit/db94845da69ccdfefd7ce55c84e6cfa74829747e Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:a8a80fc6456e433df53fc2a0d72ca0345db0ddefb409f1b75b118dfd1babd952 Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- .github/.OwlBot.lock.yaml | 4 +- .kokoro/requirements.in | 3 +- .kokoro/requirements.txt | 114 +++++++++++++++++--------------------- 3 files changed, 56 insertions(+), 65 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 5d9542b1c..dc9c56e9d 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3741fd1f5f5150378563c76afa06bcc12777b5fe54c5ee01115218f83872134f -# created: 2024-03-15T16:26:15.743347415Z + digest: sha256:a8a80fc6456e433df53fc2a0d72ca0345db0ddefb409f1b75b118dfd1babd952 +# created: 2024-03-15T16:25:47.905264637Z \ No newline at end of file diff --git a/.kokoro/requirements.in b/.kokoro/requirements.in index ec867d9fd..fff4d9ce0 100644 --- a/.kokoro/requirements.in +++ b/.kokoro/requirements.in @@ -1,5 +1,5 @@ gcp-docuploader -gcp-releasetool>=1.10.5 # required for compatibility with cryptography>=39.x +gcp-releasetool>=2 # required for compatibility with cryptography>=42.x importlib-metadata typing-extensions twine @@ -8,3 +8,4 @@ setuptools nox>=2022.11.21 # required to remove dependency on py charset-normalizer<3 click<8.1.0 +cryptography>=42.0.5 diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index bda8e38c4..dd61f5f32 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -93,40 +93,41 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -cryptography==42.0.4 \ - --hash=sha256:01911714117642a3f1792c7f376db572aadadbafcd8d75bb527166009c9f1d1b \ - --hash=sha256:0e89f7b84f421c56e7ff69f11c441ebda73b8a8e6488d322ef71746224c20fce \ - --hash=sha256:12d341bd42cdb7d4937b0cabbdf2a94f949413ac4504904d0cdbdce4a22cbf88 \ - --hash=sha256:15a1fb843c48b4a604663fa30af60818cd28f895572386e5f9b8a665874c26e7 \ - --hash=sha256:1cdcdbd117681c88d717437ada72bdd5be9de117f96e3f4d50dab3f59fd9ab20 \ - --hash=sha256:1df6fcbf60560d2113b5ed90f072dc0b108d64750d4cbd46a21ec882c7aefce9 \ - --hash=sha256:3c6048f217533d89f2f8f4f0fe3044bf0b2090453b7b73d0b77db47b80af8dff \ - --hash=sha256:3e970a2119507d0b104f0a8e281521ad28fc26f2820687b3436b8c9a5fcf20d1 \ - --hash=sha256:44a64043f743485925d3bcac548d05df0f9bb445c5fcca6681889c7c3ab12764 \ - --hash=sha256:4e36685cb634af55e0677d435d425043967ac2f3790ec652b2b88ad03b85c27b \ - --hash=sha256:5f8907fcf57392cd917892ae83708761c6ff3c37a8e835d7246ff0ad251d9298 \ - --hash=sha256:69b22ab6506a3fe483d67d1ed878e1602bdd5912a134e6202c1ec672233241c1 \ - --hash=sha256:6bfadd884e7280df24d26f2186e4e07556a05d37393b0f220a840b083dc6a824 \ - --hash=sha256:6d0fbe73728c44ca3a241eff9aefe6496ab2656d6e7a4ea2459865f2e8613257 \ - --hash=sha256:6ffb03d419edcab93b4b19c22ee80c007fb2d708429cecebf1dd3258956a563a \ - --hash=sha256:810bcf151caefc03e51a3d61e53335cd5c7316c0a105cc695f0959f2c638b129 \ - --hash=sha256:831a4b37accef30cccd34fcb916a5d7b5be3cbbe27268a02832c3e450aea39cb \ - --hash=sha256:887623fe0d70f48ab3f5e4dbf234986b1329a64c066d719432d0698522749929 \ - --hash=sha256:a0298bdc6e98ca21382afe914c642620370ce0470a01e1bef6dd9b5354c36854 \ - --hash=sha256:a1327f280c824ff7885bdeef8578f74690e9079267c1c8bd7dc5cc5aa065ae52 \ - --hash=sha256:c1f25b252d2c87088abc8bbc4f1ecbf7c919e05508a7e8628e6875c40bc70923 \ - --hash=sha256:c3a5cbc620e1e17009f30dd34cb0d85c987afd21c41a74352d1719be33380885 \ - --hash=sha256:ce8613beaffc7c14f091497346ef117c1798c202b01153a8cc7b8e2ebaaf41c0 \ - --hash=sha256:d2a27aca5597c8a71abbe10209184e1a8e91c1fd470b5070a2ea60cafec35bcd \ - --hash=sha256:dad9c385ba8ee025bb0d856714f71d7840020fe176ae0229de618f14dae7a6e2 \ - --hash=sha256:db4b65b02f59035037fde0998974d84244a64c3265bdef32a827ab9b63d61b18 \ - --hash=sha256:e09469a2cec88fb7b078e16d4adec594414397e8879a4341c6ace96013463d5b \ - --hash=sha256:e53dc41cda40b248ebc40b83b31516487f7db95ab8ceac1f042626bc43a2f992 \ - --hash=sha256:f1e85a178384bf19e36779d91ff35c7617c885da487d689b05c1366f9933ad74 \ - --hash=sha256:f47be41843200f7faec0683ad751e5ef11b9a56a220d57f300376cd8aba81660 \ - --hash=sha256:fb0cef872d8193e487fc6bdb08559c3aa41b659a7d9be48b2e10747f47863925 \ - --hash=sha256:ffc73996c4fca3d2b6c1c8c12bfd3ad00def8621da24f547626bf06441400449 +cryptography==42.0.5 \ + --hash=sha256:0270572b8bd2c833c3981724b8ee9747b3ec96f699a9665470018594301439ee \ + --hash=sha256:111a0d8553afcf8eb02a4fea6ca4f59d48ddb34497aa8706a6cf536f1a5ec576 \ + --hash=sha256:16a48c23a62a2f4a285699dba2e4ff2d1cff3115b9df052cdd976a18856d8e3d \ + --hash=sha256:1b95b98b0d2af784078fa69f637135e3c317091b615cd0905f8b8a087e86fa30 \ + --hash=sha256:1f71c10d1e88467126f0efd484bd44bca5e14c664ec2ede64c32f20875c0d413 \ + --hash=sha256:2424ff4c4ac7f6b8177b53c17ed5d8fa74ae5955656867f5a8affaca36a27abb \ + --hash=sha256:2bce03af1ce5a5567ab89bd90d11e7bbdff56b8af3acbbec1faded8f44cb06da \ + --hash=sha256:329906dcc7b20ff3cad13c069a78124ed8247adcac44b10bea1130e36caae0b4 \ + --hash=sha256:37dd623507659e08be98eec89323469e8c7b4c1407c85112634ae3dbdb926fdd \ + --hash=sha256:3eaafe47ec0d0ffcc9349e1708be2aaea4c6dd4978d76bf6eb0cb2c13636c6fc \ + --hash=sha256:5e6275c09d2badf57aea3afa80d975444f4be8d3bc58f7f80d2a484c6f9485c8 \ + --hash=sha256:6fe07eec95dfd477eb9530aef5bead34fec819b3aaf6c5bd6d20565da607bfe1 \ + --hash=sha256:7367d7b2eca6513681127ebad53b2582911d1736dc2ffc19f2c3ae49997496bc \ + --hash=sha256:7cde5f38e614f55e28d831754e8a3bacf9ace5d1566235e39d91b35502d6936e \ + --hash=sha256:9481ffe3cf013b71b2428b905c4f7a9a4f76ec03065b05ff499bb5682a8d9ad8 \ + --hash=sha256:98d8dc6d012b82287f2c3d26ce1d2dd130ec200c8679b6213b3c73c08b2b7940 \ + --hash=sha256:a011a644f6d7d03736214d38832e030d8268bcff4a41f728e6030325fea3e400 \ + --hash=sha256:a2913c5375154b6ef2e91c10b5720ea6e21007412f6437504ffea2109b5a33d7 \ + --hash=sha256:a30596bae9403a342c978fb47d9b0ee277699fa53bbafad14706af51fe543d16 \ + --hash=sha256:b03c2ae5d2f0fc05f9a2c0c997e1bc18c8229f392234e8a0194f202169ccd278 \ + --hash=sha256:b6cd2203306b63e41acdf39aa93b86fb566049aeb6dc489b70e34bcd07adca74 \ + --hash=sha256:b7ffe927ee6531c78f81aa17e684e2ff617daeba7f189f911065b2ea2d526dec \ + --hash=sha256:b8cac287fafc4ad485b8a9b67d0ee80c66bf3574f655d3b97ef2e1082360faf1 \ + --hash=sha256:ba334e6e4b1d92442b75ddacc615c5476d4ad55cc29b15d590cc6b86efa487e2 \ + --hash=sha256:ba3e4a42397c25b7ff88cdec6e2a16c2be18720f317506ee25210f6d31925f9c \ + --hash=sha256:c41fb5e6a5fe9ebcd58ca3abfeb51dffb5d83d6775405305bfa8715b76521922 \ + --hash=sha256:cd2030f6650c089aeb304cf093f3244d34745ce0cfcc39f20c6fbfe030102e2a \ + --hash=sha256:cd65d75953847815962c84a4654a84850b2bb4aed3f26fadcc1c13892e1e29f6 \ + --hash=sha256:e4985a790f921508f36f81831817cbc03b102d643b5fcb81cd33df3fa291a1a1 \ + --hash=sha256:e807b3188f9eb0eaa7bbb579b462c5ace579f1cedb28107ce8b48a9f7ad3679e \ + --hash=sha256:f12764b8fffc7a123f641d7d049d382b73f96a34117e0b637b80643169cec8ac \ + --hash=sha256:f8837fe1d6ac4a8052a9a8ddab256bc006242696f03368a4009be7ee3075cdb7 # via + # -r requirements.in # gcp-releasetool # secretstorage distlib==0.3.7 \ @@ -145,9 +146,9 @@ gcp-docuploader==0.6.5 \ --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea # via -r requirements.in -gcp-releasetool==1.16.0 \ - --hash=sha256:27bf19d2e87aaa884096ff941aa3c592c482be3d6a2bfe6f06afafa6af2353e3 \ - --hash=sha256:a316b197a543fd036209d0caba7a8eb4d236d8e65381c80cbc6d7efaa7606d63 +gcp-releasetool==2.0.0 \ + --hash=sha256:3d73480b50ba243f22d7c7ec08b115a30e1c7817c4899781840c26f9c55b8277 \ + --hash=sha256:7aa9fd935ec61e581eb8458ad00823786d91756c25e492f372b2b30962f3c28f # via -r requirements.in google-api-core==2.12.0 \ --hash=sha256:c22e01b1e3c4dcd90998494879612c38d0a3411d1f7b679eb89e2abe3ce1f553 \ @@ -392,29 +393,18 @@ platformdirs==3.11.0 \ --hash=sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3 \ --hash=sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e # via virtualenv -protobuf==3.20.3 \ - --hash=sha256:03038ac1cfbc41aa21f6afcbcd357281d7521b4157926f30ebecc8d4ea59dcb7 \ - --hash=sha256:28545383d61f55b57cf4df63eebd9827754fd2dc25f80c5253f9184235db242c \ - --hash=sha256:2e3427429c9cffebf259491be0af70189607f365c2f41c7c3764af6f337105f2 \ - --hash=sha256:398a9e0c3eaceb34ec1aee71894ca3299605fa8e761544934378bbc6c97de23b \ - --hash=sha256:44246bab5dd4b7fbd3c0c80b6f16686808fab0e4aca819ade6e8d294a29c7050 \ - --hash=sha256:447d43819997825d4e71bf5769d869b968ce96848b6479397e29fc24c4a5dfe9 \ - --hash=sha256:67a3598f0a2dcbc58d02dd1928544e7d88f764b47d4a286202913f0b2801c2e7 \ - --hash=sha256:74480f79a023f90dc6e18febbf7b8bac7508420f2006fabd512013c0c238f454 \ - --hash=sha256:819559cafa1a373b7096a482b504ae8a857c89593cf3a25af743ac9ecbd23480 \ - --hash=sha256:899dc660cd599d7352d6f10d83c95df430a38b410c1b66b407a6b29265d66469 \ - --hash=sha256:8c0c984a1b8fef4086329ff8dd19ac77576b384079247c770f29cc8ce3afa06c \ - --hash=sha256:9aae4406ea63d825636cc11ffb34ad3379335803216ee3a856787bcf5ccc751e \ - --hash=sha256:a7ca6d488aa8ff7f329d4c545b2dbad8ac31464f1d8b1c87ad1346717731e4db \ - --hash=sha256:b6cc7ba72a8850621bfec987cb72623e703b7fe2b9127a161ce61e61558ad905 \ - --hash=sha256:bf01b5720be110540be4286e791db73f84a2b721072a3711efff6c324cdf074b \ - --hash=sha256:c02ce36ec760252242a33967d51c289fd0e1c0e6e5cc9397e2279177716add86 \ - --hash=sha256:d9e4432ff660d67d775c66ac42a67cf2453c27cb4d738fc22cb53b5d84c135d4 \ - --hash=sha256:daa564862dd0d39c00f8086f88700fdbe8bc717e993a21e90711acfed02f2402 \ - --hash=sha256:de78575669dddf6099a8a0f46a27e82a1783c557ccc38ee620ed8cc96d3be7d7 \ - --hash=sha256:e64857f395505ebf3d2569935506ae0dfc4a15cb80dc25261176c784662cdcc4 \ - --hash=sha256:f4bd856d702e5b0d96a00ec6b307b0f51c1982c2bf9c0052cf9019e9a544ba99 \ - --hash=sha256:f4c42102bc82a51108e449cbb32b19b180022941c727bac0cfd50170341f16ee +protobuf==4.25.3 \ + --hash=sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4 \ + --hash=sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8 \ + --hash=sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c \ + --hash=sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d \ + --hash=sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4 \ + --hash=sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa \ + --hash=sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c \ + --hash=sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019 \ + --hash=sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9 \ + --hash=sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c \ + --hash=sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2 # via # gcp-docuploader # gcp-releasetool @@ -518,7 +508,7 @@ zipp==3.17.0 \ # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==68.2.2 \ - --hash=sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87 \ - --hash=sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a +setuptools==69.2.0 \ + --hash=sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e \ + --hash=sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c # via -r requirements.in From 38b8e5390f373b45e7cc7611d67e3fd7db8ec5e8 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 19 Mar 2024 00:56:11 +0100 Subject: [PATCH 271/536] chore(deps): update dependency pyarrow to v15.0.2 (#1861) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index b474e252c..c7a793358 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -31,7 +31,7 @@ pandas===2.0.3; python_version == '3.8' pandas==2.2.1; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==15.0.1; python_version >= '3.8' +pyarrow==15.0.2; python_version >= '3.8' pyasn1==0.5.1 pyasn1-modules==0.3.0 pycparser==2.21 From 0ac6e9bf186945832f5dcdf5a4d95667b4da223e Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 19 Mar 2024 09:51:49 -0400 Subject: [PATCH 272/536] fix: update error logging when converting to pyarrow column fails (#1836) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: update error logging when converting to pyarrow column fails * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * resolve merge conflict * resolve missing dependency * more tweaks to constraints and requirements re pyarrow * even more tweaks to constraints and requirements re pyarrow * a few more tweaks to constraints and requirements re pyarrow * resolves issue of pyarrow not installing * fix linting issue * update linting and conditionals * update linting and mypy comments * quick tags on several coverage issues related to imports * adds pragma to exception * updates test suite with new test and makes msg explicit * temporarily adding timing code * additional timing test mods * add pragmas to account for several tests * cleaned up some test code * cleaned up some test code * Update a test to include column datatype * update to pytest.raises command * Update tests/unit/test__pandas_helpers.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * removed unused variable 'e' --------- Co-authored-by: Owl Bot --- google/cloud/bigquery/_pandas_helpers.py | 20 +++++++++++++------- google/cloud/bigquery/_pyarrow_helpers.py | 2 +- noxfile.py | 15 +++++++++++---- samples/desktopapp/requirements-test.txt | 1 + samples/snippets/requirements-test.txt | 1 + testing/constraints-3.11.txt | 1 + testing/constraints-3.12.txt | 1 + testing/constraints-3.7.txt | 2 +- tests/unit/test__pandas_helpers.py | 17 +++++++++++++++-- tests/unit/test_table.py | 4 ++-- 10 files changed, 47 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index e97dda7e5..9f8dcfde4 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -49,10 +49,11 @@ db_dtypes_import_exception = exc date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype -pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) +from pyarrow import ArrowTypeError # type: ignore # noqa: E402 _BIGNUMERIC_SUPPORT = False -if pyarrow is not None: +if pyarrow is not None: # pragma: NO COVER _BIGNUMERIC_SUPPORT = True try: @@ -302,11 +303,16 @@ def bq_to_arrow_array(series, bq_field): field_type_upper = bq_field.field_type.upper() if bq_field.field_type else "" - if bq_field.mode.upper() == "REPEATED": - return pyarrow.ListArray.from_pandas(series, type=arrow_type) - if field_type_upper in schema._STRUCT_TYPES: - return pyarrow.StructArray.from_pandas(series, type=arrow_type) - return pyarrow.Array.from_pandas(series, type=arrow_type) + try: + if bq_field.mode.upper() == "REPEATED": + return pyarrow.ListArray.from_pandas(series, type=arrow_type) + if field_type_upper in schema._STRUCT_TYPES: + return pyarrow.StructArray.from_pandas(series, type=arrow_type) + return pyarrow.Array.from_pandas(series, type=arrow_type) + except ArrowTypeError: # pragma: NO COVER + msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray""" + _LOGGER.error(msg) + raise ArrowTypeError(msg) def get_column_or_index(dataframe, name): diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py index 946743eaf..06509cc93 100644 --- a/google/cloud/bigquery/_pyarrow_helpers.py +++ b/google/cloud/bigquery/_pyarrow_helpers.py @@ -49,7 +49,7 @@ def pyarrow_timestamp(): _BQ_TO_ARROW_SCALARS = {} _ARROW_SCALAR_IDS_TO_BQ = {} -if pyarrow: +if pyarrow: # pragma: NO COVER # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py # When modifying it be sure to update it there as well. # Note(todo!!): type "BIGNUMERIC"'s matching pyarrow type is added in _pandas_helpers.py diff --git a/noxfile.py b/noxfile.py index 9445f4f74..548690afa 100644 --- a/noxfile.py +++ b/noxfile.py @@ -18,7 +18,6 @@ import os import re import shutil - import nox @@ -66,6 +65,7 @@ def default(session, install_extras=True): Python corresponding to the ``nox`` binary the ``PATH`` can run the tests. """ + constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) @@ -86,8 +86,7 @@ def default(session, install_extras=True): install_target = ".[all]" else: install_target = "." - session.install("-e", install_target, "-c", constraints_path) - + session.install("-e", install_target) session.run("python", "-m", "pip", "freeze") # Run py.test against the unit tests. @@ -108,6 +107,7 @@ def default(session, install_extras=True): @nox.session(python=UNIT_TEST_PYTHON_VERSIONS) def unit(session): """Run the unit test suite.""" + default(session) @@ -118,8 +118,11 @@ def unit_noextras(session): # Install optional dependencies that are out-of-date. # https://github.com/googleapis/python-bigquery/issues/933 # There is no pyarrow 1.0.0 package for Python 3.9. + if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: - session.install("pyarrow==1.0.0") + session.install("pyarrow>=3.0.0") + elif session.python == UNIT_TEST_PYTHON_VERSIONS[-1]: + session.install("pyarrow") default(session, install_extras=False) @@ -127,6 +130,7 @@ def unit_noextras(session): @nox.session(python=DEFAULT_PYTHON_VERSION) def mypy(session): """Run type checks with mypy.""" + session.install("-e", ".[all]") session.install(MYPY_VERSION) @@ -147,6 +151,7 @@ def pytype(session): # An indirect dependecy attrs==21.1.0 breaks the check, and installing a less # recent version avoids the error until a possibly better fix is found. # https://github.com/googleapis/python-bigquery/issues/655 + session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install(PYTYPE_VERSION) @@ -206,6 +211,7 @@ def system(session): @nox.session(python=DEFAULT_PYTHON_VERSION) def mypy_samples(session): """Run type checks with mypy.""" + session.install("pytest") for requirements_path in CURRENT_DIRECTORY.glob("samples/*/requirements.txt"): session.install("-r", str(requirements_path)) @@ -283,6 +289,7 @@ def cover(session): This outputs the coverage report aggregating coverage from the unit test runs (not system test runs), and then erases coverage data. """ + session.install("coverage", "pytest-cov") session.run("coverage", "report", "--show-missing", "--fail-under=100") session.run("coverage", "erase") diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 9142d4905..413a7fd48 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -2,3 +2,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 +pyarrow>=3.0.0 \ No newline at end of file diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 9142d4905..413a7fd48 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -2,3 +2,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 +pyarrow>=3.0.0 \ No newline at end of file diff --git a/testing/constraints-3.11.txt b/testing/constraints-3.11.txt index e69de29bb..e80ca0ccf 100644 --- a/testing/constraints-3.11.txt +++ b/testing/constraints-3.11.txt @@ -0,0 +1 @@ +pyarrow>=3.0.0 \ No newline at end of file diff --git a/testing/constraints-3.12.txt b/testing/constraints-3.12.txt index e69de29bb..e80ca0ccf 100644 --- a/testing/constraints-3.12.txt +++ b/testing/constraints-3.12.txt @@ -0,0 +1 @@ +pyarrow>=3.0.0 \ No newline at end of file diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 28787adb7..1fc7c6838 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -27,7 +27,7 @@ packaging==20.0.0 pandas==1.1.0 proto-plus==1.22.0 protobuf==3.19.5 -pyarrow==3.0.0 +pyarrow>=3.0.0 python-dateutil==2.7.3 requests==2.21.0 Shapely==1.8.4 diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index abee39065..244384620 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -53,6 +53,7 @@ if pyarrow: import pyarrow.parquet import pyarrow.types + from pyarrow import ArrowTypeError # type: ignore # noqa: E402 else: # pragma: NO COVER # Mock out pyarrow when missing, because methods from pyarrow.types are # used in test parameterization. @@ -557,13 +558,25 @@ def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_arrays(module_under_test): rows = [[1, 2, 3], [], [4, 5, 6]] - series = pandas.Series(rows, dtype="object") + series = pandas.Series(rows, name="test_col", dtype="object") bq_field = schema.SchemaField("field_name", "INTEGER", mode="REPEATED") arrow_array = module_under_test.bq_to_arrow_array(series, bq_field) roundtrip = arrow_array.to_pylist() assert rows == roundtrip +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_conversion_fail(module_under_test): # pragma: NO COVER + rows = [[1, 2, 3], [], [4, 5, 6]] + series = pandas.Series(rows, name="test_col", dtype="object") + bq_field = schema.SchemaField("field_name", "STRING", mode="REPEATED") + exc_msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray""" + with pytest.raises(ArrowTypeError, match=exc_msg): + module_under_test.bq_to_arrow_array(series, bq_field) + raise ArrowTypeError(exc_msg) + + @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") @@ -573,7 +586,7 @@ def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): None, {"int_col": 456, "string_col": "def"}, ] - series = pandas.Series(rows, dtype="object") + series = pandas.Series(rows, name="test_col", dtype="object") bq_field = schema.SchemaField( "field_name", bq_type, diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 0d549120f..a8107ee97 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -49,7 +49,7 @@ pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() -if pyarrow: +if pyarrow: # pragma: NO COVER import pyarrow.types try: @@ -3743,7 +3743,7 @@ def test_to_dataframe_w_dtypes_mapper(self): if hasattr(pandas, "Float64Dtype"): self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) self.assertEqual(df.miles.dtype.name, "Float64") - else: + else: # pragma: NO COVER self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"]) self.assertEqual(df.miles.dtype.name, "string") From f8f70a35b562d13ca5ae87f29e1ddf0f3833ff70 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 21 Mar 2024 13:16:00 +0100 Subject: [PATCH 273/536] chore(deps): update dependency google-auth to v2.29.0 (#1865) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c7a793358..1cb20b102 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -14,7 +14,7 @@ geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==0.14.3; python_version >= '3.9' google-api-core==2.17.1 -google-auth==2.28.2 +google-auth==2.29.0 google-cloud-bigquery==3.19.0 google-cloud-bigquery-storage==2.24.0 google-cloud-core==2.4.1 From b0e95a05d7dbe94f246600ed6c5ebbc2b6a7013f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 22 Mar 2024 11:42:22 +0100 Subject: [PATCH 274/536] chore(deps): update dependency google-api-core to v2.18.0 (#1866) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 1cb20b102..6fa7ffc7e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==0.14.3; python_version >= '3.9' -google-api-core==2.17.1 +google-api-core==2.18.0 google-auth==2.29.0 google-cloud-bigquery==3.19.0 google-cloud-bigquery-storage==2.24.0 From e265db6a6a37d13056dcaac240c2cf3975dfd644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 27 Mar 2024 09:58:18 -0500 Subject: [PATCH 275/536] fix: use an allowlist instead of denylist to determine when `query_and_wait` uses `jobs.query` API (#1869) --- google/cloud/bigquery/_job_helpers.py | 53 ++++++++++++++++++--------- tests/unit/test__job_helpers.py | 15 +++++++- 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 0692c9b65..602a49eba 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -400,9 +400,13 @@ def query_and_wait( :class:`~google.cloud.bigquery.job.QueryJobConfig` class. """ + request_body = _to_query_request( + query=query, job_config=job_config, location=location, timeout=api_timeout + ) + # Some API parameters aren't supported by the jobs.query API. In these # cases, fallback to a jobs.insert call. - if not _supported_by_jobs_query(job_config): + if not _supported_by_jobs_query(request_body): return _wait_or_cancel( query_jobs_insert( client=client, @@ -424,9 +428,6 @@ def query_and_wait( ) path = _to_query_path(project) - request_body = _to_query_request( - query=query, job_config=job_config, location=location, timeout=api_timeout - ) if page_size is not None and max_results is not None: request_body["maxResults"] = min(page_size, max_results) @@ -506,20 +507,38 @@ def do_query(): return do_query() -def _supported_by_jobs_query(job_config: Optional[job.QueryJobConfig]) -> bool: +def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool: """True if jobs.query can be used. False if jobs.insert is needed.""" - if job_config is None: - return True - - return ( - # These features aren't supported by jobs.query. - job_config.clustering_fields is None - and job_config.destination is None - and job_config.destination_encryption_configuration is None - and job_config.range_partitioning is None - and job_config.table_definitions is None - and job_config.time_partitioning is None - ) + request_keys = frozenset(request_body.keys()) + + # Per issue: https://github.com/googleapis/python-bigquery/issues/1867 + # use an allowlist here instead of a denylist because the backend API allows + # unsupported parameters without any warning or failure. Instead, keep this + # set in sync with those in QueryRequest: + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#QueryRequest + keys_allowlist = { + "kind", + "query", + "maxResults", + "defaultDataset", + "timeoutMs", + "dryRun", + "preserveNulls", + "useQueryCache", + "useLegacySql", + "parameterMode", + "queryParameters", + "location", + "formatOptions", + "connectionProperties", + "labels", + "maximumBytesBilled", + "requestId", + "createSession", + } + + unsupported_keys = request_keys - keys_allowlist + return len(unsupported_keys) == 0 def _wait_or_cancel( diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index c30964c57..671b829f7 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -22,6 +22,7 @@ import pytest from google.cloud.bigquery.client import Client +from google.cloud.bigquery import enums from google.cloud.bigquery import _job_helpers from google.cloud.bigquery.job import copy_ as job_copy from google.cloud.bigquery.job import extract as job_extract @@ -1141,12 +1142,22 @@ def test_make_job_id_w_job_id_overrides_prefix(): False, id="destination_encryption_configuration", ), + # priority="BATCH" is not supported. See: + # https://github.com/googleapis/python-bigquery/issues/1867 + pytest.param( + job_query.QueryJobConfig( + priority=enums.QueryPriority.BATCH, + ), + False, + id="priority=BATCH", + ), ), ) -def test_supported_by_jobs_query( +def test_supported_by_jobs_query_from_queryjobconfig( job_config: Optional[job_query.QueryJobConfig], expected: bool ): - assert _job_helpers._supported_by_jobs_query(job_config) == expected + request_body = _job_helpers._to_query_request(job_config, query="SELECT 1") + assert _job_helpers._supported_by_jobs_query(request_body) == expected def test_wait_or_cancel_no_exception(): From 08b1e6f9c41121907c345daedbae40ece18e8b6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 27 Mar 2024 10:28:27 -0500 Subject: [PATCH 276/536] feat: add `fields` parameter to `set_iam_policy` for consistency with update methods (#1872) --- google/cloud/bigquery/client.py | 79 +++++++++++++++++++++- samples/snippets/create_iam_policy_test.py | 44 ++++++++++++ tests/system/test_client.py | 28 -------- tests/unit/test_client.py | 67 ++++++++++++++++++ 4 files changed, 188 insertions(+), 30 deletions(-) create mode 100644 samples/snippets/create_iam_policy_test.py diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 408e7e49c..5521e2e1e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -882,6 +882,35 @@ def get_iam_policy( retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Policy: + """Return the access control policy for a table resource. + + Args: + table (Union[ \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ + str, \ + ]): + The table to get the access control policy for. + If a string is passed in, this method attempts to create a + table reference from a string using + :func:`~google.cloud.bigquery.table.TableReference.from_string`. + requested_policy_version (int): + Optional. The maximum policy version that will be used to format the policy. + + Only version ``1`` is currently supported. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/GetPolicyOptions + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + + Returns: + google.api_core.iam.Policy: + The access control policy. + """ table = _table_arg_to_table_ref(table, default_project=self.project) if requested_policy_version != 1: @@ -910,7 +939,53 @@ def set_iam_policy( updateMask: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + *, + fields: Sequence[str] = (), ) -> Policy: + """Return the access control policy for a table resource. + + Args: + table (Union[ \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.table.TableListItem, \ + str, \ + ]): + The table to get the access control policy for. + If a string is passed in, this method attempts to create a + table reference from a string using + :func:`~google.cloud.bigquery.table.TableReference.from_string`. + policy (google.api_core.iam.Policy): + The access control policy to set. + updateMask (Optional[str]): + Mask as defined by + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/setIamPolicy#body.request_body.FIELDS.update_mask + + Incompatible with ``fields``. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + fields (Sequence[str]): + Which properties to set on the policy. See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/setIamPolicy#body.request_body.FIELDS.update_mask + + Incompatible with ``updateMask``. + + Returns: + google.api_core.iam.Policy: + The updated access control policy. + """ + if updateMask is not None and not fields: + update_mask = updateMask + elif updateMask is not None and fields: + raise ValueError("Cannot set both fields and updateMask") + elif fields: + update_mask = ",".join(fields) + else: + update_mask = None + table = _table_arg_to_table_ref(table, default_project=self.project) if not isinstance(policy, (Policy)): @@ -918,8 +993,8 @@ def set_iam_policy( body = {"policy": policy.to_api_repr()} - if updateMask is not None: - body["updateMask"] = updateMask + if update_mask is not None: + body["updateMask"] = update_mask path = "{}:setIamPolicy".format(table.path) span_attributes = {"path": path} diff --git a/samples/snippets/create_iam_policy_test.py b/samples/snippets/create_iam_policy_test.py new file mode 100644 index 000000000..c41ced2cd --- /dev/null +++ b/samples/snippets/create_iam_policy_test.py @@ -0,0 +1,44 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def test_create_iam_policy(table_id: str): + your_table_id = table_id + + # [START bigquery_create_iam_policy] + from google.cloud import bigquery + + bqclient = bigquery.Client() + + policy = bqclient.get_iam_policy( + your_table_id, # e.g. "project.dataset.table" + ) + + analyst_email = "example-analyst-group@google.com" + binding = { + "role": "roles/bigquery.dataViewer", + "members": {f"group:{analyst_email}"}, + } + policy.bindings.append(binding) + + updated_policy = bqclient.set_iam_policy( + your_table_id, # e.g. "project.dataset.table" + policy, + ) + + for binding in updated_policy.bindings: + print(repr(binding)) + # [END bigquery_create_iam_policy] + + assert binding in updated_policy.bindings diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 04740de8a..414239323 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -36,7 +36,6 @@ from google.api_core.exceptions import InternalServerError from google.api_core.exceptions import ServiceUnavailable from google.api_core.exceptions import TooManyRequests -from google.api_core.iam import Policy from google.cloud import bigquery from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference @@ -1485,33 +1484,6 @@ def test_copy_table(self): got_rows = self._fetch_single_page(dest_table) self.assertTrue(len(got_rows) > 0) - def test_get_set_iam_policy(self): - from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE - - dataset = self.temp_dataset(_make_dataset_id("create_table")) - table_id = "test_table" - table_ref = Table(dataset.table(table_id)) - self.assertFalse(_table_exists(table_ref)) - - table = helpers.retry_403(Config.CLIENT.create_table)(table_ref) - self.to_delete.insert(0, table) - - self.assertTrue(_table_exists(table)) - - member = "serviceAccount:{}".format(Config.CLIENT.get_service_account_email()) - BINDING = { - "role": BIGQUERY_DATA_VIEWER_ROLE, - "members": {member}, - } - - policy = Config.CLIENT.get_iam_policy(table) - self.assertIsInstance(policy, Policy) - self.assertEqual(policy.bindings, []) - - policy.bindings.append(BINDING) - returned_policy = Config.CLIENT.set_iam_policy(table, policy) - self.assertEqual(returned_policy.bindings, policy.bindings) - def test_test_iam_permissions(self): dataset = self.temp_dataset(_make_dataset_id("create_table")) table_id = "test_table" diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index d20712a8a..60dcab85e 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1782,6 +1782,60 @@ def test_set_iam_policy(self): from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE from google.api_core.iam import Policy + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + ETAG = "foo" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + BINDINGS = [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ] + FIELDS = ("bindings", "etag") + RETURNED = {"etag": ETAG, "version": VERSION, "bindings": BINDINGS} + + policy = Policy() + for binding in BINDINGS: + policy[binding["role"]] = binding["members"] + + BODY = {"policy": policy.to_api_repr(), "updateMask": "bindings,etag"} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + returned_policy = client.set_iam_policy( + self.TABLE_REF, policy, fields=FIELDS, timeout=7.5 + ) + + final_attributes.assert_called_once_with({"path": PATH}, client, None) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + self.assertEqual(returned_policy.etag, ETAG) + self.assertEqual(returned_policy.version, VERSION) + self.assertEqual(dict(returned_policy), dict(policy)) + + def test_set_iam_policy_updateMask(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( self.PROJECT, self.DS_ID, @@ -1858,6 +1912,19 @@ def test_set_iam_policy_no_mask(self): method="POST", path=PATH, data=BODY, timeout=7.5 ) + def test_set_ia_policy_updateMask_and_fields(self): + from google.api_core.iam import Policy + + policy = Policy() + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with pytest.raises(ValueError, match="updateMask"): + client.set_iam_policy( + self.TABLE_REF, policy, updateMask="bindings", fields=("bindings",) + ) + def test_set_iam_policy_invalid_policy(self): from google.api_core.iam import Policy From c2496a1014a7d99e805b3d0a66e4517165bd7e01 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 27 Mar 2024 12:18:17 -0400 Subject: [PATCH 277/536] fix: updates a number of optional dependencies (#1864) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fix updates a number of optional dependencies. We use a different module import process (pytest.importorskip versus unittest.skipif). This first major commit gets the ball rolling, there are gonna be a few additional commits to cover other files. Fixes # 🦕 --- google/cloud/bigquery/_tqdm_helpers.py | 2 +- google/cloud/bigquery/client.py | 2 +- setup.py | 5 +- tests/system/test_client.py | 28 +- tests/unit/test_client.py | 172 +++++----- tests/unit/test_dbapi__helpers.py | 10 +- tests/unit/test_dbapi_connection.py | 28 +- tests/unit/test_dbapi_cursor.py | 33 +- tests/unit/test_table.py | 436 ++++++++++++------------- 9 files changed, 311 insertions(+), 405 deletions(-) diff --git a/google/cloud/bigquery/_tqdm_helpers.py b/google/cloud/bigquery/_tqdm_helpers.py index 456ca2530..cb81bd8f6 100644 --- a/google/cloud/bigquery/_tqdm_helpers.py +++ b/google/cloud/bigquery/_tqdm_helpers.py @@ -67,7 +67,7 @@ def get_progress_bar(progress_bar_type, description, total, unit): ) elif progress_bar_type == "tqdm_gui": return tqdm.tqdm_gui(desc=description, total=total, unit=unit) - except (KeyError, TypeError): + except (KeyError, TypeError): # pragma: NO COVER # Protect ourselves from any tqdm errors. In case of # unexpected tqdm behavior, just fall back to showing # no progress bar. diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 5521e2e1e..891a54e5c 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -593,7 +593,7 @@ def _ensure_bqstorage_client( ) return None - if bqstorage_client is None: + if bqstorage_client is None: # pragma: NO COVER bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=self._credentials, client_options=client_options, diff --git a/setup.py b/setup.py index 5a35f4136..ed9a6351b 100644 --- a/setup.py +++ b/setup.py @@ -45,8 +45,9 @@ ] pyarrow_dependency = "pyarrow >= 3.0.0" extras = { - # Keep the no-op bqstorage extra for backward compatibility. - # See: https://github.com/googleapis/python-bigquery/issues/757 + # bqstorage had a period where it was a required dependency, and has been + # moved back to optional due to bloat. See + # https://github.com/googleapis/python-bigquery/issues/1196 for more background. "bqstorage": [ "google-cloud-bigquery-storage >= 2.6.0, <3.0.0dev", # Due to an issue in pip's dependency resolver, the `grpc` extra is not diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 414239323..862ef3245 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -54,16 +54,6 @@ from . import helpers -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None - -try: - import pyarrow - import pyarrow.types -except ImportError: # pragma: NO COVER - pyarrow = None JOB_TIMEOUT = 120 # 2 minutes DATA_PATH = pathlib.Path(__file__).parent.parent / "data" @@ -1772,11 +1762,10 @@ def test_dbapi_fetchall_from_script(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pyarrow") + bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials ) @@ -1834,10 +1823,8 @@ def test_dbapi_dry_run_query(self): self.assertEqual(list(rows), []) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_dbapi_connection_does_not_leak_sockets(self): + pytest.importorskip("google.cloud.bigquery_storage") current_process = psutil.Process() conn_count_start = len(current_process.connections()) @@ -2382,11 +2369,10 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(found[7], e_favtime) self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_nested_table_to_arrow(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") + pyarrow = pytest.importorskip("pyarrow") + pyarrow.types = pytest.importorskip("pyarrow.types") from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 60dcab85e..e9e74b06b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -32,15 +32,6 @@ import packaging import pytest -try: - import importlib.metadata as metadata -except ImportError: - import importlib_metadata as metadata - -try: - import pandas -except (ImportError, AttributeError): # pragma: NO COVER - pandas = None try: import opentelemetry @@ -59,11 +50,6 @@ msg = "Error importing from opentelemetry, is the installed version compatible?" raise ImportError(msg) from exc -try: - import pyarrow -except (ImportError, AttributeError): # pragma: NO COVER - pyarrow = None - import google.api_core.exceptions from google.api_core import client_info import google.cloud._helpers @@ -75,18 +61,9 @@ from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import google.cloud.bigquery.table -try: - from google.cloud import bigquery_storage -except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage = None from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection -if pandas is not None: - PANDAS_INSTALLED_VERSION = metadata.version("pandas") -else: - PANDAS_INSTALLED_VERSION = "0.0.0" - def _make_credentials(): import google.auth.credentials @@ -800,10 +777,9 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_creating_new_instance(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") + mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() mock_client.return_value = mock_client_instance @@ -849,10 +825,8 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): ] assert matching_warnings, "Missing dependency warning not raised." - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_obsolete_dependency(self): + pytest.importorskip("google.cloud.bigquery_storage") creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -869,10 +843,8 @@ def test_ensure_bqstorage_client_obsolete_dependency(self): ] assert matching_warnings, "Obsolete dependency warning not raised." - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_existing_client_check_passes(self): + pytest.importorskip("google.cloud.bigquery_storage") creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) mock_storage_client = mock.sentinel.mock_storage_client @@ -883,10 +855,23 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self): self.assertIs(bqstorage_client, mock_storage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) + def test_ensure_bqstorage_client_is_none(self): + pytest.importorskip("google.cloud.bigquery_storage") + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + bqstorage_client = None + + assert bqstorage_client is None + bqstorage_client = client._ensure_bqstorage_client( + bqstorage_client=bqstorage_client, + ) + + assert isinstance( + bqstorage_client, google.cloud.bigquery_storage_v1.BigQueryReadClient + ) + def test_ensure_bqstorage_client_existing_client_check_fails(self): + pytest.importorskip("google.cloud.bigquery_storage") creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) mock_storage_client = mock.sentinel.mock_storage_client @@ -972,8 +957,8 @@ def test_create_routine_w_conflict(self): timeout=DEFAULT_TIMEOUT, ) - @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") def test_span_status_is_set(self): + pytest.importorskip("opentelemetry") from google.cloud.bigquery.routine import Routine tracer_provider = TracerProvider() @@ -6039,8 +6024,8 @@ def test_insert_rows_w_numeric(self): timeout=DEFAULT_TIMEOUT, ) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -6126,8 +6111,8 @@ def test_insert_rows_from_dataframe(self): ) assert call == expected_call - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe_nan(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -6194,8 +6179,8 @@ def test_insert_rows_from_dataframe_nan(self): ) assert call == expected_call - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe_many_columns(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -6247,8 +6232,8 @@ def test_insert_rows_from_dataframe_many_columns(self): assert len(actual_calls) == 1 assert actual_calls[0] == expected_call - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -7569,9 +7554,9 @@ def test_load_table_from_file_w_default_load_config(self): project=self.PROJECT, ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import PolicyTagList, SchemaField @@ -7665,9 +7650,9 @@ def test_load_table_from_dataframe(self): # (not passed in via job_config) assert "description" not in field - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_client_location(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7710,9 +7695,9 @@ def test_load_table_from_dataframe_w_client_location(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7765,9 +7750,9 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7821,9 +7806,9 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_parquet_options_none(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7873,9 +7858,9 @@ def test_load_table_from_dataframe_w_parquet_options_none(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.parquet_options.enable_list_inference is True - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_none(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7933,9 +7918,9 @@ def test_load_table_from_dataframe_w_list_inference_none(self): # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_explicit_job_config_override(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7994,9 +7979,9 @@ def test_load_table_from_dataframe_w_explicit_job_config_override(self): # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_default_load_config(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8044,9 +8029,9 @@ def test_load_table_from_dataframe_w_default_load_config(self): assert sent_config.write_disposition == job.WriteDisposition.WRITE_TRUNCATE assert sent_config.source_format == job.SourceFormat.PARQUET - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_false(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8105,9 +8090,9 @@ def test_load_table_from_dataframe_w_list_inference_false(self): # the original config object should not have been modified assert job_config.to_api_repr() == original_config_copy.to_api_repr() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery import job client = self._make_client() @@ -8125,9 +8110,9 @@ def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(sel assert "Got unexpected source_format:" in str(exc.value) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8226,9 +8211,9 @@ def test_load_table_from_dataframe_w_automatic_schema(self): SchemaField("time_col", "TIME"), ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -8286,9 +8271,9 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): assert sent_config.source_format == job.SourceFormat.PARQUET assert sent_config.schema is None - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_index_and_auto_schema(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8348,9 +8333,9 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): ] assert sent_schema == expected_sent_schema - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_unknown_table(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES client = self._make_client() @@ -8384,9 +8369,9 @@ def test_load_table_from_dataframe_unknown_table(self): timeout=DEFAULT_TIMEOUT, ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8429,9 +8414,8 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): SchemaField("x", "INT64", "NULLABLE", None), ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - # @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8474,9 +8458,9 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se SchemaField("x", "INT64", "NULLABLE", None), ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8534,13 +8518,13 @@ def test_load_table_from_dataframe_struct_fields(self): assert sent_config.source_format == job.SourceFormat.PARQUET assert sent_config.schema == schema - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_array_fields(self): """Test that a DataFrame with array columns can be uploaded correctly. See: https://github.com/googleapis/python-bigquery/issues/19 """ + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8599,13 +8583,13 @@ def test_load_table_from_dataframe_array_fields(self): assert sent_config.source_format == job.SourceFormat.PARQUET assert sent_config.schema == schema - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_array_fields_w_auto_schema(self): """Test that a DataFrame with array columns can be uploaded correctly. See: https://github.com/googleapis/python-bigquery/issues/19 """ + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8662,9 +8646,9 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): assert sent_config.source_format == job.SourceFormat.PARQUET assert sent_config.schema == expected_schema - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8746,9 +8730,9 @@ def test_load_table_from_dataframe_w_partial_schema(self): SchemaField("bytes_col", "BYTES"), ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema_extra_types(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8783,9 +8767,9 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "bq_schema contains fields not present in dataframe" in message assert "unknown_col" in message - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8816,9 +8800,9 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): assert call_args is not None assert call_args.get("parquet_compression") == "LZ4" - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") client = self._make_client() records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) @@ -8846,8 +8830,8 @@ def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): ) def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): - pytest.importorskip("pandas", reason="Requires `pandas`") - pytest.importorskip("pyarrow", reason="Requires `pyarrow`") + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") client = self._make_client() records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] @@ -8874,14 +8858,14 @@ def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): location=self.LOCATION, ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): """Test that a DataFrame with null columns can be uploaded if a BigQuery schema is specified. See: https://github.com/googleapis/google-cloud-python/issues/7370 """ + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -8919,8 +8903,8 @@ def test_load_table_from_dataframe_w_nulls(self): assert sent_config.schema == schema assert sent_config.source_format == job.SourceFormat.PARQUET - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_load_table_from_dataframe_w_invaild_job_config(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery import job client = self._make_client() @@ -8937,8 +8921,8 @@ def test_load_table_from_dataframe_w_invaild_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_load_table_from_dataframe_with_csv_source_format(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -8987,9 +8971,9 @@ def test_load_table_from_dataframe_with_csv_source_format(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.CSV - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_higher_scale_decimal128_datatype(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 542f923d2..7e1da0034 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -21,16 +21,10 @@ import pytest -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - import google.cloud._helpers from google.cloud.bigquery import query, table from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions -from tests.unit.helpers import _to_pyarrow class TestQueryParameters(unittest.TestCase): @@ -215,8 +209,10 @@ def test_empty_iterable(self): result = _helpers.to_bq_table_rows(rows_iterable) self.assertEqual(list(result), []) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_non_empty_iterable(self): + pytest.importorskip("pyarrow") + from tests.unit.helpers import _to_pyarrow + rows_iterable = [ dict( one=_to_pyarrow(1.1), diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index 88378ec98..4071e57e0 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -13,14 +13,10 @@ # limitations under the License. import gc +import pytest import unittest from unittest import mock -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None - class TestConnection(unittest.TestCase): @staticmethod @@ -41,6 +37,8 @@ def _mock_client(self): def _mock_bqstorage_client(self): # Assumption: bigquery_storage exists. It's the test's responisbility to # not use this helper or skip itself if bqstorage is not installed. + from google.cloud import bigquery_storage + mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client._transport = mock.Mock(spec=["channel"]) mock_client._transport.grpc_channel = mock.Mock(spec=["close"]) @@ -57,10 +55,8 @@ def test_ctor_wo_bqstorage_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, None) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ctor_w_bqstorage_client(self): + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.dbapi import Connection mock_client = self._mock_client() @@ -89,10 +85,8 @@ def test_connect_wo_client(self, mock_client): self.assertIsNotNone(connection._client) self.assertIsNotNone(connection._bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_connect_w_client(self): + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -107,10 +101,8 @@ def test_connect_w_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_connect_w_both_clients(self): + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -143,10 +135,8 @@ def test_raises_error_if_closed(self): ): getattr(connection, method)() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_close_closes_all_created_bigquery_clients(self): + pytest.importorskip("google.cloud.bigquery_storage") client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() @@ -168,10 +158,8 @@ def test_close_closes_all_created_bigquery_clients(self): self.assertTrue(client.close.called) self.assertTrue(bqstorage_client._transport.grpc_channel.close.called) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_close_does_not_close_bigquery_clients_passed_to_it(self): + pytest.importorskip("google.cloud.bigquery_storage") client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() connection = self._make_one(client=client, bqstorage_client=bqstorage_client) diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index e9fd2e3dd..6fca4cec0 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -21,18 +21,8 @@ import google.cloud.bigquery.table as bq_table -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - from google.api_core import exceptions -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None - from tests.unit.helpers import _to_pyarrow @@ -97,6 +87,8 @@ def _mock_client( return mock_client def _mock_bqstorage_client(self, rows=None, stream_count=0): + from google.cloud import bigquery_storage + if rows is None: rows = [] @@ -320,11 +312,9 @@ def test_fetchall_w_row(self): self.assertEqual(len(rows), 1) self.assertEqual(rows[0], (1,)) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): + pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pyarrow") from google.cloud.bigquery import dbapi # use unordered data to also test any non-determenistic key order in dicts @@ -380,10 +370,8 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): self.assertEqual(sorted_row_data, expected_row_data) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_fetch_no_rows(self): + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import dbapi mock_client = self._mock_client( @@ -410,10 +398,8 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): # check the data returned self.assertEqual(rows, []) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import dbapi row_data = [bq_table.Row([1.1, 1.2], {"foo": 0, "bar": 1})] @@ -448,11 +434,10 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): # the default client was not used mock_client.list_rows.assert_not_called() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_no_arrow_compression(self): + pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pyarrow") + from google.cloud import bigquery_storage from google.cloud.bigquery import dbapi # Use unordered data to also test any non-determenistic key order in dicts. diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index a8107ee97..dbc5948b8 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -24,11 +24,6 @@ import pytest -try: - import importlib.metadata as metadata -except ImportError: - import importlib_metadata as metadata - import google.api_core.exceptions from test_utils.imports import maybe_fail_import @@ -37,48 +32,6 @@ from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference -try: - from google.cloud import bigquery_storage - from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( - grpc as big_query_read_grpc_transport, - ) -except ImportError: # pragma: NO COVER - bigquery_storage = None - big_query_read_grpc_transport = None - - -pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() - -if pyarrow: # pragma: NO COVER - import pyarrow.types - -try: - import pandas -except (ImportError, AttributeError): # pragma: NO COVER - pandas = None - -try: - import db_dtypes # type: ignore -except ImportError: # pragma: NO COVER - db_dtypes = None - -try: - import geopandas -except (ImportError, AttributeError): # pragma: NO COVER - geopandas = None - -try: - import tqdm - from tqdm.std import TqdmDeprecationWarning - -except (ImportError, AttributeError): # pragma: NO COVER - tqdm = None - -if pandas is not None: - PANDAS_INSTALLED_VERSION = metadata.version("pandas") -else: - PANDAS_INSTALLED_VERSION = "0.0.0" - def _mock_client(): from google.cloud.bigquery import client @@ -1948,6 +1901,8 @@ def test_row(self): class Test_EmptyRowIterator(unittest.TestCase): + PYARROW_MINIMUM_VERSION = str(_versions_helpers._MIN_PYARROW_VERSION) + def _make_one(self): from google.cloud.bigquery.table import _EmptyRowIterator @@ -1963,15 +1918,17 @@ def test_to_arrow_error_if_pyarrow_is_none(self): with self.assertRaises(ValueError): row_iterator.to_arrow() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): + pyarrow = pytest.importorskip("pyarrow") row_iterator = self._make_one() tbl = row_iterator.to_arrow() self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 0) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_iterable(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) row_iterator = self._make_one() arrow_iter = row_iterator.to_arrow_iterable() @@ -1989,8 +1946,8 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): + pandas = pytest.importorskip("pandas") row_iterator = self._make_one() df = row_iterator.to_dataframe(create_bqstorage_client=False) self.assertIsInstance(df, pandas.DataFrame) @@ -2002,8 +1959,8 @@ def test_to_dataframe_iterable_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe_iterable() - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable(self): + pandas = pytest.importorskip("pandas") row_iterator = self._make_one() df_iter = row_iterator.to_dataframe_iterable() @@ -2027,8 +1984,8 @@ def test_to_geodataframe_if_geopandas_is_none(self): ): row_iterator.to_geodataframe(create_bqstorage_client=False) - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe(self): + geopandas = pytest.importorskip("geopandas") row_iterator = self._make_one() df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) @@ -2040,6 +1997,8 @@ def test_to_geodataframe(self): class TestRowIterator(unittest.TestCase): + PYARROW_MINIMUM_VERSION = str(_versions_helpers._MIN_PYARROW_VERSION) + def _class_under_test(self): from google.cloud.bigquery.table import RowIterator @@ -2367,10 +2326,8 @@ def test__should_use_bqstorage_returns_false_when_completely_cached(self): ) ) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test__should_use_bqstorage_returns_true_if_no_cached_results(self): + pytest.importorskip("google.cloud.bigquery_storage") iterator = self._make_one(first_page_response=None) # not cached result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True @@ -2413,10 +2370,8 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): self.assertFalse(result) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test__should_use_bqstorage_returns_false_w_warning_if_obsolete_version(self): + pytest.importorskip("google.cloud.bigquery_storage") iterator = self._make_one(first_page_response=None) # not cached patcher = mock.patch( @@ -2435,8 +2390,10 @@ def test__should_use_bqstorage_returns_false_w_warning_if_obsolete_version(self) ] assert matching_warnings, "Obsolete dependency warning not raised." - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_iterable(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2536,14 +2493,17 @@ def test_to_arrow_iterable(self): [[{"name": "Bepples Phlyntstone", "age": 0}, {"name": "Dino", "age": 4}]], ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_iterable_w_bqstorage(self): + pyarrow = pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") + from google.cloud import bigquery_storage + from google.cloud.bigquery_storage_v1 import reader + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) + from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - from google.cloud.bigquery_storage_v1 import reader bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_client._transport = mock.create_autospec( @@ -2615,8 +2575,10 @@ def test_to_arrow_iterable_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2697,8 +2659,11 @@ def test_to_arrow(self): ], ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_nulls(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) + import pyarrow.types from google.cloud.bigquery.schema import SchemaField schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] @@ -2730,8 +2695,10 @@ def test_to_arrow_w_nulls(self): self.assertEqual(names, ["Donkey", "Diddy", "Dixie", None]) self.assertEqual(ages, [32, 29, None, 111]) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_unknown_type(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2773,8 +2740,10 @@ def test_to_arrow_w_unknown_type(self): warning = warned[0] self.assertTrue("sport" in str(warning)) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_empty_table(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2812,11 +2781,9 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[0].name, "name") self.assertEqual(child_field.type.value_type[1].name, "age") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): + pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2856,11 +2823,9 @@ def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): ) mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): + pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2896,14 +2861,16 @@ def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage(self): + pyarrow = pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage from google.cloud.bigquery_storage_v1 import reader + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_client._transport = mock.create_autospec( @@ -2977,13 +2944,15 @@ def test_to_arrow_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage_creates_client(self): + pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) mock_client = _mock_client() bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -3008,8 +2977,10 @@ def test_to_arrow_w_bqstorage_creates_client(self): mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): + pyarrow = pytest.importorskip( + "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION + ) from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3039,13 +3010,12 @@ def mock_verify_version(raise_if_error: bool = False): self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage_no_streams(self): + pyarrow = pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) session = bigquery_storage.types.ReadSession() @@ -3079,12 +3049,10 @@ def test_to_arrow_w_bqstorage_no_streams(self): self.assertEqual(actual_table.schema[1].name, "colC") self.assertEqual(actual_table.schema[2].name, "colB") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch("tqdm.tqdm_gui") - @mock.patch("tqdm.notebook.tqdm") - @mock.patch("tqdm.tqdm") - def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock): + def test_to_arrow_progress_bar(self): + pytest.importorskip("pyarrow") + pytest.importorskip("tqdm") + pytest.importorskip("tqdm.notebook") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3101,12 +3069,13 @@ def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_moc api_request = mock.Mock(return_value={"rows": rows}) progress_bars = ( - ("tqdm", tqdm_mock), - ("tqdm_notebook", tqdm_notebook_mock), - ("tqdm_gui", tqdm_gui_mock), + ("tqdm", mock.patch("tqdm.tqdm")), + ("tqdm_notebook", mock.patch("tqdm.notebook.tqdm")), + ("tqdm_gui", mock.patch("tqdm.tqdm_gui")), ) - for progress_bar_type, progress_bar_mock in progress_bars: + for progress_bar_type, bar_patch in progress_bars: + progress_bar_mock = bar_patch.start() row_iterator = self._make_one(_mock_client(), api_request, path, schema) tbl = row_iterator.to_arrow( progress_bar_type=progress_bar_type, @@ -3129,8 +3098,8 @@ def test_to_arrow_w_pyarrow_none(self): with self.assertRaises(ValueError): row_iterator.to_arrow() - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3171,8 +3140,8 @@ def test_to_dataframe_iterable(self): self.assertEqual(df_2["name"][0], "Sven") self.assertEqual(df_2["age"][0], 33) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable_with_dtypes(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3213,15 +3182,17 @@ def test_to_dataframe_iterable_with_dtypes(self): self.assertEqual(df_2["name"][0], "Sven") self.assertEqual(df_2["age"][0], 33) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_iterable_w_bqstorage(self): + pandas = pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage from google.cloud.bigquery_storage_v1 import reader + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) arrow_fields = [ pyarrow.field("colA", pyarrow.int64()), @@ -3285,13 +3256,12 @@ def test_to_dataframe_iterable_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -3358,8 +3328,8 @@ def test_to_dataframe_iterable_error_if_pandas_is_none(self): with pytest.raises(ValueError, match="pandas"): row_iterator.to_dataframe_iterable() - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3384,9 +3354,9 @@ def test_to_dataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.age.dtype.name, "Int64") - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.schema import SchemaField schema = [SchemaField("some_timestamp", "TIMESTAMP")] @@ -3412,9 +3382,9 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): ], ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.schema import SchemaField schema = [SchemaField("some_datetime", "DATETIME")] @@ -3436,14 +3406,10 @@ def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): [datetime.datetime(4567, 1, 1), datetime.datetime(9999, 12, 31)], ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch("tqdm.tqdm_gui") - @mock.patch("tqdm.notebook.tqdm") - @mock.patch("tqdm.tqdm") - def test_to_dataframe_progress_bar( - self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock - ): + def test_to_dataframe_progress_bar(self): + pytest.importorskip("pandas") + pytest.importorskip("tqdm") + from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3460,12 +3426,13 @@ def test_to_dataframe_progress_bar( api_request = mock.Mock(return_value={"rows": rows}) progress_bars = ( - ("tqdm", tqdm_mock), - ("tqdm_notebook", tqdm_notebook_mock), - ("tqdm_gui", tqdm_gui_mock), + ("tqdm", mock.patch("tqdm.tqdm")), + ("tqdm_notebook", mock.patch("tqdm.notebook.tqdm")), + ("tqdm_gui", mock.patch("tqdm.tqdm_gui")), ) - for progress_bar_type, progress_bar_mock in progress_bars: + for progress_bar_type, bar_patch in progress_bars: + progress_bar_mock = bar_patch.start() row_iterator = self._make_one(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe( progress_bar_type=progress_bar_type, @@ -3477,9 +3444,9 @@ def test_to_dataframe_progress_bar( progress_bar_mock().close.assert_called_once() self.assertEqual(len(df), 4) - @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): + pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3505,9 +3472,9 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): self.assertEqual(len(user_warnings), 0) self.assertEqual(len(df), 4) - @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None) def test_to_dataframe_no_tqdm(self): + pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3539,12 +3506,12 @@ def test_to_dataframe_no_tqdm(self): # should still work. self.assertEqual(len(df), 4) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch("tqdm.tqdm_gui", new=None) # will raise TypeError on call - @mock.patch("tqdm.notebook.tqdm", new=None) # will raise TypeError on call - @mock.patch("tqdm.tqdm", new=None) # will raise TypeError on call def test_to_dataframe_tqdm_error(self): + pytest.importorskip("pandas") + pytest.importorskip("tqdm") + mock.patch("tqdm.tqdm_gui", new=None) + mock.patch("tqdm.notebook.tqdm", new=None) + mock.patch("tqdm.tqdm", new=None) from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3573,14 +3540,14 @@ def test_to_dataframe_tqdm_error(self): # Warn that a progress bar was requested, but creating the tqdm # progress bar failed. - for warning in warned: + for warning in warned: # pragma: NO COVER self.assertIn( warning.category, - [UserWarning, DeprecationWarning, TqdmDeprecationWarning], + [UserWarning, DeprecationWarning], ) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_empty_results(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3596,8 +3563,8 @@ def test_to_dataframe_w_empty_results(self): self.assertEqual(len(df), 0) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_various_types_nullable(self): + pandas = pytest.importorskip("pandas") import datetime from google.cloud.bigquery.schema import SchemaField @@ -3637,8 +3604,9 @@ def test_to_dataframe_w_various_types_nullable(self): self.assertIsInstance(row.complete, bool) self.assertIsInstance(row.date, datetime.date) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_dtypes_mapper(self): + pandas = pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3832,9 +3800,11 @@ def test_to_dataframe_w_dtypes_mapper(self): ) self.assertEqual(df.timestamp.dtype.name, "object") - @unittest.skipIf(pandas is None, "Requires `pandas`") - @pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") def test_to_dataframe_w_none_dtypes_mapper(self): + pandas = pytest.importorskip("pandas") + pandas_major_version = pandas.__version__[0:2] + if pandas_major_version not in ["0.", "1."]: + pytest.skip(reason="Requires a version of pandas less than 2.0") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3888,8 +3858,8 @@ def test_to_dataframe_w_none_dtypes_mapper(self): self.assertEqual(df.time.dtype.name, "object") self.assertEqual(df.timestamp.dtype.name, "datetime64[ns, UTC]") - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_unsupported_dtypes_mapper(self): + pytest.importorskip("pandas") import numpy from google.cloud.bigquery.schema import SchemaField @@ -3945,9 +3915,11 @@ def test_to_dataframe_w_unsupported_dtypes_mapper(self): timestamp_dtype=numpy.dtype("datetime64[us]"), ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") def test_to_dataframe_column_dtypes(self): + pandas = pytest.importorskip("pandas") + pandas_major_version = pandas.__version__[0:2] + if pandas_major_version not in ["0.", "1."]: + pytest.skip("Requires a version of pandas less than 2.0") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3960,9 +3932,9 @@ def test_to_dataframe_column_dtypes(self): SchemaField("date", "DATE"), ] row_data = [ - ["1433836800000000", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"], + ["1433836800000", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"], [ - "1387811700000000", + "1387811700000", "2580", "17.7", "28.5", @@ -3970,7 +3942,7 @@ def test_to_dataframe_column_dtypes(self): "false", "1953-06-14", ], - ["1385565300000000", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], + ["1385565300000", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -3995,13 +3967,12 @@ def test_to_dataframe_column_dtypes(self): self.assertEqual(df.complete.dtype.name, "boolean") self.assertEqual(df.date.dtype.name, "dbdate") - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_datetime_objects(self): # When converting date or timestamp values to nanosecond # precision, the result can be out of pyarrow bounds. To avoid # the error when converting to Pandas, we use object type if # necessary. - + pandas = pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -4044,9 +4015,10 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() - @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.shapely", new=None) def test_to_dataframe_error_if_shapely_is_none(self): + pytest.importorskip("pandas") + with self.assertRaisesRegex( ValueError, re.escape( @@ -4056,8 +4028,9 @@ def test_to_dataframe_error_if_shapely_is_none(self): ): self._make_one_from_data().to_dataframe(geography_as_object=True) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_bqstorage_warning(self): + pytest.importorskip("pandas") + from google.cloud.bigquery.schema import SchemaField schema = [ @@ -4092,8 +4065,8 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self): ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_explicit_bqstorage_client_warning(self): + pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -4133,8 +4106,8 @@ def test_to_dataframe_max_results_w_explicit_bqstorage_client_warning(self): ) mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): + pytest.importorskip("pandas") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -4170,13 +4143,15 @@ def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_creates_client(self): + pytest.importorskip("pandas") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) mock_client = _mock_client() bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -4201,13 +4176,12 @@ def test_to_dataframe_w_bqstorage_creates_client(self): mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_no_streams(self): + pytest.importorskip("pandas") + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) session = bigquery_storage.types.ReadSession() @@ -4230,13 +4204,12 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_logs_session(self): + pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.table import Table + from google.cloud import bigquery_storage bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) session = bigquery_storage.types.ReadSession() @@ -4255,12 +4228,11 @@ def test_to_dataframe_w_bqstorage_logs_session(self): "with BQ Storage API session 'projects/test-proj/locations/us/sessions/SOMESESSION'." ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_empty_streams(self): + pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") + from google.cloud import bigquery_storage from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1 import reader @@ -4310,15 +4282,17 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_nonempty(self): + pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage from google.cloud.bigquery_storage_v1 import reader + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) arrow_fields = [ pyarrow.field("colA", pyarrow.int64()), @@ -4390,12 +4364,10 @@ def test_to_dataframe_w_bqstorage_nonempty(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1 import reader @@ -4444,14 +4416,11 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): self.assertEqual(len(got.index), total_rows) self.assertTrue(got.index.is_unique) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch("tqdm.tqdm") - def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): + def test_to_dataframe_w_bqstorage_updates_progress_bar(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") + pytest.importorskip("tqdm") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1 import reader @@ -4507,28 +4476,27 @@ def blocking_to_arrow(*args, **kwargs): selected_fields=schema, ) - row_iterator.to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type="tqdm" - ) + with mock.patch("tqdm.tqdm") as tqdm_mock: + row_iterator.to_dataframe( + bqstorage_client=bqstorage_client, progress_bar_type="tqdm" + ) + + # Make sure that this test updated the progress bar once per page from + # each stream. + total_pages = len(streams) * len(mock_pages) + expected_total_rows = total_pages * len(page_items) + progress_updates = [ + args[0] for args, kwargs in tqdm_mock().update.call_args_list + ] + # Should have sent >1 update due to delay in blocking_to_arrow. + self.assertGreater(len(progress_updates), 1) + self.assertEqual(sum(progress_updates), expected_total_rows) + tqdm_mock().close.assert_called_once() - # Make sure that this test updated the progress bar once per page from - # each stream. - total_pages = len(streams) * len(mock_pages) - expected_total_rows = total_pages * len(page_items) - progress_updates = [ - args[0] for args, kwargs in tqdm_mock().update.call_args_list - ] - # Should have sent >1 update due to delay in blocking_to_arrow. - self.assertGreater(len(progress_updates), 1) - self.assertEqual(sum(progress_updates), expected_total_rows) - tqdm_mock().close.assert_called_once() - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1 import reader @@ -4611,8 +4579,8 @@ def blocking_to_arrow(*args, **kwargs): # should have been set. self.assertLessEqual(mock_page.to_dataframe.call_count, 2) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): + pandas = pytest.importorskip("pandas") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4643,11 +4611,10 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): self.assertEqual(df.name.dtype.name, "object") self.assertTrue(df.index.is_unique) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_raises_auth_error(self): + pytest.importorskip("google.cloud.bigquery_storage") + pytest.importorskip("pandas") + from google.cloud import bigquery_storage from google.cloud.bigquery import table as mut bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -4665,10 +4632,8 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): with pytest.raises(google.api_core.exceptions.Forbidden): row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_partition(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4685,10 +4650,8 @@ def test_to_dataframe_w_bqstorage_partition(self): with pytest.raises(ValueError): row_iterator.to_dataframe(bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_snapshot(self): + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4705,15 +4668,17 @@ def test_to_dataframe_w_bqstorage_snapshot(self): with pytest.raises(ValueError): row_iterator.to_dataframe(bqstorage_client) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): + pytest.importorskip("google.cloud.bigquery_storage") + pandas = pytest.importorskip("pandas") + pyarrow = pytest.importorskip("pyarrow") + from google.cloud import bigquery_storage from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1 import reader + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) arrow_fields = [ # Not alphabetical to test column order. @@ -4818,8 +4783,9 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_dataframe_geography_as_object(self): + pandas = pytest.importorskip("pandas") + pytest.importorskip("geopandas") row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY")), ( @@ -4853,8 +4819,8 @@ def test_to_geodataframe_error_if_geopandas_is_none(self): ): self._make_one_from_data().to_geodataframe() - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe(self): + geopandas = pytest.importorskip("geopandas") row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY")), ( @@ -4883,8 +4849,8 @@ def test_to_geodataframe(self): self.assertEqual(df.geog.crs.srs, "EPSG:4326") self.assertEqual(df.geog.crs.name, "WGS 84") - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_ambiguous_geog(self): + pytest.importorskip("geopandas") row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () ) @@ -4898,8 +4864,8 @@ def test_to_geodataframe_ambiguous_geog(self): ): row_iterator.to_geodataframe(create_bqstorage_client=False) - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_bad_geography_column(self): + pytest.importorskip("geopandas") row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () ) @@ -4914,8 +4880,8 @@ def test_to_geodataframe_bad_geography_column(self): create_bqstorage_client=False, geography_column="xxx" ) - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_no_geog(self): + pytest.importorskip("geopandas") row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "STRING")), () ) @@ -4928,8 +4894,9 @@ def test_to_geodataframe_no_geog(self): ): row_iterator.to_geodataframe(create_bqstorage_client=False) - @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_w_geography_column(self): + geopandas = pytest.importorskip("geopandas") + pandas = pytest.importorskip("pandas") row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), ( @@ -4974,7 +4941,6 @@ def test_to_geodataframe_w_geography_column(self): ["0.0", "0.0", "0.0"], ) - @unittest.skipIf(geopandas is None, "Requires `geopandas`") @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe") def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): """ @@ -4983,6 +4949,8 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): This test just demonstrates that. We don't need to test all the variations, which are tested for to_dataframe. """ + pandas = pytest.importorskip("pandas") + geopandas = pytest.importorskip("geopandas") import numpy from shapely import wkt @@ -5676,9 +5644,6 @@ def test_from_api_repr_only_foreign_keys_resource(self): self.assertIsNotNone(instance.foreign_keys) -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.parametrize( "table_path", ( @@ -5689,6 +5654,7 @@ def test_from_api_repr_only_foreign_keys_resource(self): ), ) def test_table_reference_to_bqstorage_v1_stable(table_path): + pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import table as mut expected = "projects/my-project/datasets/my_dataset/tables/my_table" From 1e71bc87263c05cd153e96a3559d688ba0fe3825 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 27 Mar 2024 18:26:43 +0100 Subject: [PATCH 278/536] chore(deps): update all dependencies (#1873) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt * Update samples/geography/requirements.txt * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 6fa7ffc7e..bdaead5b1 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -32,8 +32,10 @@ pandas==2.2.1; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==15.0.2; python_version >= '3.8' -pyasn1==0.5.1 -pyasn1-modules==0.3.0 +pyasn1==0.5.1; python_version == '3.7' +pyasn1==0.6.0; python_version >= '3.8' +pyasn1-modules==0.3.0; python_version == '3.7' +pyasn1-modules==0.4.0; python_version >= '3.8' pycparser==2.21 pyparsing==3.1.2 python-dateutil==2.9.0.post0 From 7dfee0c585d2a3781ffc6e769c7c8bbe4dbe9714 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 27 Mar 2024 14:39:38 -0700 Subject: [PATCH 279/536] chore(main): release 3.20.0 (#1850) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Sweña (Swast) --- CHANGELOG.md | 15 +++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4cb0e1d20..578df101f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.20.0](https://github.com/googleapis/python-bigquery/compare/v3.19.0...v3.20.0) (2024-03-27) + + +### Features + +* Add `fields` parameter to `set_iam_policy` for consistency with update methods ([#1872](https://github.com/googleapis/python-bigquery/issues/1872)) ([08b1e6f](https://github.com/googleapis/python-bigquery/commit/08b1e6f9c41121907c345daedbae40ece18e8b6a)) + + +### Bug Fixes + +* Correct type checking ([#1848](https://github.com/googleapis/python-bigquery/issues/1848)) ([2660dbd](https://github.com/googleapis/python-bigquery/commit/2660dbd4821a89a1e20e3e1541504a409f1979aa)) +* Update error logging when converting to pyarrow column fails ([#1836](https://github.com/googleapis/python-bigquery/issues/1836)) ([0ac6e9b](https://github.com/googleapis/python-bigquery/commit/0ac6e9bf186945832f5dcdf5a4d95667b4da223e)) +* Updates a number of optional dependencies ([#1864](https://github.com/googleapis/python-bigquery/issues/1864)) ([c2496a1](https://github.com/googleapis/python-bigquery/commit/c2496a1014a7d99e805b3d0a66e4517165bd7e01)) +* Use an allowlist instead of denylist to determine when `query_and_wait` uses `jobs.query` API ([#1869](https://github.com/googleapis/python-bigquery/issues/1869)) ([e265db6](https://github.com/googleapis/python-bigquery/commit/e265db6a6a37d13056dcaac240c2cf3975dfd644)) + ## [3.19.0](https://github.com/googleapis/python-bigquery/compare/v3.18.0...v3.19.0) (2024-03-11) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 27f24bd19..4537b8250 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.19.0" +__version__ = "3.20.0" From 21714e18bad8d8d89ed5642dbdb61d14e97d5f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 28 Mar 2024 15:15:14 -0500 Subject: [PATCH 280/536] fix: make `pyarrow` an optional dependency post-3.20.0 yanked release (#1879) * fix: make `pyarrow` an optional dependency again * install older version of pyarrow * fix for older tqdm * remove many pragma: NO COVERs --- google/cloud/bigquery/_pandas_helpers.py | 18 ++++------ google/cloud/bigquery/_pyarrow_helpers.py | 4 +-- google/cloud/bigquery/_tqdm_helpers.py | 13 ++++--- google/cloud/bigquery/_versions_helpers.py | 4 +-- google/cloud/bigquery/job/query.py | 7 +--- google/cloud/bigquery/magics/magics.py | 2 +- google/cloud/bigquery/table.py | 6 ++-- noxfile.py | 15 ++++---- samples/desktopapp/requirements-test.txt | 1 - samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 3 +- testing/constraints-3.11.txt | 1 - testing/constraints-3.12.txt | 1 - testing/constraints-3.7.txt | 4 +-- tests/unit/job/test_query_pandas.py | 40 ++++++++-------------- tests/unit/test__pandas_helpers.py | 15 ++++---- tests/unit/test__versions_helpers.py | 33 +++++++++++++----- tests/unit/test_legacy_types.py | 2 +- tests/unit/test_opentelemetry_tracing.py | 2 +- tests/unit/test_table.py | 38 ++++++++++++++++++-- tests/unit/test_table_pandas.py | 15 +++----- 21 files changed, 126 insertions(+), 100 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 9f8dcfde4..3b58d3736 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -32,7 +32,7 @@ import pandas # type: ignore pandas_import_exception = None -except ImportError as exc: # pragma: NO COVER +except ImportError as exc: pandas = None pandas_import_exception = exc else: @@ -44,25 +44,21 @@ date_dtype_name = db_dtypes.DateDtype.name time_dtype_name = db_dtypes.TimeDtype.name db_dtypes_import_exception = None -except ImportError as exc: # pragma: NO COVER +except ImportError as exc: db_dtypes = None db_dtypes_import_exception = exc date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype -pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) -from pyarrow import ArrowTypeError # type: ignore # noqa: E402 - -_BIGNUMERIC_SUPPORT = False -if pyarrow is not None: # pragma: NO COVER - _BIGNUMERIC_SUPPORT = True +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() try: # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` from shapely.geometry.base import BaseGeometry as _BaseGeometry # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: # No shapely, use NoneType for _BaseGeometry as a placeholder. _BaseGeometry = type(None) else: + # We don't have any unit test sessions that install shapely but not pandas. if pandas is not None: # pragma: NO COVER def _to_wkb(): @@ -309,10 +305,10 @@ def bq_to_arrow_array(series, bq_field): if field_type_upper in schema._STRUCT_TYPES: return pyarrow.StructArray.from_pandas(series, type=arrow_type) return pyarrow.Array.from_pandas(series, type=arrow_type) - except ArrowTypeError: # pragma: NO COVER + except pyarrow.ArrowTypeError: msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray""" _LOGGER.error(msg) - raise ArrowTypeError(msg) + raise pyarrow.ArrowTypeError(msg) def get_column_or_index(dataframe, name): diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py index 06509cc93..3c745a611 100644 --- a/google/cloud/bigquery/_pyarrow_helpers.py +++ b/google/cloud/bigquery/_pyarrow_helpers.py @@ -20,7 +20,7 @@ try: import pyarrow # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: pyarrow = None @@ -49,7 +49,7 @@ def pyarrow_timestamp(): _BQ_TO_ARROW_SCALARS = {} _ARROW_SCALAR_IDS_TO_BQ = {} -if pyarrow: # pragma: NO COVER +if pyarrow: # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py # When modifying it be sure to update it there as well. # Note(todo!!): type "BIGNUMERIC"'s matching pyarrow type is added in _pandas_helpers.py diff --git a/google/cloud/bigquery/_tqdm_helpers.py b/google/cloud/bigquery/_tqdm_helpers.py index cb81bd8f6..22ccee971 100644 --- a/google/cloud/bigquery/_tqdm_helpers.py +++ b/google/cloud/bigquery/_tqdm_helpers.py @@ -23,11 +23,14 @@ try: import tqdm # type: ignore - import tqdm.notebook as notebook # type: ignore - -except ImportError: # pragma: NO COVER +except ImportError: tqdm = None +try: + import tqdm.notebook as tqdm_notebook # type: ignore +except ImportError: + tqdm_notebook = None + if typing.TYPE_CHECKING: # pragma: NO COVER from google.cloud.bigquery import QueryJob from google.cloud.bigquery.table import RowIterator @@ -42,7 +45,7 @@ def get_progress_bar(progress_bar_type, description, total, unit): """Construct a tqdm progress bar object, if tqdm is installed.""" - if tqdm is None: + if tqdm is None or tqdm_notebook is None and progress_bar_type == "tqdm_notebook": if progress_bar_type is not None: warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) return None @@ -58,7 +61,7 @@ def get_progress_bar(progress_bar_type, description, total, unit): unit=unit, ) elif progress_bar_type == "tqdm_notebook": - return notebook.tqdm( + return tqdm_notebook.tqdm( bar_format="{l_bar}{bar}|", desc=description, file=sys.stdout, diff --git a/google/cloud/bigquery/_versions_helpers.py b/google/cloud/bigquery/_versions_helpers.py index 4ff4b9700..50d5961b3 100644 --- a/google/cloud/bigquery/_versions_helpers.py +++ b/google/cloud/bigquery/_versions_helpers.py @@ -73,7 +73,7 @@ def try_import(self, raise_if_error: bool = False) -> Any: """ try: import pyarrow - except ImportError as exc: # pragma: NO COVER + except ImportError as exc: if raise_if_error: raise exceptions.LegacyPyarrowError( "pyarrow package not found. Install pyarrow version >=" @@ -212,7 +212,7 @@ def try_import(self, raise_if_error: bool = False) -> Any: """ try: import pandas - except ImportError as exc: # pragma: NO COVER + except ImportError as exc: if raise_if_error: raise exceptions.LegacyPandasError( "pandas package not found. Install pandas version >=" diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 83d2751ce..e92e9cb9e 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -56,14 +56,9 @@ try: import pandas # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: pandas = None -try: - import db_dtypes # type: ignore -except ImportError: # pragma: NO COVER - db_dtypes = None - if typing.TYPE_CHECKING: # pragma: NO COVER # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 8464c8792..6e6b21965 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -95,7 +95,7 @@ import IPython # type: ignore from IPython import display # type: ignore from IPython.core import magic_arguments # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: raise ImportError("This module can only be loaded in IPython.") from google.api_core import client_info diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index b3be4ff90..c002822fe 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -26,17 +26,17 @@ try: import pandas # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: pandas = None try: import pyarrow # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: pyarrow = None try: import db_dtypes # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: db_dtypes = None try: diff --git a/noxfile.py b/noxfile.py index 548690afa..3adb4ba70 100644 --- a/noxfile.py +++ b/noxfile.py @@ -86,7 +86,7 @@ def default(session, install_extras=True): install_target = ".[all]" else: install_target = "." - session.install("-e", install_target) + session.install("-e", install_target, "-c", constraints_path) session.run("python", "-m", "pip", "freeze") # Run py.test against the unit tests. @@ -115,14 +115,15 @@ def unit(session): def unit_noextras(session): """Run the unit test suite.""" - # Install optional dependencies that are out-of-date. + # Install optional dependencies that are out-of-date to see that + # we fail gracefully. # https://github.com/googleapis/python-bigquery/issues/933 - # There is no pyarrow 1.0.0 package for Python 3.9. - + # + # We only install this extra package on one of the two Python versions + # so that it continues to be an optional dependency. + # https://github.com/googleapis/python-bigquery/issues/1877 if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: - session.install("pyarrow>=3.0.0") - elif session.python == UNIT_TEST_PYTHON_VERSIONS[-1]: - session.install("pyarrow") + session.install("pyarrow==1.0.0") default(session, install_extras=False) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 413a7fd48..9142d4905 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -2,4 +2,3 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 -pyarrow>=3.0.0 \ No newline at end of file diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 413a7fd48..0343ab89a 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ +# samples/snippets should be runnable with no "extras" google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' pytest==8.1.1; python_version >= '3.8' mock==5.1.0 -pyarrow>=3.0.0 \ No newline at end of file diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index b3347499f..af9436c51 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1 +1,2 @@ -google-cloud-bigquery==3.19.0 \ No newline at end of file +# samples/snippets should be runnable with no "extras" +google-cloud-bigquery==3.19.0 diff --git a/testing/constraints-3.11.txt b/testing/constraints-3.11.txt index e80ca0ccf..e69de29bb 100644 --- a/testing/constraints-3.11.txt +++ b/testing/constraints-3.11.txt @@ -1 +0,0 @@ -pyarrow>=3.0.0 \ No newline at end of file diff --git a/testing/constraints-3.12.txt b/testing/constraints-3.12.txt index e80ca0ccf..e69de29bb 100644 --- a/testing/constraints-3.12.txt +++ b/testing/constraints-3.12.txt @@ -1 +0,0 @@ -pyarrow>=3.0.0 \ No newline at end of file diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 1fc7c6838..d64e06cc3 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -27,9 +27,9 @@ packaging==20.0.0 pandas==1.1.0 proto-plus==1.22.0 protobuf==3.19.5 -pyarrow>=3.0.0 +pyarrow==3.0.0 python-dateutil==2.7.3 requests==2.21.0 Shapely==1.8.4 six==1.13.0 -tqdm==4.7.4 \ No newline at end of file +tqdm==4.7.4 diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 1473ef283..3a5d92dbd 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -19,53 +19,38 @@ import pytest +from ..helpers import make_connection +from .helpers import _make_client +from .helpers import _make_job_resource try: from google.cloud import bigquery_storage import google.cloud.bigquery_storage_v1.reader import google.cloud.bigquery_storage_v1.services.big_query_read.client -except (ImportError, AttributeError): # pragma: NO COVER +except (ImportError, AttributeError): bigquery_storage = None -try: - import pandas -except (ImportError, AttributeError): # pragma: NO COVER - pandas = None try: import shapely -except (ImportError, AttributeError): # pragma: NO COVER +except (ImportError, AttributeError): shapely = None try: import geopandas -except (ImportError, AttributeError): # pragma: NO COVER +except (ImportError, AttributeError): geopandas = None try: import tqdm -except (ImportError, AttributeError): # pragma: NO COVER +except (ImportError, AttributeError): tqdm = None -try: - import importlib.metadata as metadata -except ImportError: - import importlib_metadata as metadata - -from ..helpers import make_connection -from .helpers import _make_client -from .helpers import _make_job_resource - -if pandas is not None: - PANDAS_INSTALLED_VERSION = metadata.version("pandas") -else: - PANDAS_INSTALLED_VERSION = "0.0.0" - -pandas = pytest.importorskip("pandas") - try: import pyarrow import pyarrow.types -except ImportError: # pragma: NO COVER +except ImportError: pyarrow = None +pandas = pytest.importorskip("pandas") + @pytest.fixture def table_read_options_kwarg(): @@ -660,7 +645,10 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) -@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") +@pytest.mark.skipif( + pandas.__version__.startswith("2."), + reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", +) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 244384620..5c13669f3 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -30,12 +30,12 @@ import pandas import pandas.api.types import pandas.testing -except ImportError: # pragma: NO COVER +except ImportError: pandas = None try: import geopandas -except ImportError: # pragma: NO COVER +except ImportError: geopandas = None import pytest @@ -46,18 +46,19 @@ from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() if pyarrow: import pyarrow.parquet import pyarrow.types - from pyarrow import ArrowTypeError # type: ignore # noqa: E402 -else: # pragma: NO COVER + + _BIGNUMERIC_SUPPORT = True +else: # Mock out pyarrow when missing, because methods from pyarrow.types are # used in test parameterization. pyarrow = mock.Mock() + _BIGNUMERIC_SUPPORT = False bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import() @@ -572,9 +573,9 @@ def test_bq_to_arrow_array_w_conversion_fail(module_under_test): # pragma: NO C series = pandas.Series(rows, name="test_col", dtype="object") bq_field = schema.SchemaField("field_name", "STRING", mode="REPEATED") exc_msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray""" - with pytest.raises(ArrowTypeError, match=exc_msg): + with pytest.raises(pyarrow.ArrowTypeError, match=exc_msg): module_under_test.bq_to_arrow_array(series, bq_field) - raise ArrowTypeError(exc_msg) + raise pyarrow.ArrowTypeError(exc_msg) @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) diff --git a/tests/unit/test__versions_helpers.py b/tests/unit/test__versions_helpers.py index 8fa099627..b1d0ef1ac 100644 --- a/tests/unit/test__versions_helpers.py +++ b/tests/unit/test__versions_helpers.py @@ -18,17 +18,17 @@ try: import pyarrow # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: pyarrow = None try: from google.cloud import bigquery_storage # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: bigquery_storage = None try: import pandas # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: pandas = None from google.cloud.bigquery import _versions_helpers @@ -39,11 +39,8 @@ def test_try_import_raises_no_error_w_recent_pyarrow(): versions = _versions_helpers.PyarrowVersions() with mock.patch("pyarrow.__version__", new="5.0.0"): - try: - pyarrow = versions.try_import(raise_if_error=True) - assert pyarrow is not None - except exceptions.LegacyPyarrowError: # pragma: NO COVER - raise ("Legacy error raised with a non-legacy dependency version.") + pyarrow = versions.try_import(raise_if_error=True) + assert pyarrow is not None @pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") @@ -62,6 +59,16 @@ def test_try_import_raises_error_w_legacy_pyarrow(): versions.try_import(raise_if_error=True) +@pytest.mark.skipif( + pyarrow is not None, + reason="pyarrow is installed, but this test needs it not to be", +) +def test_try_import_raises_error_w_no_pyarrow(): + versions = _versions_helpers.PyarrowVersions() + with pytest.raises(exceptions.LegacyPyarrowError): + versions.try_import(raise_if_error=True) + + @pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") def test_installed_pyarrow_version_returns_cached(): versions = _versions_helpers.PyarrowVersions() @@ -208,6 +215,16 @@ def test_try_import_raises_error_w_legacy_pandas(): versions.try_import(raise_if_error=True) +@pytest.mark.skipif( + pandas is not None, + reason="pandas is installed, but this test needs it not to be", +) +def test_try_import_raises_error_w_no_pandas(): + versions = _versions_helpers.PandasVersions() + with pytest.raises(exceptions.LegacyPandasError): + versions.try_import(raise_if_error=True) + + @pytest.mark.skipif(pandas is None, reason="pandas is not installed") def test_installed_pandas_version_returns_cached(): versions = _versions_helpers.PandasVersions() diff --git a/tests/unit/test_legacy_types.py b/tests/unit/test_legacy_types.py index 3431074fd..809be1855 100644 --- a/tests/unit/test_legacy_types.py +++ b/tests/unit/test_legacy_types.py @@ -19,7 +19,7 @@ try: import proto # type: ignore -except ImportError: # pragma: NO COVER +except ImportError: proto = None diff --git a/tests/unit/test_opentelemetry_tracing.py b/tests/unit/test_opentelemetry_tracing.py index e96e18c6b..579d7b1b7 100644 --- a/tests/unit/test_opentelemetry_tracing.py +++ b/tests/unit/test_opentelemetry_tracing.py @@ -19,7 +19,7 @@ try: import opentelemetry -except ImportError: # pragma: NO COVER +except ImportError: opentelemetry = None if opentelemetry is not None: diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index dbc5948b8..3953170fd 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -3408,6 +3408,7 @@ def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): def test_to_dataframe_progress_bar(self): pytest.importorskip("pandas") + pytest.importorskip("pyarrow") pytest.importorskip("tqdm") from google.cloud.bigquery.schema import SchemaField @@ -3427,7 +3428,6 @@ def test_to_dataframe_progress_bar(self): progress_bars = ( ("tqdm", mock.patch("tqdm.tqdm")), - ("tqdm_notebook", mock.patch("tqdm.notebook.tqdm")), ("tqdm_gui", mock.patch("tqdm.tqdm_gui")), ) @@ -3444,9 +3444,43 @@ def test_to_dataframe_progress_bar(self): progress_bar_mock().close.assert_called_once() self.assertEqual(len(df), 4) + def test_to_dataframe_progress_bar_notebook(self): + pytest.importorskip("pandas") + pytest.importorskip("pyarrow") + pytest.importorskip("tqdm") + pytest.importorskip("tqdm.notebook") + + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + + with mock.patch("tqdm.notebook.tqdm") as progress_bar_mock: + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + df = row_iterator.to_dataframe( + progress_bar_type="tqdm_notebook", + create_bqstorage_client=False, + ) + + progress_bar_mock.assert_called() + progress_bar_mock().update.assert_called() + progress_bar_mock().close.assert_called_once() + self.assertEqual(len(df), 4) + @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): pytest.importorskip("pandas") + pytest.importorskip("pyarrow") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -3711,7 +3745,7 @@ def test_to_dataframe_w_dtypes_mapper(self): if hasattr(pandas, "Float64Dtype"): self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) self.assertEqual(df.miles.dtype.name, "Float64") - else: # pragma: NO COVER + else: self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"]) self.assertEqual(df.miles.dtype.name, "string") diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index b38568561..02a7a6a79 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -16,11 +16,6 @@ import decimal from unittest import mock -try: - import importlib.metadata as metadata -except ImportError: - import importlib_metadata as metadata - import pytest from google.cloud import bigquery @@ -31,11 +26,6 @@ TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data" -if pandas is not None: # pragma: NO COVER - PANDAS_INSTALLED_VERSION = metadata.version("pandas") -else: # pragma: NO COVER - PANDAS_INSTALLED_VERSION = "0.0.0" - @pytest.fixture def class_under_test(): @@ -44,7 +34,10 @@ def class_under_test(): return RowIterator -@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="") +@pytest.mark.skipif( + pandas.__version__.startswith("2."), + reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", +) def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): # See tests/system/test_arrow.py for the actual types we get from the API. arrow_schema = pyarrow.schema( From 31bae06d1aec2a31177271beaa54be0c1f626810 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 1 Apr 2024 14:07:17 +0200 Subject: [PATCH 281/536] chore(deps): update all dependencies (#1875) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index bdaead5b1..1c7bfa5b3 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -32,9 +32,9 @@ pandas==2.2.1; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==15.0.2; python_version >= '3.8' -pyasn1==0.5.1; python_version == '3.7' +pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' -pyasn1-modules==0.3.0; python_version == '3.7' +pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.0; python_version >= '3.8' pycparser==2.21 pyparsing==3.1.2 From a4bb5629d867e8c6933ddfc1c2031d7dec85b9c5 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 07:42:46 -0500 Subject: [PATCH 282/536] chore(main): release 3.20.1 (#1880) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 578df101f..95af2d213 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.20.1](https://github.com/googleapis/python-bigquery/compare/v3.20.0...v3.20.1) (2024-04-01) + + +### Bug Fixes + +* Make `pyarrow` an optional dependency post-3.20.0 yanked release ([#1879](https://github.com/googleapis/python-bigquery/issues/1879)) ([21714e1](https://github.com/googleapis/python-bigquery/commit/21714e18bad8d8d89ed5642dbdb61d14e97d5f33)) + ## [3.20.0](https://github.com/googleapis/python-bigquery/compare/v3.19.0...v3.20.0) (2024-03-27) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 4537b8250..55093e390 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.20.0" +__version__ = "3.20.1" From c852c153c55025ba1187d61e313ead2308616c55 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 3 Apr 2024 07:09:21 -0400 Subject: [PATCH 283/536] fix: creates linting-typing.cfg in presubmit (#1881) * creates linting-typing.cfg in presubmit * attempt to filter out linting and typing tests from presubmit * lints and blackens this commit * revise environmental variables * Update noxfile.py * Update noxfile.py * Update noxfile.py * Update noxfile.py * Update noxfile.py * Update noxfile.py * Update .kokoro/presubmit/linting-typing.cfg * Update .kokoro/presubmit/linting-typing.cfg * Update .kokoro/presubmit/linting-typing.cfg * Update .kokoro/presubmit/presubmit.cfg * Update .kokoro/presubmit/presubmit.cfg --- .kokoro/presubmit/linting-typing.cfg | 7 +++++++ .kokoro/presubmit/presubmit.cfg | 4 ++++ noxfile.py | 24 ++++++++++++++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 .kokoro/presubmit/linting-typing.cfg diff --git a/.kokoro/presubmit/linting-typing.cfg b/.kokoro/presubmit/linting-typing.cfg new file mode 100644 index 000000000..b1a7406c2 --- /dev/null +++ b/.kokoro/presubmit/linting-typing.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run these nox sessions. +env_vars: { + key: "NOX_SESSION" + value: "lint lint_setup_py blacken mypy mypy_samples pytype" +} diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg index 17d071cae..fa39b1118 100644 --- a/.kokoro/presubmit/presubmit.cfg +++ b/.kokoro/presubmit/presubmit.cfg @@ -9,3 +9,7 @@ env_vars: { key: "RUN_SNIPPETS_TESTS" value: "false" } +env_vars: { + key: "RUN_LINTING_TYPING_TESTS" + value: "false" +} diff --git a/noxfile.py b/noxfile.py index 3adb4ba70..034bb843a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -132,6 +132,10 @@ def unit_noextras(session): def mypy(session): """Run type checks with mypy.""" + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("-e", ".[all]") session.install(MYPY_VERSION) @@ -153,6 +157,10 @@ def pytype(session): # recent version avoids the error until a possibly better fix is found. # https://github.com/googleapis/python-bigquery/issues/655 + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install(PYTYPE_VERSION) @@ -213,6 +221,10 @@ def system(session): def mypy_samples(session): """Run type checks with mypy.""" + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("pytest") for requirements_path in CURRENT_DIRECTORY.glob("samples/*/requirements.txt"): session.install("-r", str(requirements_path)) @@ -394,6 +406,10 @@ def lint(session): serious code quality issues. """ + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("flake8", BLACK_VERSION) session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) @@ -408,6 +424,10 @@ def lint(session): def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("docutils", "Pygments") session.run("python", "setup.py", "check", "--restructuredtext", "--strict") @@ -418,6 +438,10 @@ def blacken(session): Format code to uniform standard. """ + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install(BLACK_VERSION) session.run("black", *BLACK_PATHS) From d08ca708ba91b01fe5e7095e612c326cd3bcfe98 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 8 Apr 2024 21:16:14 +0200 Subject: [PATCH 284/536] chore(deps): update all dependencies (#1882) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * pin pycparser==2.21 for python 3.7 --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 1c7bfa5b3..76b1a7b6b 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -36,7 +36,8 @@ pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.0; python_version >= '3.8' -pycparser==2.21 +pycparser==2.21; python_version == '3.7' +pycparser==2.22; python_version >= '3.8' pyparsing==3.1.2 python-dateutil==2.9.0.post0 pytz==2024.1 From 3634405fa1b40ae5f69b06d7c7f8de4e3d246d92 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 10 Apr 2024 13:58:29 -0700 Subject: [PATCH 285/536] feat: support RANGE in queries Part 1: JSON (#1884) * feat: support range in queries as dict * fix sys tests * lint * fix typo --- google/cloud/bigquery/_helpers.py | 41 ++++++++++++ tests/system/helpers.py | 5 ++ tests/system/test_query.py | 6 +- tests/unit/test__helpers.py | 105 +++++++++++++++++++++++++++++- 4 files changed, 153 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 7198b60c2..0572867d7 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -309,6 +309,46 @@ def _json_from_json(value, field): return None +def _range_element_from_json(value, field): + """Coerce 'value' to a range element value, if set or not nullable.""" + if value == "UNBOUNDED": + return None + elif field.element_type == "DATE": + return _date_from_json(value, None) + elif field.element_type == "DATETIME": + return _datetime_from_json(value, None) + elif field.element_type == "TIMESTAMP": + return _timestamp_from_json(value, None) + else: + raise ValueError(f"Unsupported range field type: {value}") + + +def _range_from_json(value, field): + """Coerce 'value' to a range, if set or not nullable. + + Args: + value (str): The literal representation of the range. + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. + + Returns: + Optional[dict]: + The parsed range object from ``value`` if the ``field`` is not + null (otherwise it is :data:`None`). + """ + range_literal = re.compile(r"\[.*, .*\)") + if _not_null(value, field): + if range_literal.match(value): + start, end = value[1:-1].split(", ") + start = _range_element_from_json(start, field.range_element_type) + end = _range_element_from_json(end, field.range_element_type) + return {"start": start, "end": end} + else: + raise ValueError(f"Unknown range format: {value}") + else: + return None + + # Parse BigQuery API response JSON into a Python representation. _CELLDATA_FROM_JSON = { "INTEGER": _int_from_json, @@ -329,6 +369,7 @@ def _json_from_json(value, field): "TIME": _time_from_json, "RECORD": _record_from_json, "JSON": _json_from_json, + "RANGE": _range_from_json, } _QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON) diff --git a/tests/system/helpers.py b/tests/system/helpers.py index 721f55040..7fd344eeb 100644 --- a/tests/system/helpers.py +++ b/tests/system/helpers.py @@ -25,6 +25,7 @@ _naive = datetime.datetime(2016, 12, 5, 12, 41, 9) _naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000) _stamp = "%s %s" % (_naive.date().isoformat(), _naive.time().isoformat()) +_date = _naive.date().isoformat() _stamp_microseconds = _stamp + ".250000" _zoned = _naive.replace(tzinfo=UTC) _zoned_microseconds = _naive_microseconds.replace(tzinfo=UTC) @@ -78,6 +79,10 @@ ), ("SELECT ARRAY(SELECT STRUCT([1, 2]))", [{"_field_1": [1, 2]}]), ("SELECT ST_GeogPoint(1, 2)", "POINT(1 2)"), + ( + "SELECT RANGE '[UNBOUNDED, %s)'" % _date, + {"start": None, "end": _naive.date()}, + ), ] diff --git a/tests/system/test_query.py b/tests/system/test_query.py index 0494272d9..d94a117e3 100644 --- a/tests/system/test_query.py +++ b/tests/system/test_query.py @@ -425,7 +425,7 @@ def test_query_statistics(bigquery_client, query_api_method): ), ( "SELECT @range_date", - "[2016-12-05, UNBOUNDED)", + {"end": None, "start": datetime.date(2016, 12, 5)}, [ RangeQueryParameter( name="range_date", @@ -436,7 +436,7 @@ def test_query_statistics(bigquery_client, query_api_method): ), ( "SELECT @range_datetime", - "[2016-12-05T00:00:00, UNBOUNDED)", + {"end": None, "start": datetime.datetime(2016, 12, 5, 0, 0)}, [ RangeQueryParameter( name="range_datetime", @@ -447,7 +447,7 @@ def test_query_statistics(bigquery_client, query_api_method): ), ( "SELECT @range_unbounded", - "[UNBOUNDED, UNBOUNDED)", + {"end": None, "start": None}, [ RangeQueryParameter( name="range_unbounded", diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 320c57737..a50625e2a 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -452,6 +452,99 @@ def test_w_bogus_string_value(self): self._call_fut("12:12:27.123", object()) +class Test_range_from_json(unittest.TestCase): + def _call_fut(self, value, field): + from google.cloud.bigquery._helpers import _range_from_json + + return _range_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._call_fut(None, _Field("REQUIRED")) + + def test_w_wrong_format(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="DATE"), + ) + with self.assertRaises(ValueError): + self._call_fut("[2009-06-172019-06-17)", range_field) + + def test_w_wrong_element_type(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="TIME"), + ) + with self.assertRaises(ValueError): + self._call_fut("[15:31:38, 15:50:38)", range_field) + + def test_w_unbounded_value(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="DATE"), + ) + coerced = self._call_fut("[UNBOUNDED, 2019-06-17)", range_field) + self.assertEqual( + coerced, + {"start": None, "end": datetime.date(2019, 6, 17)}, + ) + + def test_w_date_value(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="DATE"), + ) + coerced = self._call_fut("[2009-06-17, 2019-06-17)", range_field) + self.assertEqual( + coerced, + { + "start": datetime.date(2009, 6, 17), + "end": datetime.date(2019, 6, 17), + }, + ) + + def test_w_datetime_value(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="DATETIME"), + ) + coerced = self._call_fut( + "[2009-06-17T13:45:30, 2019-06-17T13:45:30)", range_field + ) + self.assertEqual( + coerced, + { + "start": datetime.datetime(2009, 6, 17, 13, 45, 30), + "end": datetime.datetime(2019, 6, 17, 13, 45, 30), + }, + ) + + def test_w_timestamp_value(self): + from google.cloud._helpers import _EPOCH + + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="TIMESTAMP"), + ) + coerced = self._call_fut("[1234567, 1234789)", range_field) + self.assertEqual( + coerced, + { + "start": _EPOCH + datetime.timedelta(seconds=1, microseconds=234567), + "end": _EPOCH + datetime.timedelta(seconds=1, microseconds=234789), + }, + ) + + class Test_record_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _record_from_json @@ -1323,11 +1416,21 @@ def test_w_str(self): class _Field(object): - def __init__(self, mode, name="unknown", field_type="UNKNOWN", fields=()): + def __init__( + self, + mode, + name="unknown", + field_type="UNKNOWN", + fields=(), + range_element_type=None, + element_type=None, + ): self.mode = mode self.name = name self.field_type = field_type self.fields = fields + self.range_element_type = range_element_type + self.element_type = element_type def _field_isinstance_patcher(): From 38697fb942516fc2f6f5e21e19a11811fbaeb1f4 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 11 Apr 2024 13:49:15 -0400 Subject: [PATCH 286/536] feat: adds billing to opentel (#1889) --- google/cloud/bigquery/opentelemetry_tracing.py | 8 ++++++++ tests/unit/test_opentelemetry_tracing.py | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index e2a05e4d0..b5f6bf991 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -153,4 +153,12 @@ def _set_job_attributes(job_ref): if job_ref.num_child_jobs is not None: job_attributes["num_child_jobs"] = job_ref.num_child_jobs + total_bytes_billed = getattr(job_ref, "total_bytes_billed", None) + if total_bytes_billed is not None: + job_attributes["total_bytes_billed"] = total_bytes_billed + + total_bytes_processed = getattr(job_ref, "total_bytes_processed", None) + if total_bytes_processed is not None: + job_attributes["total_bytes_processed"] = total_bytes_processed + return job_attributes diff --git a/tests/unit/test_opentelemetry_tracing.py b/tests/unit/test_opentelemetry_tracing.py index 579d7b1b7..546cc02bd 100644 --- a/tests/unit/test_opentelemetry_tracing.py +++ b/tests/unit/test_opentelemetry_tracing.py @@ -142,6 +142,8 @@ def test_default_job_attributes(setup): "timeEnded": ended_time.isoformat(), "hasErrors": True, "state": "some_job_state", + "total_bytes_billed": 42, + "total_bytes_processed": 13, } with mock.patch("google.cloud.bigquery.job._AsyncJob") as test_job_ref: test_job_ref.job_id = "test_job_id" @@ -154,6 +156,8 @@ def test_default_job_attributes(setup): test_job_ref.ended = ended_time test_job_ref.error_result = error_result test_job_ref.state = "some_job_state" + test_job_ref.total_bytes_billed = 42 + test_job_ref.total_bytes_processed = 13 with opentelemetry_tracing.create_span( TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, job_ref=test_job_ref @@ -180,6 +184,8 @@ def test_optional_job_attributes(setup): test_job_ref.state = "some_job_state" test_job_ref.num_child_jobs = None test_job_ref.parent_job_id = None + test_job_ref.total_bytes_billed = None + test_job_ref.total_bytes_processed = None with opentelemetry_tracing.create_span( TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, job_ref=test_job_ref From 19394ab2ab2fa67b1995a3c5e53f06f99500d3f6 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 11 Apr 2024 21:17:23 +0200 Subject: [PATCH 287/536] chore(deps): update all dependencies (#1891) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * revert pinned requirement version and add triple equal "===" prevents dependabot from attempting to upgrade it in the future --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 8561934dc..fee6806b7 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.19.0 +google-cloud-bigquery==3.20.1 google-auth-oauthlib==1.2.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 76b1a7b6b..e11fa09cf 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -15,7 +15,7 @@ geopandas===0.13.2; python_version == '3.8' geopandas==0.14.3; python_version >= '3.9' google-api-core==2.18.0 google-auth==2.29.0 -google-cloud-bigquery==3.19.0 +google-cloud-bigquery==3.20.1 google-cloud-bigquery-storage==2.24.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 @@ -36,7 +36,7 @@ pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.0; python_version >= '3.8' -pycparser==2.21; python_version == '3.7' +pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' pyparsing==3.1.2 python-dateutil==2.9.0.post0 @@ -47,7 +47,7 @@ rsa==4.9 Shapely==2.0.3 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.10.0; python_version >= '3.8' +typing-extensions==4.11.0; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' urllib3==2.2.1; python_version >= '3.8' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 9179db067..05fd1907b 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.19.0 +google.cloud.bigquery==3.20.1 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 8f2e93620..40fba4b87 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,12 +1,12 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.19.0 +google-cloud-bigquery==3.20.1 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.8.3; python_version >= '3.9' +matplotlib==3.8.4; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.1; python_version >= '3.9' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index af9436c51..95f915364 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.19.0 +google-cloud-bigquery==3.20.1 From 5ed9ccee204b7cf8e96cb0e050f6830c05f3b4fd Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 11 Apr 2024 15:33:30 -0400 Subject: [PATCH 288/536] feat: Add compression option ZSTD. (#1890) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add ZSTD to compression types * feat: adds tests re Compression types * revise datatype from Enum to object * adds license text and docstring * change object back to enum datatype * updates compression object comparison * updates Compression class * jsonify and sort the input and output for testing * Update tests/unit/job/test_extract.py * moved json import statement * removed enums test and file --------- Co-authored-by: Ethan Steinberg Co-authored-by: Tim Sweña (Swast) --- google/cloud/bigquery/enums.py | 5 ++++- tests/unit/job/test_extract.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index d75037ad1..1abe28381 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -22,7 +22,7 @@ class AutoRowIDs(enum.Enum): GENERATE_UUID = enum.auto() -class Compression(object): +class Compression(str, enum.Enum): """The compression type to use for exported files. The default value is :attr:`NONE`. @@ -39,6 +39,9 @@ class Compression(object): SNAPPY = "SNAPPY" """Specifies SNAPPY format.""" + ZSTD = "ZSTD" + """Specifies ZSTD format.""" + NONE = "NONE" """Specifies no compression.""" diff --git a/tests/unit/job/test_extract.py b/tests/unit/job/test_extract.py index 76ee72f28..ee0d67d68 100644 --- a/tests/unit/job/test_extract.py +++ b/tests/unit/job/test_extract.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json from unittest import mock from ..helpers import make_connection @@ -45,9 +46,8 @@ def test_to_api_repr(self): config.print_header = False config._properties["extract"]["someNewField"] = "some-value" config.use_avro_logical_types = True - resource = config.to_api_repr() - self.assertEqual( - resource, + resource = json.dumps(config.to_api_repr(), sort_keys=True) + expected = json.dumps( { "extract": { "compression": "SNAPPY", @@ -58,6 +58,12 @@ def test_to_api_repr(self): "useAvroLogicalTypes": True, } }, + sort_keys=True, + ) + + self.assertEqual( + resource, + expected, ) def test_from_api_repr(self): From 5c6f7d9a98a84a9c39123dd621915f56f53d34bb Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 12 Apr 2024 17:18:51 +0200 Subject: [PATCH 289/536] chore(deps): update dependency idna to v3.7 [security] (#1896) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e11fa09cf..e4b63cdaa 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -22,7 +22,7 @@ google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 grpcio==1.62.1 -idna==3.6 +idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 packaging==24.0 From 88501c0cc3d88423c5e1b421fcd6b69cc72e7d51 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 12 Apr 2024 18:49:08 +0200 Subject: [PATCH 290/536] chore(deps): update all dependencies (#1893) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e4b63cdaa..b3d9bc841 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -28,7 +28,7 @@ mypy-extensions==1.0.0 packaging==24.0 pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.1; python_version >= '3.9' +pandas==2.2.2; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==15.0.2; python_version >= '3.8' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 05fd1907b..61471a348 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -6,4 +6,4 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.1; python_version >= '3.9' +pandas==2.2.2; python_version >= '3.9' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 40fba4b87..3960f47b9 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -9,4 +9,4 @@ matplotlib===3.7.4; python_version == '3.8' matplotlib==3.8.4; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.1; python_version >= '3.9' +pandas==2.2.2; python_version >= '3.9' From a0fddbba1aac1ae94aa8ec75f9d0b158b430549b Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 13:41:37 -0400 Subject: [PATCH 291/536] chore(python): bump idna from 3.4 to 3.7 in .kokoro (#1897) * chore(python): bump idna from 3.4 to 3.7 in .kokoro Source-Link: https://github.com/googleapis/synthtool/commit/d50980e704793a2d3310bfb3664f3a82f24b5796 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:5a4c19d17e597b92d786e569be101e636c9c2817731f80a5adec56b2aa8fe070 * Apply changes from googleapis/synthtool#1950 --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 4 ++-- .github/auto-label.yaml | 5 +++++ .github/blunderbuss.yml | 17 +++++++++++++++++ .kokoro/requirements.txt | 6 +++--- docs/index.rst | 5 +++++ docs/summary_overview.md | 22 ++++++++++++++++++++++ 6 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 .github/blunderbuss.yml create mode 100644 docs/summary_overview.md diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index dc9c56e9d..81f87c569 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:a8a80fc6456e433df53fc2a0d72ca0345db0ddefb409f1b75b118dfd1babd952 -# created: 2024-03-15T16:25:47.905264637Z \ No newline at end of file + digest: sha256:5a4c19d17e597b92d786e569be101e636c9c2817731f80a5adec56b2aa8fe070 +# created: 2024-04-12T11:35:58.922854369Z diff --git a/.github/auto-label.yaml b/.github/auto-label.yaml index b2016d119..8b37ee897 100644 --- a/.github/auto-label.yaml +++ b/.github/auto-label.yaml @@ -13,3 +13,8 @@ # limitations under the License. requestsize: enabled: true + +path: + pullrequest: true + paths: + samples: "samples" diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml new file mode 100644 index 000000000..5b7383dc7 --- /dev/null +++ b/.github/blunderbuss.yml @@ -0,0 +1,17 @@ +# Blunderbuss config +# +# This file controls who is assigned for pull requests and issues. +# Note: This file is autogenerated. To make changes to the assignee +# team, please update `codeowner_team` in `.repo-metadata.json`. +assign_issues: + - googleapis/api-bigquery + +assign_issues_by: + - labels: + - "samples" + to: + - googleapis/python-samples-reviewers + - googleapis/api-bigquery + +assign_prs: + - googleapis/api-bigquery diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index dd61f5f32..51f92b8e1 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -252,9 +252,9 @@ googleapis-common-protos==1.61.0 \ --hash=sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0 \ --hash=sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b # via google-api-core -idna==3.4 \ - --hash=sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4 \ - --hash=sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 +idna==3.7 \ + --hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \ + --hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 # via requests importlib-metadata==6.8.0 \ --hash=sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb \ diff --git a/docs/index.rst b/docs/index.rst index 500c67a7f..6d6ed63f6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -48,3 +48,8 @@ For a list of all ``google-cloud-bigquery`` releases: :maxdepth: 2 changelog + +.. toctree:: + :hidden: + + summary_overview.md diff --git a/docs/summary_overview.md b/docs/summary_overview.md new file mode 100644 index 000000000..6dd228e13 --- /dev/null +++ b/docs/summary_overview.md @@ -0,0 +1,22 @@ +[ +This is a templated file. Adding content to this file may result in it being +reverted. Instead, if you want to place additional content, create an +"overview_content.md" file in `docs/` directory. The Sphinx tool will +pick up on the content and merge the content. +]: # + +# Google Cloud BigQuery API + +Overview of the APIs available for Google Cloud BigQuery API. + +## All entries + +Classes, methods and properties & attributes for +Google Cloud BigQuery API. + +[classes](https://cloud.google.com/python/docs/reference/bigquery/latest/summary_class.html) + +[methods](https://cloud.google.com/python/docs/reference/bigquery/latest/summary_method.html) + +[properties and +attributes](https://cloud.google.com/python/docs/reference/bigquery/latest/summary_property.html) From 82ae908fbf3b2361343fff1859d3533383dc50ec Mon Sep 17 00:00:00 2001 From: Toran Sahu Date: Fri, 12 Apr 2024 23:40:17 +0530 Subject: [PATCH 292/536] =?UTF-8?q?fix:=20Remove=20duplicate=20key=20time?= =?UTF-8?q?=5Fpartitioning=20from=20Table.=5FPROPERTY=5FTO=5FA=E2=80=A6=20?= =?UTF-8?q?(#1898)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …PI_FIELD Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- google/cloud/bigquery/table.py | 1 - 1 file changed, 1 deletion(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index c002822fe..73e755e9e 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -385,7 +385,6 @@ class Table(_TableBase): "clone_definition": "cloneDefinition", "streaming_buffer": "streamingBuffer", "self_link": "selfLink", - "time_partitioning": "timePartitioning", "type": "type", "view_use_legacy_sql": "view", "view_query": "view", From bf8861c3473a1af978db7a06463ddc0bad86f326 Mon Sep 17 00:00:00 2001 From: kserruys Date: Fri, 12 Apr 2024 20:42:29 +0200 Subject: [PATCH 293/536] fix: add types to DatasetReference constructor (#1601) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: add types to DatasetReference constructor * fix: add types to DatasetReference constructor * fix: DatasetReference.from_string test coverage --------- Co-authored-by: Karel Serruys Co-authored-by: Chalmer Lowe Co-authored-by: meredithslota Co-authored-by: Tim Sweña (Swast) --- google/cloud/bigquery/dataset.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index c313045ce..c49a52faf 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -92,7 +92,7 @@ class DatasetReference(object): ValueError: If either argument is not of type ``str``. """ - def __init__(self, project, dataset_id): + def __init__(self, project: str, dataset_id: str): if not isinstance(project, str): raise ValueError("Pass a string for project") if not isinstance(dataset_id, str): @@ -166,22 +166,24 @@ def from_string( standard SQL format. """ output_dataset_id = dataset_id - output_project_id = default_project parts = _helpers._split_id(dataset_id) - if len(parts) == 1 and not default_project: - raise ValueError( - "When default_project is not set, dataset_id must be a " - "fully-qualified dataset ID in standard SQL format, " - 'e.g., "project.dataset_id" got {}'.format(dataset_id) - ) + if len(parts) == 1: + if default_project is not None: + output_project_id = default_project + else: + raise ValueError( + "When default_project is not set, dataset_id must be a " + "fully-qualified dataset ID in standard SQL format, " + 'e.g., "project.dataset_id" got {}'.format(dataset_id) + ) elif len(parts) == 2: output_project_id, output_dataset_id = parts - elif len(parts) > 2: + else: raise ValueError( "Too many parts in dataset_id. Expected a fully-qualified " - "dataset ID in standard SQL format. e.g. " - '"project.dataset_id", got {}'.format(dataset_id) + "dataset ID in standard SQL format, " + 'e.g. "project.dataset_id", got {}'.format(dataset_id) ) return cls(output_project_id, output_dataset_id) From 1367b584b68d917ec325ce4383a0e9a36205b894 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 18 Apr 2024 09:31:40 -0500 Subject: [PATCH 294/536] fix: avoid unnecessary API call in QueryJob.result() when job is already finished (#1900) fix: retry query job after ambiguous failures Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/_job_helpers.py | 9 +- google/cloud/bigquery/job/query.py | 172 ++++++++----- google/cloud/bigquery/retry.py | 52 +++- tests/unit/job/test_query.py | 334 +++++++++++++++----------- tests/unit/test__job_helpers.py | 38 ++- tests/unit/test_job_retry.py | 172 ++++++++++++- 6 files changed, 547 insertions(+), 230 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 602a49eba..290439394 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -258,15 +258,16 @@ def _to_query_job( errors = query_response["errors"] query_job._properties["status"]["errors"] = errors - # Transform job state so that QueryJob doesn't try to restart the query. + # Avoid an extra call to `getQueryResults` if the query has finished. job_complete = query_response.get("jobComplete") if job_complete: - query_job._properties["status"]["state"] = "DONE" query_job._query_results = google.cloud.bigquery.query._QueryResults( query_response ) - else: - query_job._properties["status"]["state"] = "PENDING" + + # We want job.result() to refresh the job state, so the conversion is + # always "PENDING", even if the job is finished. + query_job._properties["status"]["state"] = "PENDING" return query_job diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index e92e9cb9e..7436b6013 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -17,11 +17,11 @@ import concurrent.futures import copy import re +import time import typing from typing import Any, Dict, Iterable, List, Optional, Union from google.api_core import exceptions -from google.api_core.future import polling as polling_future from google.api_core import retry as retries import requests @@ -1383,7 +1383,7 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): def _reload_query_results( self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: Optional[float] = None ): - """Refresh the cached query results. + """Refresh the cached query results unless already cached and complete. Args: retry (Optional[google.api_core.retry.Retry]): @@ -1392,6 +1392,8 @@ def _reload_query_results( The number of seconds to wait for the underlying HTTP transport before using ``retry``. """ + # Optimization: avoid a call to jobs.getQueryResults if it's already + # been fetched, e.g. from jobs.query first page of results. if self._query_results and self._query_results.complete: return @@ -1430,40 +1432,6 @@ def _reload_query_results( timeout=transport_timeout, ) - def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): - """Check if the query has finished running and raise if it's not. - - If the query has finished, also reload the job itself. - """ - # If an explicit timeout is not given, fall back to the transport timeout - # stored in _blocking_poll() in the process of polling for job completion. - transport_timeout = timeout if timeout is not None else self._transport_timeout - - try: - self._reload_query_results(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError as exc: - # Reloading also updates error details on self, thus no need for an - # explicit self.set_exception() call if reloading succeeds. - try: - self.reload(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError: - # Use the query results reload exception, as it generally contains - # much more useful error information. - self.set_exception(exc) - finally: - return - - # Only reload the job once we know the query is complete. - # This will ensure that fields such as the destination table are - # correctly populated. - if not self._query_results.complete: - raise polling_future._OperationNotComplete() - else: - try: - self.reload(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError as exc: - self.set_exception(exc) - def result( # type: ignore # (incompatible with supertype) self, page_size: Optional[int] = None, @@ -1528,6 +1496,10 @@ def result( # type: ignore # (incompatible with supertype) If Non-``None`` and non-default ``job_retry`` is provided and the job is not retryable. """ + # Note: Since waiting for a query job to finish is more complex than + # refreshing the job state in a loop, we avoid calling the superclass + # in this method. + if self.dry_run: return _EmptyRowIterator( project=self.project, @@ -1548,46 +1520,124 @@ def result( # type: ignore # (incompatible with supertype) " provided to the query that created this job." ) - first = True + restart_query_job = False + + def is_job_done(): + nonlocal restart_query_job - def do_get_result(): - nonlocal first + if restart_query_job: + restart_query_job = False - if first: - first = False - else: + # The original job has failed. Create a new one. + # # Note that we won't get here if retry_do_query is # None, because we won't use a retry. - - # The orinal job is failed. Create a new one. job = retry_do_query() - # If it's already failed, we might as well stop: - if job.done() and job.exception() is not None: - raise job.exception() - # Become the new job: self.__dict__.clear() self.__dict__.update(job.__dict__) - # This shouldn't be necessary, because once we have a good - # job, it should stay good,and we shouldn't have to retry. - # But let's be paranoid. :) + # It's possible the job fails again and we'll have to + # retry that too. self._retry_do_query = retry_do_query self._job_retry = job_retry - super(QueryJob, self).result(retry=retry, timeout=timeout) - - # Since the job could already be "done" (e.g. got a finished job - # via client.get_job), the superclass call to done() might not - # set the self._query_results cache. - if self._query_results is None or not self._query_results.complete: - self._reload_query_results(retry=retry, timeout=timeout) + # Refresh the job status with jobs.get because some of the + # exceptions thrown by jobs.getQueryResults like timeout and + # rateLimitExceeded errors are ambiguous. We want to know if + # the query job failed and not just the call to + # jobs.getQueryResults. + if self.done(retry=retry, timeout=timeout): + # If it's already failed, we might as well stop. + job_failed_exception = self.exception() + if job_failed_exception is not None: + # Only try to restart the query job if the job failed for + # a retriable reason. For example, don't restart the query + # if the call to reload the job metadata within self.done() + # timed out. + # + # The `restart_query_job` must only be called after a + # successful call to the `jobs.get` REST API and we + # determine that the job has failed. + # + # The `jobs.get` REST API + # (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get) + # is called via `self.done()` which calls + # `self.reload()`. + # + # To determine if the job failed, the `self.exception()` + # is set from `self.reload()` via + # `self._set_properties()`, which translates the + # `Job.status.errorResult` field + # (https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.error_result) + # into an exception that can be processed by the + # `job_retry` predicate. + restart_query_job = True + raise job_failed_exception + else: + # Make sure that the _query_results are cached so we + # can return a complete RowIterator. + # + # Note: As an optimization, _reload_query_results + # doesn't make any API calls if the query results are + # already cached and have jobComplete=True in the + # response from the REST API. This ensures we aren't + # making any extra API calls if the previous loop + # iteration fetched the finished job. + self._reload_query_results(retry=retry, timeout=timeout) + return True + + # Call jobs.getQueryResults with max results set to 0 just to + # wait for the query to finish. Unlike most methods, + # jobs.getQueryResults hangs as long as it can to ensure we + # know when the query has finished as soon as possible. + self._reload_query_results(retry=retry, timeout=timeout) + + # Even if the query is finished now according to + # jobs.getQueryResults, we'll want to reload the job status if + # it's not already DONE. + return False if retry_do_query is not None and job_retry is not None: - do_get_result = job_retry(do_get_result) - - do_get_result() + is_job_done = job_retry(is_job_done) + + # timeout can be a number of seconds, `None`, or a + # `google.api_core.future.polling.PollingFuture._DEFAULT_VALUE` + # sentinel object indicating a default timeout if we choose to add + # one some day. This value can come from our PollingFuture + # superclass and was introduced in + # https://github.com/googleapis/python-api-core/pull/462. + if isinstance(timeout, (float, int)): + remaining_timeout = timeout + else: + # Note: we may need to handle _DEFAULT_VALUE as a separate + # case someday, but even then the best we can do for queries + # is 72+ hours for hyperparameter tuning jobs: + # https://cloud.google.com/bigquery/quotas#query_jobs + # + # The timeout for a multi-statement query is 24+ hours. See: + # https://cloud.google.com/bigquery/quotas#multi_statement_query_limits + remaining_timeout = None + + if remaining_timeout is None: + # Since is_job_done() calls jobs.getQueryResults, which is a + # long-running API, don't delay the next request at all. + while not is_job_done(): + pass + else: + # Use a monotonic clock since we don't actually care about + # daylight savings or similar, just the elapsed time. + previous_time = time.monotonic() + + while not is_job_done(): + current_time = time.monotonic() + elapsed_time = current_time - previous_time + remaining_timeout = remaining_timeout - elapsed_time + previous_time = current_time + + if remaining_timeout < 0: + raise concurrent.futures.TimeoutError() except exceptions.GoogleAPICallError as exc: exc.message = _EXCEPTION_FOOTER_TEMPLATE.format( diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 01b127972..c9898287f 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -36,10 +36,25 @@ _DEFAULT_RETRY_DEADLINE = 10.0 * 60.0 # 10 minutes -# Allow for a few retries after the API request times out. This relevant for -# rateLimitExceeded errors, which can be raised either by the Google load -# balancer or the BigQuery job server. -_DEFAULT_JOB_DEADLINE = 3.0 * _DEFAULT_RETRY_DEADLINE +# Ambiguous errors (e.g. internalError, backendError, rateLimitExceeded) retry +# until the full `_DEFAULT_RETRY_DEADLINE`. This is because the +# `jobs.getQueryResults` REST API translates a job failure into an HTTP error. +# +# TODO(https://github.com/googleapis/python-bigquery/issues/1903): Investigate +# if we can fail early for ambiguous errors in `QueryJob.result()`'s call to +# the `jobs.getQueryResult` API. +# +# We need `_DEFAULT_JOB_DEADLINE` to be some multiple of +# `_DEFAULT_RETRY_DEADLINE` to allow for a few retries after the retry +# timeout is reached. +# +# Note: This multiple should actually be a multiple of +# (2 * _DEFAULT_RETRY_DEADLINE). After an ambiguous exception, the first +# call from `job_retry()` refreshes the job state without actually restarting +# the query. The second `job_retry()` actually restarts the query. For a more +# detailed explanation, see the comments where we set `restart_query_job = True` +# in `QueryJob.result()`'s inner `is_job_done()` function. +_DEFAULT_JOB_DEADLINE = 2.0 * (2.0 * _DEFAULT_RETRY_DEADLINE) def _should_retry(exc): @@ -66,6 +81,11 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ +# Note: Take care when updating DEFAULT_TIMEOUT to anything but None. We +# briefly had a default timeout, but even setting it at more than twice the +# theoretical server-side default timeout of 2 minutes was not enough for +# complex queries. See: +# https://github.com/googleapis/python-bigquery/issues/970#issuecomment-921934647 DEFAULT_TIMEOUT = None """The default API timeout. @@ -73,10 +93,32 @@ def _should_retry(exc): deadline on the retry object. """ -job_retry_reasons = "rateLimitExceeded", "backendError", "jobRateLimitExceeded" +job_retry_reasons = ( + "rateLimitExceeded", + "backendError", + "internalError", + "jobRateLimitExceeded", +) def _job_should_retry(exc): + # Sometimes we have ambiguous errors, such as 'backendError' which could + # be due to an API problem or a job problem. For these, make sure we retry + # our is_job_done() function. + # + # Note: This won't restart the job unless we know for sure it's because of + # the job status and set restart_query_job = True in that loop. This means + # that we might end up calling this predicate twice for the same job + # but from different paths: (1) from jobs.getQueryResults RetryError and + # (2) from translating the job error from the body of a jobs.get response. + # + # Note: If we start retrying job types other than queries where we don't + # call the problematic getQueryResults API to check the status, we need + # to provide a different predicate, as there shouldn't be ambiguous + # errors in those cases. + if isinstance(exc, exceptions.RetryError): + exc = exc.cause + if not hasattr(exc, "errors") or len(exc.errors) == 0: return False diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 37ac7ba5e..0fee053e3 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -13,6 +13,7 @@ # limitations under the License. import concurrent +import concurrent.futures import copy import http import textwrap @@ -371,100 +372,6 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) - def test__done_or_raise_w_timeout(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - - with mock.patch.object( - client, "_get_query_results" - ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job._done_or_raise(timeout=42) - - fake_get_results.assert_called_once() - call_args = fake_get_results.call_args[0][1] - self.assertEqual(call_args.timeout, 600.0) - - call_args = fake_reload.call_args[1] - self.assertEqual(call_args["timeout"], 42) - - def test__done_or_raise_w_timeout_and_longer_internal_api_timeout(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - job._done_timeout = 8.8 - - with mock.patch.object( - client, "_get_query_results" - ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job._done_or_raise(timeout=5.5) - - # The expected timeout used is simply the given timeout, as the latter - # is shorter than the job's internal done timeout. - expected_timeout = 5.5 - - fake_get_results.assert_called_once() - call_args = fake_get_results.call_args[0][1] - self.assertAlmostEqual(call_args.timeout, 600.0) - - call_args = fake_reload.call_args - self.assertAlmostEqual(call_args[1].get("timeout"), expected_timeout) - - def test__done_or_raise_w_query_results_error_reload_ok(self): - client = _make_client(project=self.PROJECT) - bad_request_error = exceptions.BadRequest("Error in query") - client._get_query_results = mock.Mock(side_effect=bad_request_error) - - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - job._exception = None - - def fake_reload(self, *args, **kwargs): - self._properties["status"]["state"] = "DONE" - self.set_exception(copy.copy(bad_request_error)) - - fake_reload_method = types.MethodType(fake_reload, job) - - with mock.patch.object(job, "reload", new=fake_reload_method): - job._done_or_raise() - - assert isinstance(job._exception, exceptions.BadRequest) - - def test__done_or_raise_w_query_results_error_reload_error(self): - client = _make_client(project=self.PROJECT) - bad_request_error = exceptions.BadRequest("Error in query") - client._get_query_results = mock.Mock(side_effect=bad_request_error) - - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - reload_error = exceptions.DataLoss("Oops, sorry!") - job.reload = mock.Mock(side_effect=reload_error) - job._exception = None - - job._done_or_raise() - - assert job._exception is bad_request_error - - def test__done_or_raise_w_job_query_results_ok_reload_error(self): - client = _make_client(project=self.PROJECT) - query_results = google.cloud.bigquery.query._QueryResults( - properties={ - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": "12345"}, - } - ) - client._get_query_results = mock.Mock(return_value=query_results) - - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) - job.reload = mock.Mock(side_effect=retry_error) - job._exception = None - - job._done_or_raise() - - assert job._exception is retry_error - def test_query_plan(self): from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.job import QueryPlanEntry @@ -933,7 +840,12 @@ def test_search_stats(self): assert isinstance(job.search_stats, SearchStats) assert job.search_stats.mode == "INDEX_USAGE_MODE_UNSPECIFIED" - def test_result(self): + def test_result_reloads_job_state_until_done(self): + """Verify that result() doesn't return until state == 'DONE'. + + This test verifies correctness for a possible sequence of API responses + that might cause internal customer issue b/332850329. + """ from google.cloud.bigquery.table import RowIterator query_resource = { @@ -970,7 +882,54 @@ def test_result(self): "rows": [{"f": [{"v": "abc"}]}], } conn = make_connection( - query_resource, query_resource_done, job_resource_done, query_page_resource + # QueryJob.result() makes a pair of jobs.get & jobs.getQueryResults + # REST API calls each iteration to determine if the job has finished + # or not. + # + # jobs.get (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get) + # is necessary to make sure the job has really finished via + # `Job.status.state == "DONE"` and to get necessary properties for + # `RowIterator` like the destination table. + # + # jobs.getQueryResults + # (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults) + # with maxResults == 0 is technically optional, + # but it hangs up to 10 seconds until the job has finished. This + # makes sure we can know when the query has finished as close as + # possible to when the query finishes. It also gets properties + # necessary for `RowIterator` that isn't available on the job + # resource such as the schema + # (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults#body.GetQueryResultsResponse.FIELDS.schema) + # of the results. + job_resource, + query_resource, + # The query wasn't finished in the last call to jobs.get, so try + # again with a call to both jobs.get & jobs.getQueryResults. + job_resource, + query_resource_done, + # Even though, the previous jobs.getQueryResults response says + # the job is complete, we haven't downloaded the full job status + # yet. + # + # Important: per internal issue 332850329, this reponse has + # `Job.status.state = "RUNNING"`. This ensures we are protected + # against possible eventual consistency issues where + # `jobs.getQueryResults` says jobComplete == True, but our next + # call to `jobs.get` still doesn't have + # `Job.status.state == "DONE"`. + job_resource, + # Try again until `Job.status.state == "DONE"`. + # + # Note: the call to `jobs.getQueryResults` is missing here as + # an optimization. We already received a "completed" response, so + # we won't learn anything new by calling that API again. + job_resource, + job_resource_done, + # When we iterate over the `RowIterator` we return from + # `QueryJob.result()`, we make additional calls to + # `jobs.getQueryResults` but this time allowing the actual rows + # to be returned as well. + query_page_resource, ) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -1013,8 +972,32 @@ def test_result(self): }, timeout=None, ) + # Ensure that we actually made the expected API calls in the sequence + # we thought above at the make_connection() call above. + # + # Note: The responses from jobs.get and jobs.getQueryResults can be + # deceptively similar, so this check ensures we actually made the + # requests we expected. conn.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call, query_page_call] + [ + # jobs.get & jobs.getQueryResults because the job just started. + reload_call, + query_results_call, + # jobs.get & jobs.getQueryResults because the query is still + # running. + reload_call, + query_results_call, + # We got a jobComplete response from the most recent call to + # jobs.getQueryResults, so now call jobs.get until we get + # `Jobs.status.state == "DONE"`. This tests a fix for internal + # issue b/332850329. + reload_call, + reload_call, + reload_call, + # jobs.getQueryResults without `maxResults` set to download + # the rows as we iterate over the `RowIterator`. + query_page_call, + ] ) def test_result_dry_run(self): @@ -1069,7 +1052,7 @@ def test_result_with_done_job_calls_get_query_results(self): method="GET", path=query_results_path, query_params={"maxResults": 0, "location": "EU"}, - timeout=None, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, ) query_results_page_call = mock.call( method="GET", @@ -1107,7 +1090,10 @@ def test_result_with_done_jobs_query_response_doesnt_call_get_query_results(self request_config=None, query_response=query_resource_done, ) - assert job.state == "DONE" + + # We want job.result() to refresh the job state, so the conversion is + # always "PENDING", even if the job is finished. + assert job.state == "PENDING" result = job.result() @@ -1156,7 +1142,9 @@ def test_result_with_done_jobs_query_response_and_page_size_invalidates_cache(se request_config=None, query_response=query_resource_done, ) - assert job.state == "DONE" + # We want job.result() to refresh the job state, so the conversion is + # always "PENDING", even if the job is finished. + assert job.state == "PENDING" # Act result = job.result(page_size=3) @@ -1230,7 +1218,7 @@ def test_result_with_max_results(self): query_page_request[1]["query_params"]["maxResults"], max_results ) - def test_result_w_retry(self): + def test_result_w_custom_retry(self): from google.cloud.bigquery.table import RowIterator query_resource = { @@ -1254,12 +1242,24 @@ def test_result_w_retry(self): } connection = make_connection( + # Also, for each API request, raise an exception that we know can + # be retried. Because of this, for each iteration we do: + # jobs.get (x2) & jobs.getQueryResults (x2) + exceptions.NotFound("not normally retriable"), + job_resource, exceptions.NotFound("not normally retriable"), query_resource, + # Query still not done, repeat both. exceptions.NotFound("not normally retriable"), - query_resource_done, + job_resource, exceptions.NotFound("not normally retriable"), + query_resource, + exceptions.NotFound("not normally retriable"), + # Query still not done, repeat both. job_resource_done, + exceptions.NotFound("not normally retriable"), + query_resource_done, + # Query finished! ) client = _make_client(self.PROJECT, connection=connection) job = self._get_target_class().from_api_repr(job_resource, client) @@ -1279,7 +1279,10 @@ def test_result_w_retry(self): method="GET", path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", query_params={"maxResults": 0, "location": "asia-northeast1"}, - timeout=None, + # TODO(tswast): Why do we end up setting timeout to + # google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT in + # some cases but not others? + timeout=mock.ANY, ) reload_call = mock.call( method="GET", @@ -1289,7 +1292,26 @@ def test_result_w_retry(self): ) connection.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call] + [ + # See make_connection() call above for explanation of the + # expected API calls. + # + # Query not done. + reload_call, + reload_call, + query_results_call, + query_results_call, + # Query still not done. + reload_call, + reload_call, + query_results_call, + query_results_call, + # Query done! + reload_call, + reload_call, + query_results_call, + query_results_call, + ] ) def test_result_w_empty_schema(self): @@ -1316,41 +1338,60 @@ def test_result_w_empty_schema(self): self.assertEqual(result.location, "asia-northeast1") self.assertEqual(result.query_id, "xyz-abc") - def test_result_invokes_begins(self): + def test_result_w_timeout_doesnt_raise(self): + import google.cloud.bigquery.client + begun_resource = self._make_resource() - incomplete_resource = { - "jobComplete": False, + query_resource = { + "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, } - query_resource = copy.deepcopy(incomplete_resource) - query_resource["jobComplete"] = True done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = make_connection( - begun_resource, - incomplete_resource, - query_resource, - done_resource, - query_resource, - ) + connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) + job._properties["jobReference"]["location"] = "US" - job.result() + with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): + job.result( + # Test that fractional seconds are supported, but use a timeout + # that is representable as a floating point without rounding + # errors since it can be represented exactly in base 2. In this + # case 1.125 is 9 / 8, which is a fraction with a power of 2 in + # the denominator. + timeout=1.125, + ) - self.assertEqual(len(connection.api_request.call_args_list), 4) - begin_request = connection.api_request.call_args_list[0] - query_request = connection.api_request.call_args_list[2] - reload_request = connection.api_request.call_args_list[3] - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(query_request[1]["method"], "GET") - self.assertEqual(reload_request[1]["method"], "GET") + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={"location": "US"}, + timeout=1.125, + ) + get_query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + connection.api_request.assert_has_calls( + [ + reload_call, + get_query_results_call, + reload_call, + ] + ) - def test_result_w_timeout(self): + def test_result_w_timeout_raises_concurrent_futures_timeout(self): import google.cloud.bigquery.client begun_resource = self._make_resource() + begun_resource["jobReference"]["location"] = "US" query_resource = { "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, @@ -1361,26 +1402,35 @@ def test_result_w_timeout(self): connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) + job._properties["jobReference"]["location"] = "US" - with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): - job.result(timeout=1.0) - - self.assertEqual(len(connection.api_request.call_args_list), 3) - begin_request = connection.api_request.call_args_list[0] - query_request = connection.api_request.call_args_list[1] - reload_request = connection.api_request.call_args_list[2] - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(query_request[1]["method"], "GET") - self.assertEqual( - query_request[1]["path"], - "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), + with freezegun.freeze_time( + "1970-01-01 00:00:00", auto_tick_seconds=1.0 + ), self.assertRaises(concurrent.futures.TimeoutError): + job.result(timeout=1.125) + + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={"location": "US"}, + timeout=1.125, ) - self.assertEqual(query_request[1]["timeout"], 120) - self.assertEqual( - query_request[1]["timeout"], - google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + get_query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + connection.api_request.assert_has_calls( + [ + reload_call, + get_query_results_call, + # Timeout before we can reload with the final job state. + ] ) - self.assertEqual(reload_request[1]["method"], "GET") def test_result_w_page_size(self): # Arrange diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 671b829f7..9f661dca7 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -246,7 +246,9 @@ def test__to_query_job_dry_run(): @pytest.mark.parametrize( ("completed", "expected_state"), ( - (True, "DONE"), + # Always pending so that we refresh the job state to get the + # destination table or job stats in case it's needed. + (True, "PENDING"), (False, "PENDING"), ), ) @@ -843,6 +845,7 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): "jobId": "response-job-id", "location": "response-location", }, + "status": {"state": "DONE"}, }, { "rows": [ @@ -896,18 +899,10 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): timeout=None, ) - # TODO(swast): Fetching job metadata isn't necessary in this case. - jobs_get_path = "/projects/response-project/jobs/response-job-id" - client._call_api.assert_any_call( - None, # retry - span_name="BigQuery.job.reload", - span_attributes={"path": jobs_get_path}, - job_ref=mock.ANY, - method="GET", - path=jobs_get_path, - query_params={"location": "response-location"}, - timeout=None, - ) + # Note: There is no get call to + # "/projects/response-project/jobs/response-job-id", because fetching job + # metadata isn't necessary in this case. The job already completed in + # jobs.query and we don't need the full job metadata in query_and_wait. # Fetch the remaining two pages. jobs_get_query_results_path = "/projects/response-project/queries/response-job-id" @@ -944,6 +939,7 @@ def test_query_and_wait_incomplete_query(): Client._list_rows_from_query_results, client ) client._call_api.side_effect = ( + # jobs.query { "jobReference": { "projectId": "response-project", @@ -952,6 +948,16 @@ def test_query_and_wait_incomplete_query(): }, "jobComplete": False, }, + # jobs.get + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "status": {"state": "RUNNING"}, + }, + # jobs.getQueryResults with max_results=0 { "jobReference": { "projectId": "response-project", @@ -968,13 +974,18 @@ def test_query_and_wait_incomplete_query(): ], }, }, + # jobs.get { "jobReference": { "projectId": "response-project", "jobId": "response-job-id", "location": "response-location", }, + "status": {"state": "DONE"}, }, + # jobs.getQueryResults + # Note: No more jobs.getQueryResults with max_results=0 because the + # previous call to jobs.getQueryResults returned with jobComplete=True. { "rows": [ {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, @@ -987,6 +998,7 @@ def test_query_and_wait_incomplete_query(): "totalRows": 2, "pageToken": "page-2", }, + # jobs.getQueryResults { "rows": [ {"f": [{"v": "Pearl Slaghoople"}, {"v": "53"}]}, diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index d7049c5ca..43ddae1dc 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -24,7 +24,7 @@ from google.cloud.bigquery.client import Client from google.cloud.bigquery import _job_helpers -from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY +import google.cloud.bigquery.retry from .helpers import make_connection @@ -126,6 +126,168 @@ def api_request(method, path, query_params=None, data=None, **kw): assert job.job_id == orig_job_id +def test_query_retry_with_default_retry_and_ambiguous_errors_only_retries_with_failed_job( + client, monkeypatch +): + """ + Some errors like 'rateLimitExceeded' can be ambiguous. Make sure we only + retry the job when we know for sure that the job has failed for a retriable + reason. We can only be sure after a "successful" call to jobs.get to fetch + the failed job status. + """ + job_counter = 0 + + def make_job_id(*args, **kwargs): + nonlocal job_counter + job_counter += 1 + return f"{job_counter}" + + monkeypatch.setattr(_job_helpers, "make_job_id", make_job_id) + + project = client.project + job_reference_1 = {"projectId": project, "jobId": "1", "location": "test-loc"} + job_reference_2 = {"projectId": project, "jobId": "2", "location": "test-loc"} + NUM_API_RETRIES = 2 + + # This error is modeled after a real customer exception in + # https://github.com/googleapis/python-bigquery/issues/707. + internal_error = google.api_core.exceptions.InternalServerError( + "Job failed just because...", + errors=[ + {"reason": "internalError"}, + ], + ) + responses = [ + # jobs.insert + {"jobReference": job_reference_1, "status": {"state": "PENDING"}}, + # jobs.get + {"jobReference": job_reference_1, "status": {"state": "RUNNING"}}, + # jobs.getQueryResults x2 + # + # Note: internalError is ambiguous in jobs.getQueryResults. The + # problem could be at the Google Frontend level or it could be because + # the job has failed due to some transient issues and the BigQuery + # REST API is translating the job failed status into failure HTTP + # codes. + # + # TODO(GH#1903): We shouldn't retry nearly this many times when we get + # ambiguous errors from jobs.getQueryResults. + # See: https://github.com/googleapis/python-bigquery/issues/1903 + internal_error, + internal_error, + # jobs.get -- the job has failed + { + "jobReference": job_reference_1, + "status": {"state": "DONE", "errorResult": {"reason": "internalError"}}, + }, + # jobs.insert + {"jobReference": job_reference_2, "status": {"state": "PENDING"}}, + # jobs.get + {"jobReference": job_reference_2, "status": {"state": "RUNNING"}}, + # jobs.getQueryResults + {"jobReference": job_reference_2, "jobComplete": True}, + # jobs.get + {"jobReference": job_reference_2, "status": {"state": "DONE"}}, + ] + + conn = client._connection = make_connection() + conn.api_request.side_effect = responses + + with freezegun.freeze_time( + # Note: because of exponential backoff and a bit of jitter, + # NUM_API_RETRIES will get less accurate the greater the value. + # We add 1 because we know there will be at least some additional + # calls to fetch the time / sleep before the retry deadline is hit. + auto_tick_seconds=( + google.cloud.bigquery.retry._DEFAULT_RETRY_DEADLINE / NUM_API_RETRIES + ) + + 1, + ): + job = client.query("select 1") + job.result() + + conn.api_request.assert_has_calls( + [ + # jobs.insert + mock.call( + method="POST", + path="/projects/PROJECT/jobs", + data={ + "jobReference": {"jobId": "1", "projectId": "PROJECT"}, + "configuration": { + "query": {"useLegacySql": False, "query": "select 1"} + }, + }, + timeout=None, + ), + # jobs.get + mock.call( + method="GET", + path="/projects/PROJECT/jobs/1", + query_params={"location": "test-loc"}, + timeout=None, + ), + # jobs.getQueryResults x2 + mock.call( + method="GET", + path="/projects/PROJECT/queries/1", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=None, + ), + mock.call( + method="GET", + path="/projects/PROJECT/queries/1", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=None, + ), + # jobs.get -- verify that the job has failed + mock.call( + method="GET", + path="/projects/PROJECT/jobs/1", + query_params={"location": "test-loc"}, + timeout=None, + ), + # jobs.insert + mock.call( + method="POST", + path="/projects/PROJECT/jobs", + data={ + "jobReference": { + # Make sure that we generated a new job ID. + "jobId": "2", + "projectId": "PROJECT", + }, + "configuration": { + "query": {"useLegacySql": False, "query": "select 1"} + }, + }, + timeout=None, + ), + # jobs.get + mock.call( + method="GET", + path="/projects/PROJECT/jobs/2", + query_params={"location": "test-loc"}, + timeout=None, + ), + # jobs.getQueryResults + mock.call( + method="GET", + path="/projects/PROJECT/queries/2", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=None, + ), + # jobs.get + mock.call( + method="GET", + path="/projects/PROJECT/jobs/2", + query_params={"location": "test-loc"}, + timeout=None, + ), + ] + ) + + # With job_retry_on_query, we're testing 4 scenarios: # - Pass None retry to `query`. # - Pass None retry to `result`. @@ -187,8 +349,8 @@ def api_request(method, path, query_params=None, data=None, **kw): with pytest.raises(google.api_core.exceptions.RetryError): job.result() - # We never got a successful job, so the job id never changed: - assert job.job_id == orig_job_id + # We retried the job at least once, so we should have generated a new job ID. + assert job.job_id != orig_job_id # We failed because we couldn't succeed after 120 seconds. # But we can try again: @@ -301,8 +463,8 @@ def test_query_and_wait_retries_job_for_DDL_queries(): job_config=None, page_size=None, max_results=None, - retry=DEFAULT_JOB_RETRY, - job_retry=DEFAULT_JOB_RETRY, + retry=google.cloud.bigquery.retry.DEFAULT_RETRY, + job_retry=google.cloud.bigquery.retry.DEFAULT_JOB_RETRY, ) assert len(list(rows)) == 4 From bd0814caf2b3bf907006cd50129f7798874571d5 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 12:24:03 -0500 Subject: [PATCH 295/536] chore(main): release 3.21.0 (#1883) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 22 ++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95af2d213..0fc77f7c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,28 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.21.0](https://github.com/googleapis/python-bigquery/compare/v3.20.1...v3.21.0) (2024-04-18) + + +### Features + +* Add compression option ZSTD. ([#1890](https://github.com/googleapis/python-bigquery/issues/1890)) ([5ed9cce](https://github.com/googleapis/python-bigquery/commit/5ed9ccee204b7cf8e96cb0e050f6830c05f3b4fd)) +* Adds billing to opentel ([#1889](https://github.com/googleapis/python-bigquery/issues/1889)) ([38697fb](https://github.com/googleapis/python-bigquery/commit/38697fb942516fc2f6f5e21e19a11811fbaeb1f4)) +* Support RANGE in queries Part 1: JSON ([#1884](https://github.com/googleapis/python-bigquery/issues/1884)) ([3634405](https://github.com/googleapis/python-bigquery/commit/3634405fa1b40ae5f69b06d7c7f8de4e3d246d92)) + + +### Bug Fixes + +* Add types to DatasetReference constructor ([#1601](https://github.com/googleapis/python-bigquery/issues/1601)) ([bf8861c](https://github.com/googleapis/python-bigquery/commit/bf8861c3473a1af978db7a06463ddc0bad86f326)) +* Creates linting-typing.cfg in presubmit ([#1881](https://github.com/googleapis/python-bigquery/issues/1881)) ([c852c15](https://github.com/googleapis/python-bigquery/commit/c852c153c55025ba1187d61e313ead2308616c55)) +* Remove duplicate key time_partitioning from Table._PROPERTY_TO_A… ([#1898](https://github.com/googleapis/python-bigquery/issues/1898)) ([82ae908](https://github.com/googleapis/python-bigquery/commit/82ae908fbf3b2361343fff1859d3533383dc50ec)) +* Retry query jobs that fail even with ambiguous `jobs.getQueryResults` REST errors ([#1903](https://github.com/googleapis/python-bigquery/issues/1903), [#1900](https://github.com/googleapis/python-bigquery/issues/1900)) ([1367b58](https://github.com/googleapis/python-bigquery/commit/1367b584b68d917ec325ce4383a0e9a36205b894)) + + +### Performance Improvements + +* Avoid unnecessary API call in `QueryJob.result()` when job is already finished ([#1900](https://github.com/googleapis/python-bigquery/issues/1900)) ([1367b58](https://github.com/googleapis/python-bigquery/commit/1367b584b68d917ec325ce4383a0e9a36205b894)) + ## [3.20.1](https://github.com/googleapis/python-bigquery/compare/v3.20.0...v3.20.1) (2024-04-01) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 55093e390..29c08b51f 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.20.1" +__version__ = "3.21.0" From 5251b5dbb254732ea730bab664ad319bd5be47e7 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 18 Apr 2024 13:14:56 -0700 Subject: [PATCH 296/536] feat: support RANGE in queries Part 2: Arrow (#1868) * feat: support range in queries as dict * fix sys tests * lint * add arrow support * fix python 3.7 test error * print dependencies in sys test * add unit test and docs * fix unit test * add func docs * add sys test for tabledata.list in arrow * add sys test for tabledata.list as iterator * lint * fix docs error * fix docstring * fix docstring * fix docstring * docs * docs * docs * move dtypes mapping code * address comment * address comment * fix pytest error * Revert "move dtypes mapping code" This reverts commit c46c65c822b3c8295d5d6650b1c9c97d35d2ba5b. * remove commented out assertions * typo and formats * add None-check for range_element_type and add unit tests * change test skip condition * fix test error * change test skip condition * change test skip condition * change decorator order * use a different way to construct test data * fix error message and add warning number check * add warning number check and comments --- google/cloud/bigquery/_helpers.py | 16 ++- google/cloud/bigquery/_pandas_helpers.py | 33 ++++++ google/cloud/bigquery/dbapi/_helpers.py | 14 ++- google/cloud/bigquery/enums.py | 9 ++ google/cloud/bigquery/job/query.py | 67 +++++++++++ google/cloud/bigquery/query.py | 11 +- google/cloud/bigquery/table.py | 137 +++++++++++++++++++++++ noxfile.py | 3 + tests/data/scalars.csv | 2 + tests/data/scalars_schema_csv.json | 10 ++ tests/system/conftest.py | 22 +++- tests/system/test_arrow.py | 27 +++++ tests/system/test_list_rows.py | 14 +++ tests/unit/test__pandas_helpers.py | 61 ++++++++++ tests/unit/test_table.py | 115 ++++++++++++++++++- 15 files changed, 516 insertions(+), 25 deletions(-) create mode 100644 tests/data/scalars.csv create mode 100644 tests/data/scalars_schema_csv.json diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 0572867d7..083eb9f9d 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -66,6 +66,8 @@ _UNIVERSE_DOMAIN_ENV = "GOOGLE_CLOUD_UNIVERSE_DOMAIN" """Environment variable for setting universe domain.""" +_SUPPORTED_RANGE_ELEMENTS = {"TIMESTAMP", "DATETIME", "DATE"} + def _get_client_universe( client_options: Optional[Union[client_options_lib.ClientOptions, dict]] @@ -310,17 +312,13 @@ def _json_from_json(value, field): def _range_element_from_json(value, field): - """Coerce 'value' to a range element value, if set or not nullable.""" + """Coerce 'value' to a range element value.""" if value == "UNBOUNDED": return None - elif field.element_type == "DATE": - return _date_from_json(value, None) - elif field.element_type == "DATETIME": - return _datetime_from_json(value, None) - elif field.element_type == "TIMESTAMP": - return _timestamp_from_json(value, None) + if field.element_type in _SUPPORTED_RANGE_ELEMENTS: + return _CELLDATA_FROM_JSON[field.element_type](value, field.element_type) else: - raise ValueError(f"Unsupported range field type: {value}") + raise ValueError(f"Unsupported range element type: {field.element_type}") def _range_from_json(value, field): @@ -344,7 +342,7 @@ def _range_from_json(value, field): end = _range_element_from_json(end, field.range_element_type) return {"start": start, "end": end} else: - raise ValueError(f"Unknown range format: {value}") + raise ValueError(f"Unknown format for range value: {value}") else: return None diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 3b58d3736..8395478fb 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -142,6 +142,17 @@ def bq_to_arrow_struct_data_type(field): return pyarrow.struct(arrow_fields) +def bq_to_arrow_range_data_type(field): + if field is None: + raise ValueError( + "Range element type cannot be None, must be one of " + "DATE, DATETIME, or TIMESTAMP" + ) + element_type = field.element_type.upper() + arrow_element_type = _pyarrow_helpers.bq_to_arrow_scalars(element_type)() + return pyarrow.struct([("start", arrow_element_type), ("end", arrow_element_type)]) + + def bq_to_arrow_data_type(field): """Return the Arrow data type, corresponding to a given BigQuery column. @@ -160,6 +171,9 @@ def bq_to_arrow_data_type(field): if field_type_upper in schema._STRUCT_TYPES: return bq_to_arrow_struct_data_type(field) + if field_type_upper == "RANGE": + return bq_to_arrow_range_data_type(field.range_element_type) + data_type_constructor = _pyarrow_helpers.bq_to_arrow_scalars(field_type_upper) if data_type_constructor is None: return None @@ -220,6 +234,9 @@ def default_types_mapper( datetime_dtype: Union[Any, None] = None, time_dtype: Union[Any, None] = None, timestamp_dtype: Union[Any, None] = None, + range_date_dtype: Union[Any, None] = None, + range_datetime_dtype: Union[Any, None] = None, + range_timestamp_dtype: Union[Any, None] = None, ): """Create a mapping from pyarrow types to pandas types. @@ -274,6 +291,22 @@ def types_mapper(arrow_data_type): elif time_dtype is not None and pyarrow.types.is_time(arrow_data_type): return time_dtype + elif pyarrow.types.is_struct(arrow_data_type): + if range_datetime_dtype is not None and arrow_data_type.equals( + range_datetime_dtype.pyarrow_dtype + ): + return range_datetime_dtype + + elif range_date_dtype is not None and arrow_data_type.equals( + range_date_dtype.pyarrow_dtype + ): + return range_date_dtype + + elif range_timestamp_dtype is not None and arrow_data_type.equals( + range_timestamp_dtype.pyarrow_dtype + ): + return range_timestamp_dtype + return types_mapper diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 117fa8ae7..a4ab05ce8 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -277,12 +277,14 @@ def complex_query_parameter( param = query.ArrayQueryParameter( name, sub_type, - value - if isinstance(sub_type, query.ScalarQueryParameterType) - else [ - complex_query_parameter(None, v, sub_type._complex__src, base) - for v in value - ], + ( + value + if isinstance(sub_type, query.ScalarQueryParameterType) + else [ + complex_query_parameter(None, v, sub_type._complex__src, base) + for v in value + ] + ), ) elif type_type == STRUCT: if not isinstance(value, collections_abc.Mapping): diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 1abe28381..d8cbe9969 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -99,6 +99,15 @@ class DefaultPandasDTypes(enum.Enum): TIME_DTYPE = object() """Specifies default time dtype""" + RANGE_DATE_DTYPE = object() + """Specifies default range date dtype""" + + RANGE_DATETIME_DTYPE = object() + """Specifies default range datetime dtype""" + + RANGE_TIMESTAMP_DTYPE = object() + """Specifies default range timestamp dtype""" + class DestinationFormat(object): """The exported file format. The default value is :attr:`CSV`. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 7436b6013..09a69e11c 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1784,6 +1784,13 @@ def to_dataframe( datetime_dtype: Union[Any, None] = None, time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE, timestamp_dtype: Union[Any, None] = None, + range_date_dtype: Union[Any, None] = DefaultPandasDTypes.RANGE_DATE_DTYPE, + range_datetime_dtype: Union[ + Any, None + ] = DefaultPandasDTypes.RANGE_DATETIME_DTYPE, + range_timestamp_dtype: Union[ + Any, None + ] = DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1919,6 +1926,63 @@ def to_dataframe( .. versionadded:: 3.10.0 + range_date_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + )) + + to convert BigQuery RANGE type, instead of relying on + the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + + range_datetime_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [ + ("start", pyarrow.timestamp("us")), + ("end", pyarrow.timestamp("us")), + ] + )) + + to convert BigQuery RANGE type, instead of relying on + the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + + range_timestamp_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + )) + + to convert BigQuery RANGE type, instead of relying + on the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data @@ -1949,6 +2013,9 @@ def to_dataframe( datetime_dtype=datetime_dtype, time_dtype=time_dtype, timestamp_dtype=timestamp_dtype, + range_date_dtype=range_date_dtype, + range_datetime_dtype=range_datetime_dtype, + range_timestamp_dtype=range_timestamp_dtype, ) # If changing the signature of this method, make sure to apply the same diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 9c9402b74..9c59056fd 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -24,14 +24,13 @@ from google.cloud.bigquery._helpers import _rows_from_json from google.cloud.bigquery._helpers import _QUERY_PARAMS_FROM_JSON from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM +from google.cloud.bigquery._helpers import _SUPPORTED_RANGE_ELEMENTS _SCALAR_VALUE_TYPE = Optional[ Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date] ] -_RANGE_ELEMENT_TYPE_STR = {"TIMESTAMP", "DATETIME", "DATE"} - class ConnectionProperty: """A connection-level property to customize query behavior. @@ -388,14 +387,14 @@ def _parse_range_element_type(self, type_): google.cloud.bigquery.query.ScalarQueryParameterType: Instance """ if isinstance(type_, str): - if type_ not in _RANGE_ELEMENT_TYPE_STR: + if type_ not in _SUPPORTED_RANGE_ELEMENTS: raise ValueError( "If given as a string, range element type must be one of " "'TIMESTAMP', 'DATE', or 'DATETIME'." ) return ScalarQueryParameterType(type_) elif isinstance(type_, ScalarQueryParameterType): - if type_._type not in _RANGE_ELEMENT_TYPE_STR: + if type_._type not in _SUPPORTED_RANGE_ELEMENTS: raise ValueError( "If given as a ScalarQueryParameter object, range element " "type must be one of 'TIMESTAMP', 'DATE', or 'DATETIME' " @@ -960,14 +959,14 @@ class RangeQueryParameter(_AbstractQueryParameter): @classmethod def _parse_range_element_type(self, range_element_type): if isinstance(range_element_type, str): - if range_element_type not in _RANGE_ELEMENT_TYPE_STR: + if range_element_type not in _SUPPORTED_RANGE_ELEMENTS: raise ValueError( "If given as a string, range_element_type must be one of " f"'TIMESTAMP', 'DATE', or 'DATETIME'. Got {range_element_type}." ) return RangeQueryParameterType(range_element_type) elif isinstance(range_element_type, RangeQueryParameterType): - if range_element_type.type_._type not in _RANGE_ELEMENT_TYPE_STR: + if range_element_type.type_._type not in _SUPPORTED_RANGE_ELEMENTS: raise ValueError( "If given as a RangeQueryParameterType object, " "range_element_type must be one of 'TIMESTAMP', 'DATE', " diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 73e755e9e..2f07bcc78 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2044,6 +2044,13 @@ def to_dataframe( datetime_dtype: Union[Any, None] = None, time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE, timestamp_dtype: Union[Any, None] = None, + range_date_dtype: Union[Any, None] = DefaultPandasDTypes.RANGE_DATE_DTYPE, + range_datetime_dtype: Union[ + Any, None + ] = DefaultPandasDTypes.RANGE_DATETIME_DTYPE, + range_timestamp_dtype: Union[ + Any, None + ] = DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -2183,6 +2190,63 @@ def to_dataframe( .. versionadded:: 3.10.0 + range_date_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + )) + + to convert BigQuery RANGE type, instead of relying on + the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + + range_datetime_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [ + ("start", pyarrow.timestamp("us")), + ("end", pyarrow.timestamp("us")), + ] + )) + + to convert BigQuery RANGE type, instead of relying on + the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + + range_timestamp_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + )) + + to convert BigQuery RANGE type, instead of relying + on the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -2214,6 +2278,69 @@ def to_dataframe( if time_dtype is DefaultPandasDTypes.TIME_DTYPE: time_dtype = db_dtypes.TimeDtype() + if range_date_dtype is DefaultPandasDTypes.RANGE_DATE_DTYPE: + try: + range_date_dtype = pandas.ArrowDtype( + pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + ) + ) + except AttributeError: + # pandas.ArrowDtype was introduced in pandas 1.5, but python 3.7 + # only supports upto pandas 1.3. If pandas.ArrowDtype is not + # present, we raise a warning and set range_date_dtype to None. + msg = ( + "Unable to find class ArrowDtype in pandas, setting " + "range_date_dtype to be None. To use ArrowDtype, please " + "use pandas >= 1.5 and python >= 3.8." + ) + warnings.warn(msg) + range_date_dtype = None + + if range_datetime_dtype is DefaultPandasDTypes.RANGE_DATETIME_DTYPE: + try: + range_datetime_dtype = pandas.ArrowDtype( + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us")), + ("end", pyarrow.timestamp("us")), + ] + ) + ) + except AttributeError: + # pandas.ArrowDtype was introduced in pandas 1.5, but python 3.7 + # only supports upto pandas 1.3. If pandas.ArrowDtype is not + # present, we raise a warning and set range_datetime_dtype to None. + msg = ( + "Unable to find class ArrowDtype in pandas, setting " + "range_datetime_dtype to be None. To use ArrowDtype, " + "please use pandas >= 1.5 and python >= 3.8." + ) + warnings.warn(msg) + range_datetime_dtype = None + + if range_timestamp_dtype is DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE: + try: + range_timestamp_dtype = pandas.ArrowDtype( + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + ) + ) + except AttributeError: + # pandas.ArrowDtype was introduced in pandas 1.5, but python 3.7 + # only supports upto pandas 1.3. If pandas.ArrowDtype is not + # present, we raise a warning and set range_timestamp_dtype to None. + msg = ( + "Unable to find class ArrowDtype in pandas, setting " + "range_timestamp_dtype to be None. To use ArrowDtype, " + "please use pandas >= 1.5 and python >= 3.8." + ) + warnings.warn(msg) + range_timestamp_dtype = None + if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"): raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE) @@ -2298,6 +2425,9 @@ def to_dataframe( datetime_dtype=datetime_dtype, time_dtype=time_dtype, timestamp_dtype=timestamp_dtype, + range_date_dtype=range_date_dtype, + range_datetime_dtype=range_datetime_dtype, + range_timestamp_dtype=range_timestamp_dtype, ), ) else: @@ -2502,6 +2632,9 @@ def to_dataframe( datetime_dtype=None, time_dtype=None, timestamp_dtype=None, + range_date_dtype=None, + range_datetime_dtype=None, + range_timestamp_dtype=None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2519,6 +2652,9 @@ def to_dataframe( datetime_dtype (Any): Ignored. Added for compatibility with RowIterator. time_dtype (Any): Ignored. Added for compatibility with RowIterator. timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator. + range_date_dtype (Any): Ignored. Added for compatibility with RowIterator. + range_datetime_dtype (Any): Ignored. Added for compatibility with RowIterator. + range_timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. @@ -2541,6 +2677,7 @@ def to_geodataframe( dtypes (Any): Ignored. Added for compatibility with RowIterator. progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. + geography_column (str): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/noxfile.py b/noxfile.py index 034bb843a..78a9ab5b6 100644 --- a/noxfile.py +++ b/noxfile.py @@ -208,6 +208,9 @@ def system(session): extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) + # print versions of all dependencies + session.run("python", "-m", "pip", "freeze") + # Run py.test against the system tests. session.run( "py.test", diff --git a/tests/data/scalars.csv b/tests/data/scalars.csv new file mode 100644 index 000000000..7af97583f --- /dev/null +++ b/tests/data/scalars.csv @@ -0,0 +1,2 @@ +"[2020-01-01, 2020-02-01)" + diff --git a/tests/data/scalars_schema_csv.json b/tests/data/scalars_schema_csv.json new file mode 100644 index 000000000..82b878d95 --- /dev/null +++ b/tests/data/scalars_schema_csv.json @@ -0,0 +1,10 @@ +[ + { + "mode" : "NULLABLE", + "name" : "range_date", + "type" : "RANGE", + "rangeElementType": { + "type": "DATE" + } + } + ] \ No newline at end of file diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 784a1dd5c..8efa042af 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -96,12 +96,14 @@ def load_scalars_table( project_id: str, dataset_id: str, data_path: str = "scalars.jsonl", + source_format=enums.SourceFormat.NEWLINE_DELIMITED_JSON, + schema_source="scalars_schema.json", ) -> str: - schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + schema = bigquery_client.schema_from_json(DATA_DIR / schema_source) table_id = data_path.replace(".", "_") + hex(random.randrange(1000000)) job_config = bigquery.LoadJobConfig() job_config.schema = schema - job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + job_config.source_format = source_format full_table_id = f"{project_id}.{dataset_id}.{table_id}" with open(DATA_DIR / data_path, "rb") as data_file: job = bigquery_client.load_table_from_file( @@ -151,6 +153,22 @@ def scalars_table_multi_location( return request.param, full_table_id +@pytest.fixture(scope="session") +def scalars_table_csv( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +): + full_table_id = load_scalars_table( + bigquery_client, + project_id, + dataset_id, + data_path="scalars.csv", + source_format=enums.SourceFormat.CSV, + schema_source="scalars_schema_csv.json", + ) + yield full_table_id + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + @pytest.fixture def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub): return replace_non_anum("_", request.node.name) diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index 8b88b6844..82cf11f85 100644 --- a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -167,3 +167,30 @@ def test_arrow_extension_types_same_for_storage_and_REST_APIs_894( b"ARROW:extension:name": b"google:sqlType:geography", b"ARROW:extension:metadata": b'{"encoding": "WKT"}', } + + +def test_list_rows_range_csv( + bigquery_client: bigquery.Client, + scalars_table_csv: str, +): + table_id = scalars_table_csv + + schema = [ + bigquery.SchemaField( + "range_date", enums.SqlTypeNames.RANGE, range_element_type="DATE" + ), + ] + + arrow_table = bigquery_client.list_rows( + table_id, + selected_fields=schema, + ).to_arrow() + + schema = arrow_table.schema + + expected_type = pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + ) + + range_type = schema.field("range_date").type + assert range_type == expected_type diff --git a/tests/system/test_list_rows.py b/tests/system/test_list_rows.py index 4c08958c3..108b842ce 100644 --- a/tests/system/test_list_rows.py +++ b/tests/system/test_list_rows.py @@ -118,3 +118,17 @@ def test_list_rows_scalars_extreme( assert value == 4 else: assert value is None + + +def test_list_rows_range(bigquery_client: bigquery.Client, scalars_table_csv: str): + rows = bigquery_client.list_rows(scalars_table_csv) + rows = list(rows) + row = rows[0] + expected_range = { + "start": datetime.date(2020, 1, 1), + "end": datetime.date(2020, 2, 1), + } + assert row["range_date"] == expected_range + + row_null = rows[1] + assert row_null["range_date"] is None diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 5c13669f3..58d2b73b3 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -670,6 +670,67 @@ def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): assert array.to_pylist() == list(series) +@pytest.mark.parametrize( + "bq_schema,expected", + [ + ( + schema.SchemaField( + "field1", + "RANGE", + range_element_type=schema.FieldElementType("DATE"), + mode="NULLABLE", + ), + pyarrow.struct( + [ + ("start", pyarrow.date32()), + ("end", pyarrow.date32()), + ] + ), + ), + ( + schema.SchemaField( + "field2", + "RANGE", + range_element_type=schema.FieldElementType("DATETIME"), + mode="NULLABLE", + ), + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz=None)), + ("end", pyarrow.timestamp("us", tz=None)), + ] + ), + ), + ( + schema.SchemaField( + "field3", + "RANGE", + range_element_type=schema.FieldElementType("TIMESTAMP"), + mode="NULLABLE", + ), + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + ), + ), + ], +) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bq_to_arrow_data_type_w_range(module_under_test, bq_schema, expected): + actual = module_under_test.bq_to_arrow_data_type(bq_schema) + assert actual.equals(expected) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bq_to_arrow_data_type_w_range_no_element(module_under_test): + field = schema.SchemaField("field1", "RANGE", mode="NULLABLE") + with pytest.raises(ValueError, match="Range element type cannot be None"): + module_under_test.bq_to_arrow_data_type(field) + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 3953170fd..099529f95 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -3503,7 +3503,11 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): user_warnings = [ warning for warning in warned if warning.category is UserWarning ] - self.assertEqual(len(user_warnings), 0) + # With Python 3.7 and 3.8, len(user_warnings) = 3. With pandas < 1.5, + # pandas.ArrowDtype is not supported. We raise warnings because + # range columns have to be converted to object. + # With higher Python versions and noextra tests, len(user_warnings) = 0 + self.assertIn(len(user_warnings), [0, 3]) self.assertEqual(len(df), 4) @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None) @@ -3534,7 +3538,11 @@ def test_to_dataframe_no_tqdm(self): user_warnings = [ warning for warning in warned if warning.category is UserWarning ] - self.assertEqual(len(user_warnings), 1) + # With Python 3.7 and 3.8, len(user_warnings) = 4. With pandas < 1.5, + # pandas.ArrowDtype is not supported. We raise warnings because + # range columns have to be converted to object. + # With higher Python versions and noextra tests, len(user_warnings) = 1 + self.assertIn(len(user_warnings), [1, 4]) # Even though the progress bar won't show, downloading the dataframe # should still work. @@ -3653,6 +3661,9 @@ def test_to_dataframe_w_dtypes_mapper(self): SchemaField("datetime", "DATETIME"), SchemaField("time", "TIME"), SchemaField("timestamp", "TIMESTAMP"), + SchemaField("range_timestamp", "RANGE", range_element_type="TIMESTAMP"), + SchemaField("range_datetime", "RANGE", range_element_type="DATETIME"), + SchemaField("range_date", "RANGE", range_element_type="DATE"), ] row_data = [ [ @@ -3665,6 +3676,9 @@ def test_to_dataframe_w_dtypes_mapper(self): "1999-12-31T00:00:00.000000", "00:00:00.000000", "1433836800000000", + "[1433836800000000, 1433999900000000)", + "[2009-06-17T13:45:30, 2019-07-17T13:45:30)", + "[2020-10-01, 2021-10-02)", ], [ "Bharney Rhubble", @@ -3676,6 +3690,9 @@ def test_to_dataframe_w_dtypes_mapper(self): "4567-12-31T00:00:00.000000", "12:00:00.232413", "81953424000000000", + "[1433836800000000, UNBOUNDED)", + "[2009-06-17T13:45:30, UNBOUNDED)", + "[2020-10-01, UNBOUNDED)", ], [ "Wylma Phlyntstone", @@ -3687,6 +3704,9 @@ def test_to_dataframe_w_dtypes_mapper(self): "9999-12-31T23:59:59.999999", "23:59:59.999999", "253402261199999999", + "[UNBOUNDED, UNBOUNDED)", + "[UNBOUNDED, UNBOUNDED)", + "[UNBOUNDED, UNBOUNDED)", ], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] @@ -3724,6 +3744,39 @@ def test_to_dataframe_w_dtypes_mapper(self): if hasattr(pandas, "ArrowDtype") else None ), + range_date_dtype=( + pandas.ArrowDtype( + pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + ) + ) + if hasattr(pandas, "ArrowDtype") + else None + ), + range_datetime_dtype=( + pandas.ArrowDtype( + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us")), + ("end", pyarrow.timestamp("us")), + ] + ) + ) + if hasattr(pandas, "ArrowDtype") + else None + ), + range_timestamp_dtype=( + pandas.ArrowDtype( + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + ) + ) + if hasattr(pandas, "ArrowDtype") + else None + ), ) self.assertIsInstance(df, pandas.DataFrame) @@ -3791,6 +3844,52 @@ def test_to_dataframe_w_dtypes_mapper(self): ], ) self.assertEqual(df.timestamp.dtype.name, "timestamp[us, tz=UTC][pyarrow]") + + self.assertEqual( + list(df.range_timestamp), + [ + { + "start": datetime.datetime( + 2015, 6, 9, 8, 0, 0, tzinfo=datetime.timezone.utc + ), + "end": datetime.datetime( + 2015, 6, 11, 5, 18, 20, tzinfo=datetime.timezone.utc + ), + }, + { + "start": datetime.datetime( + 2015, 6, 9, 8, 0, 0, tzinfo=datetime.timezone.utc + ), + "end": None, + }, + {"start": None, "end": None}, + ], + ) + + self.assertEqual( + list(df.range_datetime), + [ + { + "start": datetime.datetime(2009, 6, 17, 13, 45, 30), + "end": datetime.datetime(2019, 7, 17, 13, 45, 30), + }, + {"start": datetime.datetime(2009, 6, 17, 13, 45, 30), "end": None}, + {"start": None, "end": None}, + ], + ) + + self.assertEqual( + list(df.range_date), + [ + { + "start": datetime.date(2020, 10, 1), + "end": datetime.date(2021, 10, 2), + }, + {"start": datetime.date(2020, 10, 1), "end": None}, + {"start": None, "end": None}, + ], + ) + else: self.assertEqual( list(df.date), @@ -3851,6 +3950,9 @@ def test_to_dataframe_w_none_dtypes_mapper(self): SchemaField("datetime", "DATETIME"), SchemaField("time", "TIME"), SchemaField("timestamp", "TIMESTAMP"), + SchemaField("range_timestamp", "RANGE", range_element_type="TIMESTAMP"), + SchemaField("range_datetime", "RANGE", range_element_type="DATETIME"), + SchemaField("range_date", "RANGE", range_element_type="DATE"), ] row_data = [ [ @@ -3863,6 +3965,9 @@ def test_to_dataframe_w_none_dtypes_mapper(self): "1999-12-31T00:00:00.000000", "23:59:59.999999", "1433836800000000", + "[1433836800000000, 1433999900000000)", + "[2009-06-17T13:45:30, 2019-07-17T13:45:30)", + "[2020-10-01, 2021-10-02)", ], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] @@ -3880,6 +3985,9 @@ def test_to_dataframe_w_none_dtypes_mapper(self): datetime_dtype=None, time_dtype=None, timestamp_dtype=None, + range_timestamp_dtype=None, + range_datetime_dtype=None, + range_date_dtype=None, ) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(df.complete.dtype.name, "bool") @@ -3891,6 +3999,9 @@ def test_to_dataframe_w_none_dtypes_mapper(self): self.assertEqual(df.datetime.dtype.name, "datetime64[ns]") self.assertEqual(df.time.dtype.name, "object") self.assertEqual(df.timestamp.dtype.name, "datetime64[ns, UTC]") + self.assertEqual(df.range_timestamp.dtype.name, "object") + self.assertEqual(df.range_datetime.dtype.name, "object") + self.assertEqual(df.range_date.dtype.name, "object") def test_to_dataframe_w_unsupported_dtypes_mapper(self): pytest.importorskip("pandas") From 937ef1003bfdf60987010ad85ae19cf7609326b9 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 19 Apr 2024 15:06:02 +0200 Subject: [PATCH 297/536] chore(deps): update all dependencies (#1904) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index fee6806b7..3e9e59430 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.20.1 +google-cloud-bigquery==3.21.0 google-auth-oauthlib==1.2.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index b3d9bc841..6502ba146 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -15,13 +15,13 @@ geopandas===0.13.2; python_version == '3.8' geopandas==0.14.3; python_version >= '3.9' google-api-core==2.18.0 google-auth==2.29.0 -google-cloud-bigquery==3.20.1 +google-cloud-bigquery==3.21.0 google-cloud-bigquery-storage==2.24.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 -grpcio==1.62.1 +grpcio==1.62.2 idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 @@ -44,7 +44,7 @@ pytz==2024.1 PyYAML==6.0.1 requests==2.31.0 rsa==4.9 -Shapely==2.0.3 +Shapely==2.0.4 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.11.0; python_version >= '3.8' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 61471a348..a431f466f 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.20.1 +google.cloud.bigquery==3.21.0 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 3960f47b9..dcce1e3ec 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.20.1 +google-cloud-bigquery==3.21.0 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 95f915364..fee0ce65a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.20.1 +google-cloud-bigquery==3.21.0 From 0e390668aaa411d59cc99514c89da8b23af5789a Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 6 May 2024 10:16:17 -0400 Subject: [PATCH 298/536] chore(main): release 3.22.0 (#1905) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0fc77f7c5..a201ef851 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.22.0](https://github.com/googleapis/python-bigquery/compare/v3.21.0...v3.22.0) (2024-04-19) + + +### Features + +* Support RANGE in queries Part 2: Arrow ([#1868](https://github.com/googleapis/python-bigquery/issues/1868)) ([5251b5d](https://github.com/googleapis/python-bigquery/commit/5251b5dbb254732ea730bab664ad319bd5be47e7)) + ## [3.21.0](https://github.com/googleapis/python-bigquery/compare/v3.20.1...v3.21.0) (2024-04-18) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 29c08b51f..b6c082ffc 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.21.0" +__version__ = "3.22.0" From 74e75e89ce3a5ac18112b2c1c33248445ff072e4 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 6 May 2024 16:46:27 -0700 Subject: [PATCH 299/536] feat: support insertAll for range (#1909) * feat: support insertAll for range * revert INTERVAL regex * lint * add unit test * lint --- google/cloud/bigquery/_helpers.py | 52 +++++++++++++- tests/unit/test__helpers.py | 114 +++++++++++++++++++++++++++++- 2 files changed, 162 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 083eb9f9d..668b4ca3d 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -50,6 +50,7 @@ r"(?P-?\d+) " r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" ) +_RANGE_PATTERN = re.compile(r"\[.*, .*\)") BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST" """Environment variable defining host for emulator.""" @@ -334,9 +335,8 @@ def _range_from_json(value, field): The parsed range object from ``value`` if the ``field`` is not null (otherwise it is :data:`None`). """ - range_literal = re.compile(r"\[.*, .*\)") if _not_null(value, field): - if range_literal.match(value): + if _RANGE_PATTERN.match(value): start, end = value[1:-1].split(", ") start = _range_element_from_json(start, field.range_element_type) end = _range_element_from_json(end, field.range_element_type) @@ -531,6 +531,52 @@ def _time_to_json(value): return value +def _range_element_to_json(value, element_type=None): + """Coerce 'value' to an JSON-compatible representation.""" + if value is None: + return None + elif isinstance(value, str): + if value.upper() in ("UNBOUNDED", "NULL"): + return None + else: + # We do not enforce range element value to be valid to reduce + # redundancy with backend. + return value + elif ( + element_type and element_type.element_type.upper() in _SUPPORTED_RANGE_ELEMENTS + ): + converter = _SCALAR_VALUE_TO_JSON_ROW.get(element_type.element_type.upper()) + return converter(value) + else: + raise ValueError( + f"Unsupported RANGE element type {element_type}, or " + "element type is empty. Must be DATE, DATETIME, or " + "TIMESTAMP" + ) + + +def _range_field_to_json(range_element_type, value): + """Coerce 'value' to an JSON-compatible representation.""" + if isinstance(value, str): + # string literal + if _RANGE_PATTERN.match(value): + start, end = value[1:-1].split(", ") + else: + raise ValueError(f"RANGE literal {value} has incorrect format") + elif isinstance(value, dict): + # dictionary + start = value.get("start") + end = value.get("end") + else: + raise ValueError( + f"Unsupported type of RANGE value {value}, must be " "string or dict" + ) + + start = _range_element_to_json(start, range_element_type) + end = _range_element_to_json(end, range_element_type) + return {"start": start, "end": end} + + # Converters used for scalar values marshalled to the BigQuery API, such as in # query parameters or the tabledata.insert API. _SCALAR_VALUE_TO_JSON_ROW = { @@ -676,6 +722,8 @@ def _single_field_to_json(field, row_value): if field.field_type == "RECORD": return _record_field_to_json(field.fields, row_value) + if field.field_type == "RANGE": + return _range_field_to_json(field.range_element_type, row_value) return _scalar_field_to_json(field, row_value) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index a50625e2a..1bf21479f 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -1049,10 +1049,22 @@ def test_w_datetime(self): self.assertEqual(self._call_fut(when), "12:13:41") -def _make_field(field_type, mode="NULLABLE", name="testing", fields=()): +def _make_field( + field_type, + mode="NULLABLE", + name="testing", + fields=(), + range_element_type=None, +): from google.cloud.bigquery.schema import SchemaField - return SchemaField(name=name, field_type=field_type, mode=mode, fields=fields) + return SchemaField( + name=name, + field_type=field_type, + mode=mode, + fields=fields, + range_element_type=range_element_type, + ) class Test_scalar_field_to_json(unittest.TestCase): @@ -1251,6 +1263,98 @@ def test_w_dict_unknown_fields(self): ) +class Test_range_field_to_json(unittest.TestCase): + def _call_fut(self, field, value): + from google.cloud.bigquery._helpers import _range_field_to_json + + return _range_field_to_json(field, value) + + def test_w_date(self): + field = _make_field("RANGE", range_element_type="DATE") + start = datetime.date(2016, 12, 3) + original = {"start": start} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03", "end": None} + self.assertEqual(converted, expected) + + def test_w_date_string(self): + field = _make_field("RANGE", range_element_type="DATE") + original = {"start": "2016-12-03"} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03", "end": None} + self.assertEqual(converted, expected) + + def test_w_datetime(self): + field = _make_field("RANGE", range_element_type="DATETIME") + start = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456) + original = {"start": start} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03T14:11:27.123456", "end": None} + self.assertEqual(converted, expected) + + def test_w_datetime_string(self): + field = _make_field("RANGE", range_element_type="DATETIME") + original = {"start": "2016-12-03T14:11:27.123456"} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03T14:11:27.123456", "end": None} + self.assertEqual(converted, expected) + + def test_w_timestamp(self): + from google.cloud._helpers import UTC + + field = _make_field("RANGE", range_element_type="TIMESTAMP") + start = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC) + original = {"start": start} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03T14:11:27.123456Z", "end": None} + self.assertEqual(converted, expected) + + def test_w_timestamp_string(self): + field = _make_field("RANGE", range_element_type="TIMESTAMP") + original = {"start": "2016-12-03T14:11:27.123456Z"} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03T14:11:27.123456Z", "end": None} + self.assertEqual(converted, expected) + + def test_w_timestamp_float(self): + field = _make_field("RANGE", range_element_type="TIMESTAMP") + original = {"start": 12.34567} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": 12.34567, "end": None} + self.assertEqual(converted, expected) + + def test_w_string_literal(self): + field = _make_field("RANGE", range_element_type="DATE") + original = "[2016-12-03, UNBOUNDED)" + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03", "end": None} + self.assertEqual(converted, expected) + + def test_w_unsupported_range_element_type(self): + field = _make_field("RANGE", range_element_type="TIME") + with self.assertRaises(ValueError): + self._call_fut( + field.range_element_type, + {"start": datetime.time(12, 13, 41)}, + ) + + def test_w_no_range_element_type(self): + field = _make_field("RANGE") + with self.assertRaises(ValueError): + self._call_fut(field.range_element_type, "2016-12-03") + + def test_w_incorrect_literal_format(self): + field = _make_field("RANGE", range_element_type="DATE") + original = "[2016-12-03, UNBOUNDED]" + with self.assertRaises(ValueError): + self._call_fut(field.range_element_type, original) + + def test_w_unsupported_representation(self): + field = _make_field("RANGE", range_element_type="DATE") + with self.assertRaises(ValueError): + self._call_fut(field.range_element_type, object()) + + class Test_field_to_json(unittest.TestCase): def _call_fut(self, field, value): from google.cloud.bigquery._helpers import _field_to_json @@ -1285,6 +1389,12 @@ def test_w_scalar(self): converted = self._call_fut(field, original) self.assertEqual(converted, str(original)) + def test_w_range(self): + field = _make_field("RANGE", range_element_type="DATE") + original = {"start": "2016-12-03", "end": "2024-12-03"} + converted = self._call_fut(field, original) + self.assertEqual(converted, original) + class Test_snake_to_camel_case(unittest.TestCase): def _call_fut(self, value): From a86d7b96813f67fea28b46c5252416222edca9a6 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 9 May 2024 11:42:19 -0700 Subject: [PATCH 300/536] fix: add pyarrow version check for range support (#1914) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: add pyarrow version check for range support * add comment why we are making a separate constant --------- Co-authored-by: Tim Sweña (Swast) --- google/cloud/bigquery/_versions_helpers.py | 14 +++++++ google/cloud/bigquery/table.py | 48 +++++++--------------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/google/cloud/bigquery/_versions_helpers.py b/google/cloud/bigquery/_versions_helpers.py index 50d5961b3..72d4c921d 100644 --- a/google/cloud/bigquery/_versions_helpers.py +++ b/google/cloud/bigquery/_versions_helpers.py @@ -26,6 +26,9 @@ _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") _MIN_PANDAS_VERSION = packaging.version.Version("1.1.0") +_MIN_PANDAS_VERSION_RANGE = packaging.version.Version("1.5.0") +_MIN_PYARROW_VERSION_RANGE = packaging.version.Version("10.0.1") + class PyarrowVersions: """Version comparisons for pyarrow package.""" @@ -234,3 +237,14 @@ def try_import(self, raise_if_error: bool = False) -> Any: PANDAS_VERSIONS = PandasVersions() + +# Since RANGE support in pandas requires specific versions +# of both pyarrow and pandas, we make this a separate +# constant instead of as a property of PANDAS_VERSIONS +# or PYARROW_VERSIONS. +SUPPORTS_RANGE_PYARROW = ( + PANDAS_VERSIONS.try_import() is not None + and PANDAS_VERSIONS.installed_version >= _MIN_PANDAS_VERSION_RANGE + and PYARROW_VERSIONS.try_import() is not None + and PYARROW_VERSIONS.installed_version >= _MIN_PYARROW_VERSION_RANGE +) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 2f07bcc78..ad1253195 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -100,6 +100,12 @@ "because the necessary `__from_arrow__` attribute is missing." ) +_RANGE_PYARROW_WARNING = ( + "Unable to represent RANGE schema as struct using pandas ArrowDtype. Using " + "`object` instead. To use ArrowDtype, use pandas >= 1.5 and " + "pyarrow >= 10.0.1." +) + # How many of the total rows need to be downloaded already for us to skip # calling the BQ Storage API? ALMOST_COMPLETELY_CACHED_RATIO = 0.333 @@ -2279,26 +2285,18 @@ def to_dataframe( time_dtype = db_dtypes.TimeDtype() if range_date_dtype is DefaultPandasDTypes.RANGE_DATE_DTYPE: - try: + if _versions_helpers.SUPPORTS_RANGE_PYARROW: range_date_dtype = pandas.ArrowDtype( pyarrow.struct( [("start", pyarrow.date32()), ("end", pyarrow.date32())] ) ) - except AttributeError: - # pandas.ArrowDtype was introduced in pandas 1.5, but python 3.7 - # only supports upto pandas 1.3. If pandas.ArrowDtype is not - # present, we raise a warning and set range_date_dtype to None. - msg = ( - "Unable to find class ArrowDtype in pandas, setting " - "range_date_dtype to be None. To use ArrowDtype, please " - "use pandas >= 1.5 and python >= 3.8." - ) - warnings.warn(msg) + else: + warnings.warn(_RANGE_PYARROW_WARNING) range_date_dtype = None if range_datetime_dtype is DefaultPandasDTypes.RANGE_DATETIME_DTYPE: - try: + if _versions_helpers.SUPPORTS_RANGE_PYARROW: range_datetime_dtype = pandas.ArrowDtype( pyarrow.struct( [ @@ -2307,20 +2305,12 @@ def to_dataframe( ] ) ) - except AttributeError: - # pandas.ArrowDtype was introduced in pandas 1.5, but python 3.7 - # only supports upto pandas 1.3. If pandas.ArrowDtype is not - # present, we raise a warning and set range_datetime_dtype to None. - msg = ( - "Unable to find class ArrowDtype in pandas, setting " - "range_datetime_dtype to be None. To use ArrowDtype, " - "please use pandas >= 1.5 and python >= 3.8." - ) - warnings.warn(msg) + else: + warnings.warn(_RANGE_PYARROW_WARNING) range_datetime_dtype = None if range_timestamp_dtype is DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE: - try: + if _versions_helpers.SUPPORTS_RANGE_PYARROW: range_timestamp_dtype = pandas.ArrowDtype( pyarrow.struct( [ @@ -2329,16 +2319,8 @@ def to_dataframe( ] ) ) - except AttributeError: - # pandas.ArrowDtype was introduced in pandas 1.5, but python 3.7 - # only supports upto pandas 1.3. If pandas.ArrowDtype is not - # present, we raise a warning and set range_timestamp_dtype to None. - msg = ( - "Unable to find class ArrowDtype in pandas, setting " - "range_timestamp_dtype to be None. To use ArrowDtype, " - "please use pandas >= 1.5 and python >= 3.8." - ) - warnings.warn(msg) + else: + warnings.warn(_RANGE_PYARROW_WARNING) range_timestamp_dtype = None if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"): From b739596f37b8c00b375cc811c316b618097d761a Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 15 May 2024 07:54:36 -0400 Subject: [PATCH 301/536] fix: edit presubmit for to simplify configuration (#1915) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add new presubmit for test purposes * add additional sessions * Update .kokoro/presubmit/presubmit-2.cfg * Update .kokoro/presubmit/presubmit-2.cfg * added timer to nox sessions * Update .kokoro/presubmit/presubmit-2.cfg * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * removes references to most environment variables * testing the use of base names for the nox sessions * removes references to unneeded linting and typing env variables * change file name and update env_vars in presubmit-2 * remove timed decorators * revert several files * Update noxfile.py * remove test, remove unneeded vars, etc --------- Co-authored-by: Owl Bot --- .kokoro/presubmit/presubmit.cfg | 12 ++---------- noxfile.py | 32 -------------------------------- 2 files changed, 2 insertions(+), 42 deletions(-) diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg index fa39b1118..ce3953120 100644 --- a/.kokoro/presubmit/presubmit.cfg +++ b/.kokoro/presubmit/presubmit.cfg @@ -2,14 +2,6 @@ # Disable system tests. env_vars: { - key: "RUN_SYSTEM_TESTS" - value: "false" -} -env_vars: { - key: "RUN_SNIPPETS_TESTS" - value: "false" -} -env_vars: { - key: "RUN_LINTING_TYPING_TESTS" - value: "false" + key: "NOX_SESSION" + value: "unit_noextras unit cover docs" } diff --git a/noxfile.py b/noxfile.py index 78a9ab5b6..02655a7b7 100644 --- a/noxfile.py +++ b/noxfile.py @@ -132,10 +132,6 @@ def unit_noextras(session): def mypy(session): """Run type checks with mypy.""" - # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. - if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": - session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") - session.install("-e", ".[all]") session.install(MYPY_VERSION) @@ -157,10 +153,6 @@ def pytype(session): # recent version avoids the error until a possibly better fix is found. # https://github.com/googleapis/python-bigquery/issues/655 - # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. - if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": - session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") - session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install(PYTYPE_VERSION) @@ -176,10 +168,6 @@ def system(session): CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) - # Check the value of `RUN_SYSTEM_TESTS` env var. It defaults to true. - if os.environ.get("RUN_SYSTEM_TESTS", "true") == "false": - session.skip("RUN_SYSTEM_TESTS is set to false, skipping") - # Sanity check: Only run system tests if the environment variable is set. if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): session.skip("Credentials must be set via environment variable.") @@ -224,10 +212,6 @@ def system(session): def mypy_samples(session): """Run type checks with mypy.""" - # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. - if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": - session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") - session.install("pytest") for requirements_path in CURRENT_DIRECTORY.glob("samples/*/requirements.txt"): session.install("-r", str(requirements_path)) @@ -263,10 +247,6 @@ def mypy_samples(session): def snippets(session): """Run the snippets test suite.""" - # Check the value of `RUN_SNIPPETS_TESTS` env var. It defaults to true. - if os.environ.get("RUN_SNIPPETS_TESTS", "true") == "false": - session.skip("RUN_SNIPPETS_TESTS is set to false, skipping") - constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) @@ -409,10 +389,6 @@ def lint(session): serious code quality issues. """ - # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. - if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": - session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") - session.install("flake8", BLACK_VERSION) session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) @@ -427,10 +403,6 @@ def lint(session): def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" - # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. - if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": - session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") - session.install("docutils", "Pygments") session.run("python", "setup.py", "check", "--restructuredtext", "--strict") @@ -441,10 +413,6 @@ def blacken(session): Format code to uniform standard. """ - # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. - if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": - session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") - session.install(BLACK_VERSION) session.run("black", *BLACK_PATHS) From ea750e0248473b6207b8517aa7ea1cf4e19bccf2 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 16 May 2024 08:02:36 -0400 Subject: [PATCH 302/536] feat: adds timer decorator to facilitate debugging (#1917) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: adds timer decorator to sessions * updates _calculate_duration function * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- noxfile.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/noxfile.py b/noxfile.py index 02655a7b7..5f88e46a0 100644 --- a/noxfile.py +++ b/noxfile.py @@ -14,11 +14,13 @@ from __future__ import absolute_import +from functools import wraps import pathlib import os import re import shutil import nox +import time MYPY_VERSION = "mypy==1.6.1" @@ -40,6 +42,27 @@ UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.12"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() + +def _calculate_duration(func): + """This decorator prints the execution time for the decorated function.""" + + @wraps(func) + def wrapper(*args, **kwargs): + start = time.monotonic() + result = func(*args, **kwargs) + end = time.monotonic() + total_seconds = round(end - start) + hours = total_seconds // 3600 # Integer division to get hours + remaining_seconds = total_seconds % 3600 # Modulo to find remaining seconds + minutes = remaining_seconds // 60 + seconds = remaining_seconds % 60 + human_time = f"{hours:}:{minutes:0>2}:{seconds:0>2}" + print(f"Session ran in {total_seconds} seconds ({human_time})") + return result + + return wrapper + + # 'docfx' is excluded since it only needs to run in 'docs-presubmit' nox.options.sessions = [ "unit_noextras", @@ -105,6 +128,7 @@ def default(session, install_extras=True): @nox.session(python=UNIT_TEST_PYTHON_VERSIONS) +@_calculate_duration def unit(session): """Run the unit test suite.""" @@ -112,6 +136,7 @@ def unit(session): @nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]]) +@_calculate_duration def unit_noextras(session): """Run the unit test suite.""" @@ -129,6 +154,7 @@ def unit_noextras(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def mypy(session): """Run type checks with mypy.""" @@ -147,6 +173,7 @@ def mypy(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def pytype(session): """Run type checks with pytype.""" # An indirect dependecy attrs==21.1.0 breaks the check, and installing a less @@ -161,6 +188,7 @@ def pytype(session): @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) +@_calculate_duration def system(session): """Run the system test suite.""" @@ -209,6 +237,7 @@ def system(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def mypy_samples(session): """Run type checks with mypy.""" @@ -244,6 +273,7 @@ def mypy_samples(session): @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) +@_calculate_duration def snippets(session): """Run the snippets test suite.""" @@ -279,6 +309,7 @@ def snippets(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def cover(session): """Run the final coverage report. @@ -292,6 +323,7 @@ def cover(session): @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) +@_calculate_duration def prerelease_deps(session): """Run all tests with prerelease versions of dependencies installed. @@ -382,6 +414,7 @@ def prerelease_deps(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def lint(session): """Run linters. @@ -400,6 +433,7 @@ def lint(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" @@ -408,6 +442,7 @@ def lint_setup_py(session): @nox.session(python=DEFAULT_PYTHON_VERSION) +@_calculate_duration def blacken(session): """Run black. Format code to uniform standard. @@ -418,6 +453,7 @@ def blacken(session): @nox.session(python="3.9") +@_calculate_duration def docs(session): """Build the docs.""" @@ -454,6 +490,7 @@ def docs(session): @nox.session(python="3.10") +@_calculate_duration def docfx(session): """Build the docfx yaml files for this library.""" From 01fc0ef9341c3d31bc61e069e599c498242893fc Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 16 May 2024 17:29:02 +0200 Subject: [PATCH 303/536] chore(deps): update all dependencies (#1916) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements-test.txt | 2 +- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 13 +++++++------ samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 4 ++-- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 2 +- 10 files changed, 18 insertions(+), 17 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 9142d4905..4487e2ef3 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.1.1; python_version >= '3.8' +pytest==8.2.0; python_version >= '3.8' mock==5.1.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 3e9e59430..716f088ac 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.21.0 +google-cloud-bigquery==3.22.0 google-auth-oauthlib==1.2.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index f052969d3..3689fda4e 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.1.1; python_version >= '3.8' +pytest==8.2.0; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 6502ba146..8c268759e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,16 +12,17 @@ Fiona==1.9.6 geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' -geopandas==0.14.3; python_version >= '3.9' -google-api-core==2.18.0 +geopandas==0.14.4; python_version >= '3.9' +google-api-core==2.19.0 google-auth==2.29.0 -google-cloud-bigquery==3.21.0 -google-cloud-bigquery-storage==2.24.0 +google-cloud-bigquery==3.22.0 +google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 -grpcio==1.62.2 +grpcio==1.62.2; python_version == '3.7' +grpcio==1.63.0; python_version >= '3.8' idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 @@ -31,7 +32,7 @@ pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==15.0.2; python_version >= '3.8' +pyarrow==16.0.0; python_version >= '3.8' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 9142d4905..4487e2ef3 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.1.1; python_version >= '3.8' +pytest==8.2.0; python_version >= '3.8' mock==5.1.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index a431f466f..67be479e1 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.21.0 -google-cloud-bigquery-storage==2.24.0 +google.cloud.bigquery==3.22.0 +google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 9142d4905..4487e2ef3 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.1.1; python_version >= '3.8' +pytest==8.2.0; python_version >= '3.8' mock==5.1.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index dcce1e3ec..a60175de5 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.21.0 -google-cloud-bigquery-storage==2.24.0 +google-cloud-bigquery==3.22.0 +google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 0343ab89a..3c8fcc27d 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.1.1; python_version >= '3.8' +pytest==8.2.0; python_version >= '3.8' mock==5.1.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index fee0ce65a..a5e90118f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.21.0 +google-cloud-bigquery==3.22.0 From a429e8fd997a8850d15d434089869cf24e53c9e6 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 16 May 2024 13:04:08 -0400 Subject: [PATCH 304/536] chore(main): release 3.23.0 (#1911) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Chalmer Lowe --- CHANGELOG.md | 14 ++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a201ef851..804c0ae1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.23.0](https://github.com/googleapis/python-bigquery/compare/v3.22.0...v3.23.0) (2024-05-16) + + +### Features + +* Adds timer decorator to facilitate debugging ([#1917](https://github.com/googleapis/python-bigquery/issues/1917)) ([ea750e0](https://github.com/googleapis/python-bigquery/commit/ea750e0248473b6207b8517aa7ea1cf4e19bccf2)) +* Support insertAll for range ([#1909](https://github.com/googleapis/python-bigquery/issues/1909)) ([74e75e8](https://github.com/googleapis/python-bigquery/commit/74e75e89ce3a5ac18112b2c1c33248445ff072e4)) + + +### Bug Fixes + +* Add pyarrow version check for range support ([#1914](https://github.com/googleapis/python-bigquery/issues/1914)) ([a86d7b9](https://github.com/googleapis/python-bigquery/commit/a86d7b96813f67fea28b46c5252416222edca9a6)) +* Edit presubmit for to simplify configuration ([#1915](https://github.com/googleapis/python-bigquery/issues/1915)) ([b739596](https://github.com/googleapis/python-bigquery/commit/b739596f37b8c00b375cc811c316b618097d761a)) + ## [3.22.0](https://github.com/googleapis/python-bigquery/compare/v3.21.0...v3.22.0) (2024-04-19) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index b6c082ffc..0938c08f6 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.22.0" +__version__ = "3.23.0" From 0dac7140a3a56960f5fdcb7c3b3dec2d6a94f515 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 21 May 2024 19:08:25 +0200 Subject: [PATCH 305/536] chore(deps): update all dependencies (#1921) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * pin grpcio===1.62.2 for python 3.7 support of python 3.7 is dropped starting 1.63 --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 4 ++-- samples/notebooks/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 8c268759e..ff614977b 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -21,7 +21,7 @@ google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 -grpcio==1.62.2; python_version == '3.7' +grpcio===1.62.2; python_version == '3.7' grpcio==1.63.0; python_version >= '3.8' idna==3.7 munch==4.0.0 @@ -32,7 +32,7 @@ pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==16.0.0; python_version >= '3.8' +pyarrow==16.1.0; python_version >= '3.8' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index a60175de5..3407323ee 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -6,7 +6,7 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.8.4; python_version >= '3.9' +matplotlib==3.9.0; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' From eaa1a52b360646909c14ca7194b8c6b17fefdd79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 21 May 2024 15:39:43 -0500 Subject: [PATCH 306/536] perf: decrease the threshold in which we use the BQ Storage Read API (#1925) * perf: decrease the threshold in which we use the BQ Storage Read API * fix unit test * update comment --- google/cloud/bigquery/table.py | 12 +++++++++++- tests/unit/test_table.py | 10 +++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index ad1253195..6ebb0709a 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -108,7 +108,17 @@ # How many of the total rows need to be downloaded already for us to skip # calling the BQ Storage API? -ALMOST_COMPLETELY_CACHED_RATIO = 0.333 +# +# In microbenchmarks on 2024-05-21, I (tswast@) measure that at about 2 MB of +# remaining results, it's faster to use the BQ Storage Read API to download +# the results than use jobs.getQueryResults. Since we don't have a good way to +# know the remaining bytes, we estimate by remaining number of rows. +# +# Except when rows themselves are larger, I observe that the a single page of +# results will be around 10 MB. Therefore, the proportion of rows already +# downloaded should be 10 (first page) / 12 (all results) or less for it to be +# worth it to make a call to jobs.getQueryResults. +ALMOST_COMPLETELY_CACHED_RATIO = 0.833333 def _reference_getter(table): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 099529f95..fcbba03aa 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2307,9 +2307,17 @@ def test__is_almost_completely_cached_returns_true_with_some_rows_remaining(self rows = [ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Pebbles Phlyntstone"}, {"v": "4"}]}, + {"f": [{"v": "Bamm-Bamm Rhubble"}, {"v": "5"}]}, + {"f": [{"v": "Joseph Rockhead"}, {"v": "32"}]}, + {"f": [{"v": "Perry Masonry"}, {"v": "33"}]}, ] first_page = {"pageToken": "next-page", "rows": rows} - iterator = self._make_one(first_page_response=first_page, total_rows=6) + iterator = self._make_one( + first_page_response=first_page, total_rows=len(rows) + 1 + ) self.assertTrue(iterator._is_almost_completely_cached()) def test__is_almost_completely_cached_returns_true_with_no_rows_remaining(self): From 32b2c35d7ea5312b0da344518f56985a967ddd0b Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 21 May 2024 13:52:55 -0700 Subject: [PATCH 307/536] chore(main): release 3.23.1 (#1927) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 804c0ae1c..b9a2c3149 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.23.1](https://github.com/googleapis/python-bigquery/compare/v3.23.0...v3.23.1) (2024-05-21) + + +### Performance Improvements + +* Decrease the threshold in which we use the BQ Storage Read API ([#1925](https://github.com/googleapis/python-bigquery/issues/1925)) ([eaa1a52](https://github.com/googleapis/python-bigquery/commit/eaa1a52b360646909c14ca7194b8c6b17fefdd79)) + ## [3.23.0](https://github.com/googleapis/python-bigquery/compare/v3.22.0...v3.23.0) (2024-05-16) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0938c08f6..a62f73ed4 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.23.0" +__version__ = "3.23.1" From c3f7b237383d4705ed6e720544728c4db61f6c83 Mon Sep 17 00:00:00 2001 From: sclmn <128747290+sclmn@users.noreply.github.com> Date: Tue, 21 May 2024 14:44:45 -0700 Subject: [PATCH 308/536] feat: add support for map target type in Parquet options (#1919) * Update format_options.py to include the newly added map target type. The map target type creates a schema without the added key_value repeated field. * Added tests * add unit test * lint --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/format_options.py | 15 +++++++++++++++ tests/unit/test_format_options.py | 8 +++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/format_options.py b/google/cloud/bigquery/format_options.py index 1208565a9..ad5591b1c 100644 --- a/google/cloud/bigquery/format_options.py +++ b/google/cloud/bigquery/format_options.py @@ -105,6 +105,21 @@ def enable_list_inference(self) -> bool: def enable_list_inference(self, value: bool) -> None: self._properties["enableListInference"] = value + @property + def map_target_type(self) -> str: + """Indicates whether to simplify the representation of parquet maps to only show keys and values.""" + + return self._properties.get("mapTargetType") + + @map_target_type.setter + def map_target_type(self, value: str) -> None: + """Sets the map target type. + + Args: + value: The map target type (eg ARRAY_OF_STRUCT). + """ + self._properties["mapTargetType"] = value + @classmethod def from_api_repr(cls, resource: Dict[str, bool]) -> "ParquetOptions": """Factory: construct an instance from a resource dict. diff --git a/tests/unit/test_format_options.py b/tests/unit/test_format_options.py index c8fecbfa6..94a01570f 100644 --- a/tests/unit/test_format_options.py +++ b/tests/unit/test_format_options.py @@ -54,11 +54,17 @@ def test_from_api_repr(self): ) assert not config.enum_as_string assert config.enable_list_inference + assert config.map_target_type is None def test_to_api_repr(self): config = self._get_target_class()() config.enum_as_string = True config.enable_list_inference = False + config.map_target_type = "ARRAY_OF_STRUCT" result = config.to_api_repr() - assert result == {"enumAsString": True, "enableListInference": False} + assert result == { + "enumAsString": True, + "enableListInference": False, + "mapTargetType": "ARRAY_OF_STRUCT", + } From 4f72723f539d35977bc52c5950f6e00889b5c7be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 23 May 2024 14:48:36 -0500 Subject: [PATCH 309/536] fix: retry `is_job_done` on `ConnectionError` (#1930) --- google/cloud/bigquery/retry.py | 8 +++ tests/unit/test_job_retry.py | 117 +++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index c9898287f..111034519 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -119,6 +119,14 @@ def _job_should_retry(exc): if isinstance(exc, exceptions.RetryError): exc = exc.cause + # Per https://github.com/googleapis/python-bigquery/issues/1929, sometimes + # retriable errors make their way here. Because of the separate + # `restart_query_job` logic to make sure we aren't restarting non-failed + # jobs, it should be safe to continue and not totally fail our attempt at + # waiting for the query to complete. + if _should_retry(exc): + return True + if not hasattr(exc, "errors") or len(exc.errors) == 0: return False diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index 43ddae1dc..2dcc5878d 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -21,6 +21,7 @@ import google.api_core.exceptions import google.api_core.retry import freezegun +import requests.exceptions from google.cloud.bigquery.client import Client from google.cloud.bigquery import _job_helpers @@ -126,6 +127,122 @@ def api_request(method, path, query_params=None, data=None, **kw): assert job.job_id == orig_job_id +def test_retry_connection_error_with_default_retries_and_successful_first_job( + monkeypatch, client +): + """ + Make sure ConnectionError can be retried at `is_job_done` level, even if + retries are exhaused by API-level retry. + + Note: Because restart_query_job is set to True only in the case of a + confirmed job failure, this should be safe to do even when a job is not + idempotent. + + Regression test for issue + https://github.com/googleapis/python-bigquery/issues/1929 + """ + job_counter = 0 + + def make_job_id(*args, **kwargs): + nonlocal job_counter + job_counter += 1 + return f"{job_counter}" + + monkeypatch.setattr(_job_helpers, "make_job_id", make_job_id) + conn = client._connection = make_connection() + project = client.project + job_reference_1 = {"projectId": project, "jobId": "1", "location": "test-loc"} + NUM_API_RETRIES = 2 + + with freezegun.freeze_time( + "2024-01-01 00:00:00", + # Note: because of exponential backoff and a bit of jitter, + # NUM_API_RETRIES will get less accurate the greater the value. + # We add 1 because we know there will be at least some additional + # calls to fetch the time / sleep before the retry deadline is hit. + auto_tick_seconds=( + google.cloud.bigquery.retry._DEFAULT_RETRY_DEADLINE / NUM_API_RETRIES + ) + + 1, + ): + conn.api_request.side_effect = [ + # jobs.insert + {"jobReference": job_reference_1, "status": {"state": "PENDING"}}, + # jobs.get + {"jobReference": job_reference_1, "status": {"state": "RUNNING"}}, + # jobs.getQueryResults x2 + requests.exceptions.ConnectionError(), + requests.exceptions.ConnectionError(), + # jobs.get + # Job actually succeeeded, so we shouldn't be restarting the job, + # even though we are retrying at the `is_job_done` level. + {"jobReference": job_reference_1, "status": {"state": "DONE"}}, + # jobs.getQueryResults + {"jobReference": job_reference_1, "jobComplete": True}, + ] + + job = client.query("select 1") + rows_iter = job.result() + + assert job.done() # Shouldn't make any additional API calls. + assert rows_iter is not None + + # Should only have created one job, even though we did call job_retry. + assert job_counter == 1 + + # Double-check that we made the API calls we expected to make. + conn.api_request.assert_has_calls( + [ + # jobs.insert + mock.call( + method="POST", + path="/projects/PROJECT/jobs", + data={ + "jobReference": {"jobId": "1", "projectId": "PROJECT"}, + "configuration": { + "query": {"useLegacySql": False, "query": "select 1"} + }, + }, + timeout=None, + ), + # jobs.get + mock.call( + method="GET", + path="/projects/PROJECT/jobs/1", + query_params={"location": "test-loc"}, + timeout=None, + ), + # jobs.getQueryResults x2 + mock.call( + method="GET", + path="/projects/PROJECT/queries/1", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=None, + ), + mock.call( + method="GET", + path="/projects/PROJECT/queries/1", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=None, + ), + # jobs.get -- is_job_done checking again + mock.call( + method="GET", + path="/projects/PROJECT/jobs/1", + query_params={"location": "test-loc"}, + timeout=None, + ), + # jobs.getQueryResults + mock.call( + method="GET", + path="/projects/PROJECT/queries/1", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=120, + ), + ], + ) + + def test_query_retry_with_default_retry_and_ambiguous_errors_only_retries_with_failed_job( client, monkeypatch ): From 7b080bed63f1a8b544ccb470aae3870c2fc57133 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 28 May 2024 17:05:21 +0200 Subject: [PATCH 310/536] chore(deps): update all dependencies (#1926) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements-test.txt | 2 +- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 4487e2ef3..a6c397822 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.0; python_version >= '3.8' +pytest==8.2.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 716f088ac..fcfd8f842 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.22.0 +google-cloud-bigquery==3.23.0 google-auth-oauthlib==1.2.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 3689fda4e..64d436dcf 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.2.0; python_version >= '3.8' +pytest==8.2.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ff614977b..a40f2fc1f 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -15,14 +15,14 @@ geopandas===0.13.2; python_version == '3.8' geopandas==0.14.4; python_version >= '3.9' google-api-core==2.19.0 google-auth==2.29.0 -google-cloud-bigquery==3.22.0 +google-cloud-bigquery==3.23.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.63.0; python_version >= '3.8' +grpcio==1.64.0; python_version >= '3.8' idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 4487e2ef3..a6c397822 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.0; python_version >= '3.8' +pytest==8.2.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 67be479e1..afa69b3c0 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.22.0 +google.cloud.bigquery==3.23.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 4487e2ef3..a6c397822 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.0; python_version >= '3.8' +pytest==8.2.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 3407323ee..67baedb0d 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.22.0 +google-cloud-bigquery==3.23.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 3c8fcc27d..bd1ba5028 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.0; python_version >= '3.8' +pytest==8.2.1; python_version >= '3.8' mock==5.1.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index a5e90118f..7601e0772 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.22.0 +google-cloud-bigquery==3.23.0 From b844eef4d13a1575a7ee89a9824d2e1ebb06c48b Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 29 May 2024 17:36:50 +0200 Subject: [PATCH 311/536] chore(deps): update all dependencies (#1934) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index fcfd8f842..b35cc414c 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.23.0 +google-cloud-bigquery==3.23.1 google-auth-oauthlib==1.2.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index a40f2fc1f..ec58831e8 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -15,7 +15,7 @@ geopandas===0.13.2; python_version == '3.8' geopandas==0.14.4; python_version >= '3.9' google-api-core==2.19.0 google-auth==2.29.0 -google-cloud-bigquery==3.23.0 +google-cloud-bigquery==3.23.1 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 @@ -48,7 +48,7 @@ rsa==4.9 Shapely==2.0.4 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.11.0; python_version >= '3.8' +typing-extensions==4.12.0; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' urllib3==2.2.1; python_version >= '3.8' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index afa69b3c0..e3a225b79 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.23.0 +google.cloud.bigquery==3.23.1 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 67baedb0d..f774ea183 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.23.0 +google-cloud-bigquery==3.23.1 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7601e0772..89fe16387 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.23.0 +google-cloud-bigquery==3.23.1 From 67ec0c1a93b66afc07d84b049d51d217046d1fa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 29 May 2024 14:43:41 -0500 Subject: [PATCH 312/536] test: verify `Client._connection.extra_headers` functionality (#1932) --- tests/unit/test_client.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index e9e74b06b..a5434019b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -271,6 +271,30 @@ def test_ctor_w_load_job_config(self): self.assertIsInstance(client._default_load_job_config, LoadJobConfig) self.assertTrue(client._default_load_job_config.create_session) + def test__call_api_extra_headers(self): + # Note: We test at a lower layer to ensure that extra headers are + # populated when we actually make the call in requests. + # Arrange + http = mock.create_autospec(requests.Session, instance=True) + http.is_mtls = False + response = mock.create_autospec(requests.Response, instance=True) + response.status_code = 200 + http.request.return_value = response + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + # Act + client._connection.extra_headers = {"x-goog-request-reason": "because-friday"} + client._call_api( + retry=None, method="GET", path="/bigquery/v2/projects/my-proj/jobs/my-job" + ) + + # Assert + http.request.assert_called_once() + _, kwargs = http.request.call_args + headers = kwargs["headers"] + assert headers["x-goog-request-reason"] == "because-friday" + def test__call_api_applying_custom_retry_on_timeout(self): from concurrent.futures import TimeoutError from google.cloud.bigquery.retry import DEFAULT_RETRY From f19d39801cb5828929f1117ab93492aefdada5fd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 29 May 2024 15:07:21 -0700 Subject: [PATCH 313/536] chore(deps): bump requests from 2.31.0 to 2.32.2 in /samples/geography (#1933) * chore(deps): bump requests from 2.31.0 to 2.32.2 in /samples/geography Bumps [requests](https://github.com/psf/requests) from 2.31.0 to 2.32.2. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.31.0...v2.32.2) --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... Signed-off-by: dependabot[bot] * pin requests==2.31.0 for python 3.7 --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ec58831e8..7db2fa855 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -43,7 +43,8 @@ pyparsing==3.1.2 python-dateutil==2.9.0.post0 pytz==2024.1 PyYAML==6.0.1 -requests==2.31.0 +requests==2.31.0; python_version == '3.7' +requests==2.32.2; python_version >= '3.8' rsa==4.9 Shapely==2.0.4 six==1.16.0 From 5f85e2e51c8523f9b2539be924380e2fd3e2171c Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 30 May 2024 12:48:36 -0700 Subject: [PATCH 314/536] chore: add warning if storage module not found (#1937) * chore: add warning if storage module not found * Update tests/unit/test_table.py Co-authored-by: Chalmer Lowe * Update tests/unit/test_table.py Co-authored-by: Chalmer Lowe --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/table.py | 4 ++++ tests/unit/test_table.py | 14 ++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 6ebb0709a..57fc0d2be 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1724,6 +1724,10 @@ def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client): try: _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True) except bq_exceptions.BigQueryStorageNotFoundError: + warnings.warn( + "BigQuery Storage module not found, fetch data with the REST " + "endpoint instead." + ) return False except bq_exceptions.LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index fcbba03aa..2a49b0632 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2360,24 +2360,30 @@ def test__should_use_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) - def test__should_use_bqstorage_returns_false_if_missing_dependency(self): + def test__should_use_bqstorage_returns_false_w_warning_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached - + def fail_bqstorage_import(name, globals, locals, fromlist, level): + """Returns True if bigquery_storage has been imported.""" # NOTE: *very* simplified, assuming a straightforward absolute import return "bigquery_storage" in name or ( fromlist is not None and "bigquery_storage" in fromlist ) - + # maybe_fail_import() returns ImportError if the predicate is True no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) - with no_bqstorage: + with no_bqstorage, warnings.catch_warnings(record=True) as warned: result = iterator._should_use_bqstorage( bqstorage_client=None, create_bqstorage_client=True ) self.assertFalse(result) + matching_warnings = [ + warning for warning in warned if "Storage module not found" in str(warning) + ] + assert matching_warnings, "Dependency not found warning not raised." + def test__should_use_bqstorage_returns_false_w_warning_if_obsolete_version(self): pytest.importorskip("google.cloud.bigquery_storage") iterator = self._make_one(first_page_response=None) # not cached From 618d4bbb3d1c7b1ddee63ead16f478e8a01137f0 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 31 May 2024 16:13:29 +0200 Subject: [PATCH 315/536] chore(deps): update all dependencies (#1936) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- tests/unit/test_table.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 2a49b0632..7a97c7b78 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2362,13 +2362,14 @@ def test__should_use_bqstorage_returns_false_if_max_results_set(self): def test__should_use_bqstorage_returns_false_w_warning_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached - + def fail_bqstorage_import(name, globals, locals, fromlist, level): """Returns True if bigquery_storage has been imported.""" # NOTE: *very* simplified, assuming a straightforward absolute import return "bigquery_storage" in name or ( fromlist is not None and "bigquery_storage" in fromlist ) + # maybe_fail_import() returns ImportError if the predicate is True no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) From 94d61b059007d340153dbe61a7212874e884bf21 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 31 May 2024 18:53:28 +0200 Subject: [PATCH 316/536] chore(deps): update all dependencies (#1938) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 7db2fa855..becaaf50a 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -44,7 +44,7 @@ python-dateutil==2.9.0.post0 pytz==2024.1 PyYAML==6.0.1 requests==2.31.0; python_version == '3.7' -requests==2.32.2; python_version >= '3.8' +requests==2.32.3; python_version >= '3.8' rsa==4.9 Shapely==2.0.4 six==1.16.0 From 9fbad767cc228e02040436742d0cb6743d370b90 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Fri, 31 May 2024 14:41:45 -0700 Subject: [PATCH 317/536] feat: add default timeout for Client.get_job() (#1935) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add default timeout for Client.get_job() * change timeout type detection * lint * fix unit test and coverage * add type hint * fix type hint * change import style and add comments * remove sentinel value in client * type hint * typo * add sentinel for query_and_wait() * add unit tests * fix unit test * Update google/cloud/bigquery/job/query.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/job/query.py Co-authored-by: Tim Sweña (Swast) * address comments * typo * type hint * typos --------- Co-authored-by: Tim Sweña (Swast) --- google/cloud/bigquery/_helpers.py | 2 + google/cloud/bigquery/_job_helpers.py | 13 +- google/cloud/bigquery/client.py | 5 +- google/cloud/bigquery/job/base.py | 29 ++--- google/cloud/bigquery/job/query.py | 34 +++-- google/cloud/bigquery/retry.py | 11 ++ tests/unit/job/test_base.py | 86 +++++++++--- tests/unit/job/test_copy.py | 34 ++++- tests/unit/job/test_extract.py | 32 ++++- tests/unit/job/test_load.py | 47 +++++-- tests/unit/job/test_query.py | 91 +++++++++++-- tests/unit/test__job_helpers.py | 60 +++------ tests/unit/test_client.py | 6 +- tests/unit/test_job_retry.py | 180 +++++++++++++------------- 14 files changed, 421 insertions(+), 209 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 668b4ca3d..5ee5e1850 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -33,6 +33,8 @@ from google.auth import credentials as ga_credentials # type: ignore from google.api_core import client_options as client_options_lib +TimeoutType = Union[float, None] + _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" _TIMEONLY_W_MICROS = "%H:%M:%S.%f" diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 290439394..e66ab2763 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -39,7 +39,7 @@ import functools import os import uuid -from typing import Any, Dict, TYPE_CHECKING, Optional +from typing import Any, Dict, Optional, TYPE_CHECKING, Union import google.api_core.exceptions as core_exceptions from google.api_core import retry as retries @@ -47,6 +47,7 @@ from google.cloud.bigquery import job import google.cloud.bigquery.query from google.cloud.bigquery import table +from google.cloud.bigquery.retry import POLLING_DEFAULT_VALUE # Avoid circular imports if TYPE_CHECKING: # pragma: NO COVER @@ -328,7 +329,7 @@ def query_and_wait( location: Optional[str], project: str, api_timeout: Optional[float] = None, - wait_timeout: Optional[float] = None, + wait_timeout: Optional[Union[float, object]] = POLLING_DEFAULT_VALUE, retry: Optional[retries.Retry], job_retry: Optional[retries.Retry], page_size: Optional[int] = None, @@ -364,10 +365,12 @@ def query_and_wait( api_timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - wait_timeout (Optional[float]): + wait_timeout (Optional[Union[float, object]]): The number of seconds to wait for the query to finish. If the query doesn't finish before this timeout, the client attempts - to cancel the query. + to cancel the query. If unset, the underlying Client.get_job() API + call has timeout, but we still wait indefinitely for the job to + finish. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. This only applies to making RPC calls. It isn't used to retry failed jobs. This has @@ -545,7 +548,7 @@ def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool: def _wait_or_cancel( job: job.QueryJob, api_timeout: Optional[float], - wait_timeout: Optional[float], + wait_timeout: Optional[Union[object, float]], retry: Optional[retries.Retry], page_size: Optional[int], max_results: Optional[int], diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 891a54e5c..4234767fe 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -82,6 +82,7 @@ from google.cloud.bigquery._helpers import _DEFAULT_UNIVERSE from google.cloud.bigquery._helpers import _validate_universe from google.cloud.bigquery._helpers import _get_client_universe +from google.cloud.bigquery._helpers import TimeoutType from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem @@ -107,6 +108,7 @@ DEFAULT_JOB_RETRY, DEFAULT_RETRY, DEFAULT_TIMEOUT, + DEFAULT_GET_JOB_TIMEOUT, ) from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference @@ -123,7 +125,6 @@ _versions_helpers.PANDAS_VERSIONS.try_import() ) # mypy check fails because pandas import is outside module, there are type: ignore comments related to this -TimeoutType = Union[float, None] ResumableTimeoutType = Union[ None, float, Tuple[float, float] ] # for resumable media methods @@ -2139,7 +2140,7 @@ def get_job( project: Optional[str] = None, location: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: TimeoutType = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_GET_JOB_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]: """Fetch a job for the project associated with this client. diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 2641afea8..6f9726181 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -26,8 +26,11 @@ import google.api_core.future.polling from google.cloud.bigquery import _helpers -from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery._helpers import _int_or_none +from google.cloud.bigquery.retry import ( + DEFAULT_GET_JOB_TIMEOUT, + DEFAULT_RETRY, +) _DONE_STATE = "DONE" @@ -801,7 +804,7 @@ def reload( self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, - timeout: Optional[float] = None, + timeout: Optional[float] = DEFAULT_GET_JOB_TIMEOUT, ): """API call: refresh job properties via a GET request. @@ -820,22 +823,14 @@ def reload( """ client = self._require_client(client) - extra_params = {} - if self.location: - extra_params["location"] = self.location - span_attributes = {"path": self.path} - - api_response = client._call_api( - retry, - span_name="BigQuery.job.reload", - span_attributes=span_attributes, - job_ref=self, - method="GET", - path=self.path, - query_params=extra_params, + got_job = client.get_job( + self, + project=self.project, + location=self.location, + retry=retry, timeout=timeout, ) - self._set_properties(api_response) + self._set_properties(got_job._properties) def cancel( self, @@ -913,7 +908,7 @@ def _set_future_result(self): def done( self, retry: "retries.Retry" = DEFAULT_RETRY, - timeout: Optional[float] = None, + timeout: Optional[float] = DEFAULT_GET_JOB_TIMEOUT, reload: bool = True, ) -> bool: """Checks if the job is complete. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 09a69e11c..25b89c3d7 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -40,7 +40,11 @@ StructQueryParameter, UDFResource, ) -from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY +from google.cloud.bigquery.retry import ( + DEFAULT_RETRY, + DEFAULT_JOB_RETRY, + POLLING_DEFAULT_VALUE, +) from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _EmptyRowIterator @@ -1437,7 +1441,7 @@ def result( # type: ignore # (incompatible with supertype) page_size: Optional[int] = None, max_results: Optional[int] = None, retry: Optional[retries.Retry] = DEFAULT_RETRY, - timeout: Optional[float] = None, + timeout: Optional[Union[float, object]] = POLLING_DEFAULT_VALUE, start_index: Optional[int] = None, job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: @@ -1457,11 +1461,14 @@ def result( # type: ignore # (incompatible with supertype) is ``DONE``, retrying is aborted early even if the results are not available, as this will not change anymore. - timeout (Optional[float]): + timeout (Optional[Union[float, \ + google.api_core.future.polling.PollingFuture._DEFAULT_VALUE, \ + ]]): The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - If multiple requests are made under the hood, ``timeout`` - applies to each individual request. + before using ``retry``. If ``None``, wait indefinitely + unless an error is returned. If unset, only the + underlying API calls have their default timeouts, but we still + wait indefinitely for the job to finish. start_index (Optional[int]): The zero-based index of the starting row to read. job_retry (Optional[google.api_core.retry.Retry]): @@ -1507,6 +1514,13 @@ def result( # type: ignore # (incompatible with supertype) # Intentionally omit job_id and query_id since this doesn't # actually correspond to a finished query job. ) + + # When timeout has default sentinel value ``object()``, do not pass + # anything to invoke default timeouts in subsequent calls. + kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {} + if type(timeout) is not object: + kwargs["timeout"] = timeout + try: retry_do_query = getattr(self, "_retry_do_query", None) if retry_do_query is not None: @@ -1548,7 +1562,7 @@ def is_job_done(): # rateLimitExceeded errors are ambiguous. We want to know if # the query job failed and not just the call to # jobs.getQueryResults. - if self.done(retry=retry, timeout=timeout): + if self.done(retry=retry, **kwargs): # If it's already failed, we might as well stop. job_failed_exception = self.exception() if job_failed_exception is not None: @@ -1585,14 +1599,14 @@ def is_job_done(): # response from the REST API. This ensures we aren't # making any extra API calls if the previous loop # iteration fetched the finished job. - self._reload_query_results(retry=retry, timeout=timeout) + self._reload_query_results(retry=retry, **kwargs) return True # Call jobs.getQueryResults with max results set to 0 just to # wait for the query to finish. Unlike most methods, # jobs.getQueryResults hangs as long as it can to ensure we # know when the query has finished as soon as possible. - self._reload_query_results(retry=retry, timeout=timeout) + self._reload_query_results(retry=retry, **kwargs) # Even if the query is finished now according to # jobs.getQueryResults, we'll want to reload the job status if @@ -1682,10 +1696,10 @@ def is_job_done(): max_results=max_results, start_index=start_index, retry=retry, - timeout=timeout, query_id=self.query_id, first_page_response=first_page_response, num_dml_affected_rows=self._query_results.num_dml_affected_rows, + **kwargs, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 111034519..10958980d 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -14,6 +14,7 @@ from google.api_core import exceptions from google.api_core import retry +import google.api_core.future.polling from google.auth import exceptions as auth_exceptions # type: ignore import requests.exceptions @@ -140,3 +141,13 @@ def _job_should_retry(exc): """ The default job retry object. """ + +DEFAULT_GET_JOB_TIMEOUT = 128 +""" +Default timeout for Client.get_job(). +""" + +POLLING_DEFAULT_VALUE = google.api_core.future.polling.PollingFuture._DEFAULT_VALUE +""" +Default value defined in google.api_core.future.polling.PollingFuture. +""" diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 186729529..a7337afd2 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -22,6 +22,8 @@ from google.api_core.future import polling import pytest +from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + from ..helpers import make_connection from .helpers import _make_client @@ -709,7 +711,7 @@ def test_exists_w_timeout(self): ) def test_reload_defaults(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY + from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_GET_JOB_TIMEOUT resource = { "jobReference": { @@ -729,15 +731,19 @@ def test_reload_defaults(self): call_api.assert_called_once_with( DEFAULT_RETRY, - span_name="BigQuery.job.reload", + span_name="BigQuery.getJob", span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + "job_id": "job-id", + "location": "us-central", }, - job_ref=job, method="GET", path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={"location": self.LOCATION}, - timeout=None, + query_params={ + "projection": "full", + "location": "us-central", + }, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self.assertEqual(job._properties, expected) @@ -764,18 +770,43 @@ def test_reload_explicit(self): call_api.assert_called_once_with( retry, - span_name="BigQuery.job.reload", + span_name="BigQuery.getJob", span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + "job_id": "job-id", + "location": None, }, - job_ref=job, method="GET", path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={}, + query_params={"projection": "full"}, timeout=4.2, ) self.assertEqual(job._properties, expected) + def test_reload_none_timeout(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + client = _make_client(project=self.PROJECT) + conn = client._connection = make_connection(resource) + job = self._set_properties_job() + retry = DEFAULT_RETRY.with_deadline(1) + job.reload(client=client, retry=retry, timeout=None) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"projection": "full"}, + timeout=None, + ) + def test_cancel_defaults(self): resource = { "jobReference": { @@ -952,7 +983,10 @@ def test_done_defaults_wo_state(self): self.assertFalse(job.done()) - reload_.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None) + reload_.assert_called_once_with( + retry=DEFAULT_RETRY, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) def test_done_explicit_wo_state(self): from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -966,6 +1000,18 @@ def test_done_explicit_wo_state(self): reload_.assert_called_once_with(retry=retry, timeout=7.5) + def test_done_with_none_timeout(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + reload_ = job.reload = mock.Mock() + retry = DEFAULT_RETRY.with_deadline(1) + + self.assertFalse(job.done(retry=retry, timeout=None)) + + reload_.assert_called_once_with(retry=retry, timeout=None) + def test_done_already(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -974,6 +1020,8 @@ def test_done_already(self): self.assertTrue(job.done()) def test_result_default_wo_state(self): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + begun_job_resource = _make_job_resource( job_id=self.JOB_ID, project_id=self.PROJECT, location="US", started=True ) @@ -1003,12 +1051,17 @@ def test_result_default_wo_state(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"location": "US"}, - timeout=None, + query_params={ + "projection": "full", + "location": "US", + }, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) conn.api_request.assert_has_calls([begin_call, begin_call, reload_call]) def test_result_w_retry_wo_state(self): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + begun_job_resource = _make_job_resource( job_id=self.JOB_ID, project_id=self.PROJECT, location="EU", started=True ) @@ -1054,8 +1107,11 @@ def test_result_w_retry_wo_state(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"location": "EU"}, - timeout=None, + query_params={ + "projection": "full", + "location": "EU", + }, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) conn.api_request.assert_has_calls( [begin_call, begin_call, reload_call, reload_call] diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py index e1bb20db2..4b0945310 100644 --- a/tests/unit/job/test_copy.py +++ b/tests/unit/job/test_copy.py @@ -477,6 +477,8 @@ def test_exists_hit_w_alternate_client(self): ) def test_reload_w_bound_client(self): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn = make_connection(RESOURCE) @@ -489,14 +491,27 @@ def test_reload_w_bound_client(self): ) as final_attributes: job.reload() - final_attributes.assert_called_with({"path": PATH}, client, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client, + None, + ) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn1 = make_connection() @@ -511,10 +526,21 @@ def test_reload_w_alternate_client(self): ) as final_attributes: job.reload(client=client2) - final_attributes.assert_called_with({"path": PATH}, client2, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client2, + None, + ) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) diff --git a/tests/unit/job/test_extract.py b/tests/unit/job/test_extract.py index ee0d67d68..ebf9f09e6 100644 --- a/tests/unit/job/test_extract.py +++ b/tests/unit/job/test_extract.py @@ -399,6 +399,7 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() @@ -412,14 +413,26 @@ def test_reload_w_bound_client(self): ) as final_attributes: job.reload() - final_attributes.assert_called_with({"path": PATH}, client, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client, + None, + ) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() @@ -435,10 +448,21 @@ def test_reload_w_alternate_client(self): ) as final_attributes: job.reload(client=client2) - final_attributes.assert_called_with({"path": PATH}, client2, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client2, + None, + ) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 976fec914..0fb044696 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -714,6 +714,8 @@ def test_exists_miss_w_job_reference(self): ) def test_reload_w_bound_client(self): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn = make_connection(RESOURCE) @@ -724,14 +726,27 @@ def test_reload_w_bound_client(self): ) as final_attributes: job.reload() - final_attributes.assert_called_with({"path": PATH}, client, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client, + None, + ) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_alternate_client(self): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() conn1 = make_connection() @@ -744,16 +759,28 @@ def test_reload_w_alternate_client(self): ) as final_attributes: job.reload(client=client2) - final_attributes.assert_called_with({"path": PATH}, client2, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client2, + None, + ) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) def test_reload_w_job_reference(self): from google.cloud.bigquery import job + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT resource = self._make_resource(ended=True) resource["jobReference"]["projectId"] = "alternative-project" @@ -768,16 +795,20 @@ def test_reload_w_job_reference(self): load_job.reload() final_attributes.assert_called_with( - {"path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID)}, + { + "path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID), + "job_id": self.JOB_ID, + "location": "US", + }, client, - load_job, + None, ) conn.api_request.assert_called_once_with( method="GET", path="/projects/alternative-project/jobs/{}".format(self.JOB_ID), - query_params={"location": "US"}, - timeout=None, + query_params={"projection": "full", "location": "US"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) def test_cancel_w_bound_client(self): diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 0fee053e3..c7b2c5f9c 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -28,6 +28,7 @@ from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery._job_helpers import google.cloud.bigquery.query +from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT from google.cloud.bigquery.table import _EmptyRowIterator from ..helpers import make_connection @@ -959,8 +960,8 @@ def test_result_reloads_job_state_until_done(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"location": "EU"}, - timeout=None, + query_params={"projection": "full", "location": "EU"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) query_page_call = mock.call( method="GET", @@ -1104,7 +1105,37 @@ def test_result_with_done_jobs_query_response_doesnt_call_get_query_results(self conn.api_request.assert_called_once_with( method="GET", path=job_path, - query_params={}, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) + + def test_result_with_none_timeout(self): + # Verifies that with an intentional None timeout, get job uses None + # instead of the default timeout. + job_resource = self._make_resource(started=True, ended=True, location="EU") + conn = make_connection(job_resource) + client = _make_client(self.PROJECT, connection=conn) + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "rows": [{"f": [{"v": "abc"}]}], + "totalRows": "1", + } + job = google.cloud.bigquery._job_helpers._to_query_job( + client, + "SELECT 'abc' AS col1", + request_config=None, + query_response=query_resource_done, + ) + + job.result(timeout=None) + + job_path = f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}" + conn.api_request.assert_called_once_with( + method="GET", + path=job_path, + query_params={"projection": "full"}, timeout=None, ) @@ -1287,8 +1318,8 @@ def test_result_w_custom_retry(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"location": "asia-northeast1"}, - timeout=None, + query_params={"projection": "full", "location": "asia-northeast1"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) connection.api_request.assert_has_calls( @@ -1367,7 +1398,7 @@ def test_result_w_timeout_doesnt_raise(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"location": "US"}, + query_params={"projection": "full", "location": "US"}, timeout=1.125, ) get_query_results_call = mock.call( @@ -1412,7 +1443,7 @@ def test_result_w_timeout_raises_concurrent_futures_timeout(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"location": "US"}, + query_params={"projection": "full", "location": "US"}, timeout=1.125, ) get_query_results_call = mock.call( @@ -2160,12 +2191,23 @@ def test_reload_w_bound_client(self): ) as final_attributes: job.reload() - final_attributes.assert_called_with({"path": PATH}, client, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client, + None, + ) self.assertNotEqual(job.destination, table_ref) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) @@ -2190,11 +2232,22 @@ def test_reload_w_alternate_client(self): ) as final_attributes: job.reload(client=client2) - final_attributes.assert_called_with({"path": PATH}, client2, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client2, + None, + ) conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) self._verifyResourceProperties(job, RESOURCE) @@ -2217,13 +2270,23 @@ def test_reload_w_timeout(self): "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: job.reload(timeout=4.2) - - final_attributes.assert_called_with({"path": PATH}, client, job) + final_attributes.assert_called_with( + { + "path": PATH, + "job_id": self.JOB_ID, + "location": None, + }, + client, + None, + ) self.assertNotEqual(job.destination, table_ref) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=4.2 + method="GET", + path=PATH, + query_params={"projection": "full"}, + timeout=4.2, ) def test_iter(self): diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 9f661dca7..96914d9f9 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import functools from typing import Any, Dict, Optional from unittest import mock @@ -21,15 +20,18 @@ from google.api_core import retry as retries import pytest -from google.cloud.bigquery.client import Client -from google.cloud.bigquery import enums from google.cloud.bigquery import _job_helpers +from google.cloud.bigquery import enums +from google.cloud.bigquery import retry +from google.cloud.bigquery.client import Client from google.cloud.bigquery.job import copy_ as job_copy from google.cloud.bigquery.job import extract as job_extract from google.cloud.bigquery.job import load as job_load from google.cloud.bigquery.job import query as job_query from google.cloud.bigquery.query import ConnectionProperty, ScalarQueryParameter +from .helpers import make_client, make_connection + def make_query_request(additional_properties: Optional[Dict[str, Any]] = None): request = {"useLegacySql": False, "formatOptions": {"useInt64Timestamp": True}} @@ -806,11 +808,8 @@ def test_query_and_wait_caches_completed_query_results_one_page_no_rows(): def test_query_and_wait_caches_completed_query_results_more_pages(): - client = mock.create_autospec(Client) - client._list_rows_from_query_results = functools.partial( - Client._list_rows_from_query_results, client - ) - client._call_api.side_effect = ( + client = make_client() + conn = client._connection = make_connection( { "jobReference": { "projectId": "response-project", @@ -882,10 +881,7 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): # Start the query. jobs_query_path = "/projects/request-project/queries" - client._call_api.assert_any_call( - None, # retry - span_name="BigQuery.query", - span_attributes={"path": jobs_query_path}, + conn.api_request.assert_any_call( method="POST", path=jobs_query_path, data={ @@ -906,8 +902,7 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): # Fetch the remaining two pages. jobs_get_query_results_path = "/projects/response-project/queries/response-job-id" - client._call_api.assert_any_call( - None, # retry + conn.api_request.assert_any_call( timeout=None, method="GET", path=jobs_get_query_results_path, @@ -918,8 +913,7 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): "formatOptions.useInt64Timestamp": True, }, ) - client._call_api.assert_any_call( - None, # retry + conn.api_request.assert_any_call( timeout=None, method="GET", path=jobs_get_query_results_path, @@ -933,12 +927,8 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): def test_query_and_wait_incomplete_query(): - client = mock.create_autospec(Client) - client._get_query_results = functools.partial(Client._get_query_results, client) - client._list_rows_from_query_results = functools.partial( - Client._list_rows_from_query_results, client - ) - client._call_api.side_effect = ( + client = make_client() + conn = client._connection = make_connection( # jobs.query { "jobReference": { @@ -1022,10 +1012,7 @@ def test_query_and_wait_incomplete_query(): # Start the query. jobs_query_path = "/projects/request-project/queries" - client._call_api.assert_any_call( - None, # retry - span_name="BigQuery.query", - span_attributes={"path": jobs_query_path}, + conn.api_request.assert_any_call( method="POST", path=jobs_query_path, data={ @@ -1041,10 +1028,7 @@ def test_query_and_wait_incomplete_query(): # Wait for the query to finish. jobs_get_query_results_path = "/projects/response-project/queries/response-job-id" - client._call_api.assert_any_call( - None, # retry - span_name="BigQuery.getQueryResults", - span_attributes={"path": jobs_get_query_results_path}, + conn.api_request.assert_any_call( method="GET", path=jobs_get_query_results_path, query_params={ @@ -1063,20 +1047,15 @@ def test_query_and_wait_incomplete_query(): # Fetch the job metadata in case the RowIterator needs the destination table. jobs_get_path = "/projects/response-project/jobs/response-job-id" - client._call_api.assert_any_call( - None, # retry - span_name="BigQuery.job.reload", - span_attributes={"path": jobs_get_path}, - job_ref=mock.ANY, + conn.api_request.assert_any_call( method="GET", path=jobs_get_path, - query_params={"location": "response-location"}, - timeout=None, + query_params={"projection": "full", "location": "response-location"}, + timeout=retry.DEFAULT_GET_JOB_TIMEOUT, ) # Fetch the remaining two pages. - client._call_api.assert_any_call( - None, # retry + conn.api_request.assert_any_call( timeout=None, method="GET", path=jobs_get_query_results_path, @@ -1086,8 +1065,7 @@ def test_query_and_wait_incomplete_query(): "formatOptions.useInt64Timestamp": True, }, ) - client._call_api.assert_any_call( - None, # retry + conn.api_request.assert_any_call( timeout=None, method="GET", path=jobs_get_query_results_path, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index a5434019b..ed5575f6c 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -3167,6 +3167,7 @@ def test_job_from_resource_unknown_type(self): def test_get_job_miss_w_explict_project(self): from google.cloud.exceptions import NotFound + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT OTHER_PROJECT = "OTHER_PROJECT" JOB_ID = "NONESUCH" @@ -3181,11 +3182,12 @@ def test_get_job_miss_w_explict_project(self): method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", query_params={"projection": "full"}, - timeout=DEFAULT_TIMEOUT, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) def test_get_job_miss_w_client_location(self): from google.cloud.exceptions import NotFound + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT JOB_ID = "NONESUCH" creds = _make_credentials() @@ -3199,7 +3201,7 @@ def test_get_job_miss_w_client_location(self): method="GET", path="/projects/client-proj/jobs/NONESUCH", query_params={"projection": "full", "location": "client-loc"}, - timeout=DEFAULT_TIMEOUT, + timeout=DEFAULT_GET_JOB_TIMEOUT, ) def test_get_job_hit_w_timeout(self): diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index 2dcc5878d..46eb1d6b3 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -23,85 +23,93 @@ import freezegun import requests.exceptions -from google.cloud.bigquery.client import Client from google.cloud.bigquery import _job_helpers import google.cloud.bigquery.retry -from .helpers import make_connection +from .helpers import make_client, make_connection -# With job_retry_on_query, we're testing 4 scenarios: +_RETRY_NOT_FOUND = { + "job_retry": google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.NotFound, + ), + ), +} +_RETRY_BAD_REQUEST = { + "job_retry": google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.BadRequest, + ), + ), +} + + +# Test retry of job failures, instead of API-invocation failures. 4 scenarios: # - No `job_retry` passed, retry on default rateLimitExceeded. # - Pass NotFound retry to `query`. # - Pass NotFound retry to `result`. # - Pass BadRequest retry to query, with the value passed to `result` overriding. -@pytest.mark.parametrize("job_retry_on_query", [None, "Query", "Result", "Both"]) @mock.patch("time.sleep") -def test_retry_failed_jobs(sleep, client, job_retry_on_query): - """ - Test retry of job failures, as opposed to API-invocation failures. - """ - - retry_notfound = google.api_core.retry.Retry( - predicate=google.api_core.retry.if_exception_type( - google.api_core.exceptions.NotFound - ) - ) - retry_badrequest = google.api_core.retry.Retry( - predicate=google.api_core.retry.if_exception_type( - google.api_core.exceptions.BadRequest - ) - ) - - if job_retry_on_query is None: - reason = "rateLimitExceeded" - else: - reason = "notFound" - +@pytest.mark.parametrize( + "reason, job_retry, result_retry", + [ + pytest.param( + "rateLimitExceeded", + {}, + {}, + id="no job_retry", + ), + pytest.param( + "notFound", + _RETRY_NOT_FOUND, + {}, + id="Query NotFound", + ), + pytest.param( + "notFound", + _RETRY_NOT_FOUND, + _RETRY_NOT_FOUND, + id="Result NotFound", + ), + pytest.param( + "notFound", + _RETRY_BAD_REQUEST, + _RETRY_NOT_FOUND, + id="BadRequest", + ), + ], +) +def test_retry_failed_jobs(sleep, reason, job_retry, result_retry): + client = make_client() err = dict(reason=reason) - responses = [ - dict(status=dict(state="DONE", errors=[err], errorResult=err)), - dict(status=dict(state="DONE", errors=[err], errorResult=err)), - dict(status=dict(state="DONE", errors=[err], errorResult=err)), - dict(status=dict(state="DONE")), + conn = client._connection = make_connection( + dict( + status=dict(state="DONE", errors=[err], errorResult=err), + jobReference={"jobId": "id_1"}, + ), + dict( + status=dict(state="DONE", errors=[err], errorResult=err), + jobReference={"jobId": "id_1"}, + ), + dict( + status=dict(state="DONE", errors=[err], errorResult=err), + jobReference={"jobId": "id_1"}, + ), + dict(status=dict(state="DONE"), jobReference={"jobId": "id_2"}), dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), - ] - - def api_request(method, path, query_params=None, data=None, **kw): - response = responses.pop(0) - if data: - response["jobReference"] = data["jobReference"] - else: - response["jobReference"] = dict( - jobId=path.split("/")[-1], projectId="PROJECT" - ) - return response - - conn = client._connection = make_connection() - conn.api_request.side_effect = api_request + ) - if job_retry_on_query == "Query": - job_retry = dict(job_retry=retry_notfound) - elif job_retry_on_query == "Both": - # This will be overridden in `result` - job_retry = dict(job_retry=retry_badrequest) - else: - job_retry = {} job = client.query("select 1", **job_retry) + result = job.result(**result_retry) - orig_job_id = job.job_id - job_retry = ( - dict(job_retry=retry_notfound) - if job_retry_on_query in ("Result", "Both") - else {} - ) - result = job.result(**job_retry) assert result.total_rows == 1 - assert not responses # We made all the calls we expected to. + + # We made all the calls we expected to. + assert conn.api_request.call_count == 5 # The job adjusts it's job id based on the id of the last attempt. - assert job.job_id != orig_job_id - assert job.job_id == conn.mock_calls[3][2]["data"]["jobReference"]["jobId"] + assert job.job_id == "id_2" # We had to sleep three times assert len(sleep.mock_calls) == 3 @@ -114,17 +122,19 @@ def api_request(method, path, query_params=None, data=None, **kw): assert max(c[1][0] for c in sleep.mock_calls) <= 8 # We can ask for the result again: - responses = [ + conn = client._connection = make_connection( dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), - ] - orig_job_id = job.job_id + ) result = job.result() + assert result.total_rows == 1 - assert not responses # We made all the calls we expected to. + + # We made all the calls we expected to. + assert conn.api_request.call_count == 1 # We wouldn't (and didn't) fail, because we're dealing with a successful job. # So the job id hasn't changed. - assert job.job_id == orig_job_id + assert job.job_id == "id_2" def test_retry_connection_error_with_default_retries_and_successful_first_job( @@ -209,8 +219,8 @@ def make_job_id(*args, **kwargs): mock.call( method="GET", path="/projects/PROJECT/jobs/1", - query_params={"location": "test-loc"}, - timeout=None, + query_params={"location": "test-loc", "projection": "full"}, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ), # jobs.getQueryResults x2 mock.call( @@ -229,8 +239,8 @@ def make_job_id(*args, **kwargs): mock.call( method="GET", path="/projects/PROJECT/jobs/1", - query_params={"location": "test-loc"}, - timeout=None, + query_params={"location": "test-loc", "projection": "full"}, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ), # jobs.getQueryResults mock.call( @@ -307,8 +317,7 @@ def make_job_id(*args, **kwargs): {"jobReference": job_reference_2, "status": {"state": "DONE"}}, ] - conn = client._connection = make_connection() - conn.api_request.side_effect = responses + conn = client._connection = make_connection(*responses) with freezegun.freeze_time( # Note: because of exponential backoff and a bit of jitter, @@ -341,8 +350,8 @@ def make_job_id(*args, **kwargs): mock.call( method="GET", path="/projects/PROJECT/jobs/1", - query_params={"location": "test-loc"}, - timeout=None, + query_params={"location": "test-loc", "projection": "full"}, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ), # jobs.getQueryResults x2 mock.call( @@ -361,8 +370,8 @@ def make_job_id(*args, **kwargs): mock.call( method="GET", path="/projects/PROJECT/jobs/1", - query_params={"location": "test-loc"}, - timeout=None, + query_params={"location": "test-loc", "projection": "full"}, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ), # jobs.insert mock.call( @@ -384,8 +393,8 @@ def make_job_id(*args, **kwargs): mock.call( method="GET", path="/projects/PROJECT/jobs/2", - query_params={"location": "test-loc"}, - timeout=None, + query_params={"location": "test-loc", "projection": "full"}, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ), # jobs.getQueryResults mock.call( @@ -398,8 +407,8 @@ def make_job_id(*args, **kwargs): mock.call( method="GET", path="/projects/PROJECT/jobs/2", - query_params={"location": "test-loc"}, - timeout=None, + query_params={"location": "test-loc", "projection": "full"}, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ), ] ) @@ -531,12 +540,9 @@ def test_query_and_wait_retries_job_for_DDL_queries(): https://github.com/googleapis/python-bigquery/issues/1790 """ freezegun.freeze_time(auto_tick_seconds=1) - client = mock.create_autospec(Client) - client._call_api.__name__ = "_call_api" - client._call_api.__qualname__ = "Client._call_api" - client._call_api.__annotations__ = {} - client._call_api.__type_params__ = () - client._call_api.side_effect = ( + + client = make_client() + conn = client._connection = make_connection( { "jobReference": { "projectId": "response-project", @@ -589,7 +595,7 @@ def test_query_and_wait_retries_job_for_DDL_queries(): # and https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults query_request_path = "/projects/request-project/queries" - calls = client._call_api.call_args_list + calls = conn.api_request.call_args_list _, kwargs = calls[0] assert kwargs["method"] == "POST" assert kwargs["path"] == query_request_path From f7a3da06b18cc3de03f5b5e6ce4d7c9d5a3f47ff Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 3 Jun 2024 10:23:37 -0700 Subject: [PATCH 318/536] testing: update BQML training option (#1943) This updates tests to use `max_iterations` rather than `max_iteration` which was an alpha option. Related: b/344469351 --- samples/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/tests/conftest.py b/samples/tests/conftest.py index 91603bef2..cdf52b388 100644 --- a/samples/tests/conftest.py +++ b/samples/tests/conftest.py @@ -162,7 +162,7 @@ def model_id(client: bigquery.Client, dataset_id: str) -> str: CREATE MODEL `{}` OPTIONS ( model_type='linear_reg', - max_iteration=1, + max_iterations=1, learn_rate=0.4, learn_rate_strategy='constant' ) AS ( From fc3edd5ff2cd60986af144062bc846fde5b52746 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 3 Jun 2024 16:22:16 -0400 Subject: [PATCH 319/536] chore: updates continuous CI/CD tests against specific versions of Python (#1941) Updates the regular continuous CI/CD checks to test against specific versions of Python (versions that aren't our most recent supported version and aren't our oldest supported version). Also removes a CI/CD check that is superceded by a more recent version of check (prerelease-deps >>> replaced by prerelease-deps-3.12). Modifies owlbot to avoid it adding prerelease-deps back into the mix since that file is a default in synthtool. --- .kokoro/continuous/prerelease-deps.cfg | 7 ------- .kokoro/continuous/unit-tests-misc.cfg | 9 +++++++++ owlbot.py | 1 + 3 files changed, 10 insertions(+), 7 deletions(-) delete mode 100644 .kokoro/continuous/prerelease-deps.cfg create mode 100644 .kokoro/continuous/unit-tests-misc.cfg diff --git a/.kokoro/continuous/prerelease-deps.cfg b/.kokoro/continuous/prerelease-deps.cfg deleted file mode 100644 index 3595fb43f..000000000 --- a/.kokoro/continuous/prerelease-deps.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "prerelease_deps" -} diff --git a/.kokoro/continuous/unit-tests-misc.cfg b/.kokoro/continuous/unit-tests-misc.cfg new file mode 100644 index 000000000..6598baee7 --- /dev/null +++ b/.kokoro/continuous/unit-tests-misc.cfg @@ -0,0 +1,9 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run these nox sessions. +# A subset based on Python versions that are neither our newest OR oldest +# supported versions of Python +env_vars: { + key: "NOX_SESSION" + value: "unit_noextras-3.9 unit_noextras-3.10 unit_noextras-3.11 unit-3.9 unit-3.10 unit-3.11" +} \ No newline at end of file diff --git a/owlbot.py b/owlbot.py index c2de31042..778cc3e53 100644 --- a/owlbot.py +++ b/owlbot.py @@ -70,6 +70,7 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", + ".kokoro/continuous/prerelease-deps.cfg", ".github/workflows", # exclude gh actions as credentials are needed for tests "README.rst", ], From 3e7a48d36e3c7bf6abe1b5550097178f6ca6e174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 3 Jun 2024 18:08:31 -0500 Subject: [PATCH 320/536] perf: if `page_size` or `max_results` is set on `QueryJob.result()`, use to download first page of results (#1942) * perf: if `page_size` or `max_results` is set on `QueryJob.result()`, use to download first page of results * add unit tests for query_and_wait * populate maxResults on page 2 * fix maxResults * fix coverage --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/client.py | 23 +-- google/cloud/bigquery/job/query.py | 51 +++++-- google/cloud/bigquery/table.py | 2 +- tests/unit/job/test_query.py | 225 ++++++++++++++++------------- tests/unit/test_client.py | 122 ++++++++++++++++ tests/unit/test_job_retry.py | 2 +- 6 files changed, 300 insertions(+), 125 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 4234767fe..1c222f2dd 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -109,6 +109,7 @@ DEFAULT_RETRY, DEFAULT_TIMEOUT, DEFAULT_GET_JOB_TIMEOUT, + POLLING_DEFAULT_VALUE, ) from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference @@ -1963,6 +1964,7 @@ def _get_query_results( timeout_ms: Optional[int] = None, location: Optional[str] = None, timeout: TimeoutType = DEFAULT_TIMEOUT, + page_size: int = 0, ) -> _QueryResults: """Get the query results object for a query job. @@ -1981,13 +1983,16 @@ def _get_query_results( before using ``retry``. If set, this connection timeout may be increased to a minimum value. This prevents retries on what would otherwise be a successful response. + page_size (int): + Maximum number of rows in a single response. See maxResults in + the jobs.getQueryResults REST API. Returns: google.cloud.bigquery.query._QueryResults: A new ``_QueryResults`` instance. """ - extra_params: Dict[str, Any] = {"maxResults": 0} + extra_params: Dict[str, Any] = {"maxResults": page_size} if timeout is not None: if not isinstance(timeout, (int, float)): @@ -1995,6 +2000,9 @@ def _get_query_results( else: timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) + if page_size > 0: + extra_params["formatOptions.useInt64Timestamp"] = True + if project is None: project = self.project @@ -3504,7 +3512,7 @@ def query_and_wait( location: Optional[str] = None, project: Optional[str] = None, api_timeout: TimeoutType = DEFAULT_TIMEOUT, - wait_timeout: TimeoutType = None, + wait_timeout: Union[Optional[float], object] = POLLING_DEFAULT_VALUE, retry: retries.Retry = DEFAULT_RETRY, job_retry: retries.Retry = DEFAULT_JOB_RETRY, page_size: Optional[int] = None, @@ -3538,10 +3546,12 @@ def query_and_wait( api_timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. - wait_timeout (Optional[float]): + wait_timeout (Optional[Union[float, object]]): The number of seconds to wait for the query to finish. If the query doesn't finish before this timeout, the client attempts - to cancel the query. + to cancel the query. If unset, the underlying REST API calls + have timeouts, but we still wait indefinitely for the job to + finish. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. This only applies to making RPC calls. It isn't used to retry failed jobs. This has @@ -4128,11 +4138,6 @@ def _list_rows_from_query_results( if start_index is not None: params["startIndex"] = start_index - # We don't call jobs.query with a page size, so if the user explicitly - # requests a certain size, invalidate the cache. - if page_size is not None: - first_page_response = None - params["formatOptions.useInt64Timestamp"] = True row_iterator = RowIterator( client=self, diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 25b89c3d7..a8530271a 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1385,7 +1385,10 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): raise def _reload_query_results( - self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: Optional[float] = None + self, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, + page_size: int = 0, ): """Refresh the cached query results unless already cached and complete. @@ -1395,6 +1398,9 @@ def _reload_query_results( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (int): + Maximum number of rows in a single response. See maxResults in + the jobs.getQueryResults REST API. """ # Optimization: avoid a call to jobs.getQueryResults if it's already # been fetched, e.g. from jobs.query first page of results. @@ -1425,7 +1431,14 @@ def _reload_query_results( # If an explicit timeout is not given, fall back to the transport timeout # stored in _blocking_poll() in the process of polling for job completion. - transport_timeout = timeout if timeout is not None else self._transport_timeout + if timeout is not None: + transport_timeout = timeout + else: + transport_timeout = self._transport_timeout + + # Handle PollingJob._DEFAULT_VALUE. + if not isinstance(transport_timeout, (float, int)): + transport_timeout = None self._query_results = self._client._get_query_results( self.job_id, @@ -1434,6 +1447,7 @@ def _reload_query_results( timeout_ms=timeout_ms, location=self.location, timeout=transport_timeout, + page_size=page_size, ) def result( # type: ignore # (incompatible with supertype) @@ -1515,11 +1529,25 @@ def result( # type: ignore # (incompatible with supertype) # actually correspond to a finished query job. ) + # Setting max_results should be equivalent to setting page_size with + # regards to allowing the user to tune how many results to download + # while we wait for the query to finish. See internal issue: + # 344008814. + if page_size is None and max_results is not None: + page_size = max_results + # When timeout has default sentinel value ``object()``, do not pass # anything to invoke default timeouts in subsequent calls. - kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {} + done_kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {} + reload_query_results_kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {} + list_rows_kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {} if type(timeout) is not object: - kwargs["timeout"] = timeout + done_kwargs["timeout"] = timeout + list_rows_kwargs["timeout"] = timeout + reload_query_results_kwargs["timeout"] = timeout + + if page_size is not None: + reload_query_results_kwargs["page_size"] = page_size try: retry_do_query = getattr(self, "_retry_do_query", None) @@ -1562,7 +1590,7 @@ def is_job_done(): # rateLimitExceeded errors are ambiguous. We want to know if # the query job failed and not just the call to # jobs.getQueryResults. - if self.done(retry=retry, **kwargs): + if self.done(retry=retry, **done_kwargs): # If it's already failed, we might as well stop. job_failed_exception = self.exception() if job_failed_exception is not None: @@ -1599,14 +1627,16 @@ def is_job_done(): # response from the REST API. This ensures we aren't # making any extra API calls if the previous loop # iteration fetched the finished job. - self._reload_query_results(retry=retry, **kwargs) + self._reload_query_results( + retry=retry, **reload_query_results_kwargs + ) return True # Call jobs.getQueryResults with max results set to 0 just to # wait for the query to finish. Unlike most methods, # jobs.getQueryResults hangs as long as it can to ensure we # know when the query has finished as soon as possible. - self._reload_query_results(retry=retry, **kwargs) + self._reload_query_results(retry=retry, **reload_query_results_kwargs) # Even if the query is finished now according to # jobs.getQueryResults, we'll want to reload the job status if @@ -1679,8 +1709,9 @@ def is_job_done(): # We know that there's at least 1 row, so only treat the response from # jobs.getQueryResults / jobs.query as the first page of the # RowIterator response if there are any rows in it. This prevents us - # from stopping the iteration early because we're missing rows and - # there's no next page token. + # from stopping the iteration early in the cases where we set + # maxResults=0. In that case, we're missing rows and there's no next + # page token. first_page_response = self._query_results._properties if "rows" not in first_page_response: first_page_response = None @@ -1699,7 +1730,7 @@ def is_job_done(): query_id=self.query_id, first_page_response=first_page_response, num_dml_affected_rows=self._query_results.num_dml_affected_rows, - **kwargs, + **list_rows_kwargs, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 57fc0d2be..faf827be4 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1759,7 +1759,7 @@ def _get_next_page_response(self): if self._page_size is not None: if self.page_number and "startIndex" in params: del params["startIndex"] - params["maxResults"] = self._page_size + return self.api_request( method=self._HTTP_METHOD, path=self.path, query_params=params ) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index c7b2c5f9c..66055dee1 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -28,6 +28,7 @@ from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery._job_helpers import google.cloud.bigquery.query +import google.cloud.bigquery.retry from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT from google.cloud.bigquery.table import _EmptyRowIterator @@ -841,6 +842,22 @@ def test_search_stats(self): assert isinstance(job.search_stats, SearchStats) assert job.search_stats.mode == "INDEX_USAGE_MODE_UNSPECIFIED" + def test_reload_query_results_uses_transport_timeout(self): + conn = make_connection({}) + client = _make_client(self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, self.QUERY, client) + job._transport_timeout = 123 + + job._reload_query_results() + + query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" + conn.api_request.assert_called_once_with( + method="GET", + path=query_results_path, + query_params={"maxResults": 0}, + timeout=123, + ) + def test_result_reloads_job_state_until_done(self): """Verify that result() doesn't return until state == 'DONE'. @@ -1053,7 +1070,7 @@ def test_result_with_done_job_calls_get_query_results(self): method="GET", path=query_results_path, query_params={"maxResults": 0, "location": "EU"}, - timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + timeout=None, ) query_results_page_call = mock.call( method="GET", @@ -1139,114 +1156,92 @@ def test_result_with_none_timeout(self): timeout=None, ) - def test_result_with_done_jobs_query_response_and_page_size_invalidates_cache(self): - """We don't call jobs.query with a page size, so if the user explicitly - requests a certain size, invalidate the cache. - """ - # Arrange - job_resource = self._make_resource( - started=True, ended=True, location="asia-northeast1" - ) - query_resource_done = { + def test_result_with_max_results(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "rows": [{"f": [{"v": "abc"}]}], - "pageToken": "initial-page-token-shouldnt-be-used", - "totalRows": "4", + "totalRows": "10", + "pageToken": "first-page-token", + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + {"f": [{"v": "jkl"}]}, + {"f": [{"v": "mno"}]}, + {"f": [{"v": "pqr"}]}, + # Pretend these are very large rows, so the API doesn't return + # all of the rows we asked for in the first response. + ], } query_page_resource = { - "totalRows": 4, - "pageToken": "some-page-token", + "totalRows": "10", + "pageToken": None, "rows": [ - {"f": [{"v": "row1"}]}, - {"f": [{"v": "row2"}]}, - {"f": [{"v": "row3"}]}, + {"f": [{"v": "stu"}]}, + {"f": [{"v": "vwx"}]}, + {"f": [{"v": "yz0"}]}, ], } - query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} - conn = make_connection(job_resource, query_page_resource, query_page_resource_2) - client = _make_client(self.PROJECT, connection=conn) - job = google.cloud.bigquery._job_helpers._to_query_job( - client, - "SELECT col1 FROM table", - request_config=None, - query_response=query_resource_done, + job_resource_running = self._make_resource( + started=True, ended=False, location="US" ) - # We want job.result() to refresh the job state, so the conversion is - # always "PENDING", even if the job is finished. - assert job.state == "PENDING" + job_resource_done = self._make_resource(started=True, ended=True, location="US") + conn = make_connection(job_resource_done, query_resource, query_page_resource) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource_running, client) - # Act - result = job.result(page_size=3) + max_results = 9 + result = job.result(max_results=max_results) - # Assert - actual_rows = list(result) - self.assertEqual(len(actual_rows), 4) + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 10) + + rows = list(result) + self.assertEqual(len(rows), 9) + jobs_get_path = f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}" + jobs_get_call = mock.call( + method="GET", + path=jobs_get_path, + query_params={"projection": "full", "location": "US"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" - query_page_1_call = mock.call( + query_page_waiting_call = mock.call( method="GET", path=query_results_path, query_params={ - "maxResults": 3, - "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, - "location": "asia-northeast1", + # Waiting for the results should set maxResults and cache the + # first page if page_size is set. This allows customers to + # more finely tune when we fallback to the BQ Storage API. + # See internal issue: 344008814. + "maxResults": max_results, "formatOptions.useInt64Timestamp": True, + "location": "US", }, timeout=None, ) query_page_2_call = mock.call( + timeout=None, method="GET", path=query_results_path, query_params={ - "pageToken": "some-page-token", + "pageToken": "first-page-token", "maxResults": 3, "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, - "location": "asia-northeast1", + "location": "US", "formatOptions.useInt64Timestamp": True, }, - timeout=None, ) - conn.api_request.assert_has_calls([query_page_1_call, query_page_2_call]) - - def test_result_with_max_results(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "5", - } - query_page_resource = { - "totalRows": "5", - "pageToken": None, - "rows": [ - {"f": [{"v": "abc"}]}, - {"f": [{"v": "def"}]}, - {"f": [{"v": "ghi"}]}, - ], - } - connection = make_connection(query_resource, query_page_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - max_results = 3 - - result = job.result(max_results=max_results) - - self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 5) - - rows = list(result) - - self.assertEqual(len(rows), 3) - self.assertEqual(len(connection.api_request.call_args_list), 2) - query_page_request = connection.api_request.call_args_list[1] - self.assertEqual( - query_page_request[1]["query_params"]["maxResults"], max_results + # Waiting for the results should set maxResults and cache the + # first page if max_results is set. This allows customers to + # more finely tune when we fallback to the BQ Storage API. + # See internal issue: 344008814. + conn.api_request.assert_has_calls( + [jobs_get_call, query_page_waiting_call, query_page_2_call] ) def test_result_w_custom_retry(self): @@ -1469,63 +1464,85 @@ def test_result_w_page_size(self): "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "4", - } - job_resource = self._make_resource(started=True, ended=True, location="US") - q_config = job_resource["configuration"]["query"] - q_config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - } - query_page_resource = { - "totalRows": 4, - "pageToken": "some-page-token", + "totalRows": "10", "rows": [ {"f": [{"v": "row1"}]}, {"f": [{"v": "row2"}]}, {"f": [{"v": "row3"}]}, + {"f": [{"v": "row4"}]}, + {"f": [{"v": "row5"}]}, + {"f": [{"v": "row6"}]}, + {"f": [{"v": "row7"}]}, + {"f": [{"v": "row8"}]}, + {"f": [{"v": "row9"}]}, ], + "pageToken": "first-page-token", } - query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} + job_resource_running = self._make_resource( + started=True, ended=False, location="US" + ) + job_resource_done = self._make_resource(started=True, ended=True, location="US") + destination_table = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + } + q_config = job_resource_done["configuration"]["query"] + q_config["destinationTable"] = destination_table + query_page_resource_2 = {"totalRows": 10, "rows": [{"f": [{"v": "row10"}]}]} conn = make_connection( - query_results_resource, query_page_resource, query_page_resource_2 + job_resource_running, + query_results_resource, + job_resource_done, + query_page_resource_2, ) client = _make_client(self.PROJECT, connection=conn) - job = self._get_target_class().from_api_repr(job_resource, client) + job = self._get_target_class().from_api_repr(job_resource_running, client) # Act - result = job.result(page_size=3) + result = job.result(page_size=9) # Assert actual_rows = list(result) - self.assertEqual(len(actual_rows), 4) + self.assertEqual(len(actual_rows), 10) + jobs_get_path = f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}" + jobs_get_call = mock.call( + method="GET", + path=jobs_get_path, + query_params={"projection": "full", "location": "US"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" - query_page_1_call = mock.call( + query_page_waiting_call = mock.call( method="GET", path=query_results_path, query_params={ - "maxResults": 3, - "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + # Waiting for the results should set maxResults and cache the + # first page if page_size is set. This allows customers to + # more finely tune when we fallback to the BQ Storage API. + # See internal issue: 344008814. + "maxResults": 9, "location": "US", "formatOptions.useInt64Timestamp": True, }, timeout=None, ) query_page_2_call = mock.call( + timeout=None, method="GET", path=query_results_path, query_params={ - "pageToken": "some-page-token", - "maxResults": 3, + "pageToken": "first-page-token", + "maxResults": 9, "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "US", "formatOptions.useInt64Timestamp": True, }, - timeout=None, ) - conn.api_request.assert_has_calls([query_page_1_call, query_page_2_call]) + conn.api_request.assert_has_calls( + [jobs_get_call, query_page_waiting_call, jobs_get_call, query_page_2_call] + ) def test_result_with_start_index(self): from google.cloud.bigquery.table import RowIterator diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ed5575f6c..cd336b73f 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -58,6 +58,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions +import google.cloud.bigquery.retry from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import google.cloud.bigquery.table @@ -5444,6 +5445,127 @@ def test_query_and_wait_w_location(self): sent = req["data"] self.assertEqual(sent["location"], "not-the-client-location") + def test_query_and_wait_w_max_results(self): + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(jobs_query_response) + + _ = client.query_and_wait(query, max_results=11) + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries") + sent = req["data"] + self.assertTrue(sent["formatOptions"]["useInt64Timestamp"]) + self.assertTrue(sent["maxResults"], 11) + + def test_query_and_wait_w_page_size(self): + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + jobs_query_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(jobs_query_response) + + _ = client.query_and_wait(query, page_size=11) + + # Verify the request we send is to jobs.query. + conn.api_request.assert_called_once() + _, req = conn.api_request.call_args + self.assertEqual(req["method"], "POST") + self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries") + sent = req["data"] + self.assertTrue(sent["formatOptions"]["useInt64Timestamp"]) + self.assertTrue(sent["maxResults"], 11) + + def test_query_and_wait_w_page_size_multiple_requests(self): + """ + For queries that last longer than the intial (about 10s) call to + jobs.query, we should still pass through the page size to the + subsequent calls to jobs.getQueryResults. + + See internal issue 344008814. + """ + query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" + job_reference = { + "projectId": "my-jobs-project", + "location": "my-jobs-location", + "jobId": "my-jobs-id", + } + jobs_query_response = { + "jobComplete": False, + "jobReference": job_reference, + } + jobs_get_response = { + "jobReference": job_reference, + "status": {"state": "DONE"}, + } + get_query_results_response = { + "jobComplete": True, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection( + jobs_query_response, + jobs_get_response, + get_query_results_response, + ) + + _ = client.query_and_wait(query, page_size=11) + + conn.api_request.assert_has_calls( + [ + # Verify the request we send is to jobs.query. + mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/queries", + data={ + "useLegacySql": False, + "query": query, + "formatOptions": {"useInt64Timestamp": True}, + "maxResults": 11, + "requestId": mock.ANY, + }, + timeout=None, + ), + # jobs.get: Check if the job has finished. + mock.call( + method="GET", + path="/projects/my-jobs-project/jobs/my-jobs-id", + query_params={ + "projection": "full", + "location": "my-jobs-location", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ), + # jobs.getQueryResults: wait for the query / fetch first page + mock.call( + method="GET", + path="/projects/my-jobs-project/queries/my-jobs-id", + query_params={ + # We should still pass through the page size to the + # subsequent calls to jobs.getQueryResults. + # + # See internal issue 344008814. + "maxResults": 11, + "formatOptions.useInt64Timestamp": True, + "location": "my-jobs-location", + }, + timeout=None, + ), + ] + ) + def test_query_and_wait_w_project(self): query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`" jobs_query_response = { diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index 46eb1d6b3..298ab9a56 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -247,7 +247,7 @@ def make_job_id(*args, **kwargs): method="GET", path="/projects/PROJECT/queries/1", query_params={"maxResults": 0, "location": "test-loc"}, - timeout=120, + timeout=None, ), ], ) From 8f5b4b70423c277ffd559d2034bc0b2b5fb93169 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Tue, 4 Jun 2024 07:25:55 -0700 Subject: [PATCH 321/536] fix: create query job in job.result() if doesn't exist (#1944) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: create query job in job.result() if doesn't exist * Apply suggestions from code review --------- Co-authored-by: Tim Sweña (Swast) --- google/cloud/bigquery/job/query.py | 5 ++ tests/unit/job/test_query.py | 83 ++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index a8530271a..8049b748e 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1585,6 +1585,11 @@ def is_job_done(): self._retry_do_query = retry_do_query self._job_retry = job_retry + # If the job hasn't been created, create it now. Related: + # https://github.com/googleapis/python-bigquery/issues/1940 + if self.state is None: + self._begin(retry=retry, **done_kwargs) + # Refresh the job status with jobs.get because some of the # exceptions thrown by jobs.getQueryResults like timeout and # rateLimitExceeded errors are ambiguous. We want to know if diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 66055dee1..5b69c98cf 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -1037,6 +1037,86 @@ def test_result_dry_run(self): self.assertIsNone(result.job_id) self.assertIsNone(result.query_id) + # If the job doesn't exist, create the job first. Issue: + # https://github.com/googleapis/python-bigquery/issues/1940 + def test_result_begin_job_if_not_exist(self): + begun_resource = self._make_resource() + query_running_resource = { + "jobComplete": True, + "jobReference": { + "projectId": self.PROJECT, + "jobId": self.JOB_ID, + "location": "US", + }, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "status": {"state": "RUNNING"}, + } + query_done_resource = { + "jobComplete": True, + "jobReference": { + "projectId": self.PROJECT, + "jobId": self.JOB_ID, + "location": "US", + }, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "status": {"state": "DONE"}, + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = make_connection( + begun_resource, + query_running_resource, + query_done_resource, + done_resource, + ) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + job._properties["jobReference"]["location"] = "US" + + job.result() + + create_job_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={ + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": "US", + }, + "configuration": { + "query": {"useLegacySql": False, "query": self.QUERY}, + }, + }, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={"projection": "full", "location": "US"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) + get_query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=None, + ) + + connection.api_request.assert_has_calls( + [ + # Make sure we start a job that hasn't started yet. See: + # https://github.com/googleapis/python-bigquery/issues/1940 + create_job_call, + reload_call, + get_query_results_call, + reload_call, + ] + ) + def test_result_with_done_job_calls_get_query_results(self): query_resource_done = { "jobComplete": True, @@ -1379,6 +1459,7 @@ def test_result_w_timeout_doesnt_raise(self): client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) job._properties["jobReference"]["location"] = "US" + job._properties["status"] = {"state": "RUNNING"} with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): job.result( @@ -1429,6 +1510,7 @@ def test_result_w_timeout_raises_concurrent_futures_timeout(self): client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) job._properties["jobReference"]["location"] = "US" + job._properties["status"] = {"state": "RUNNING"} with freezegun.freeze_time( "1970-01-01 00:00:00", auto_tick_seconds=1.0 @@ -2319,5 +2401,6 @@ def test_iter(self): connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) + job._properties["status"] = {"state": "RUNNING"} self.assertIsInstance(iter(job), types.GeneratorType) From cc7b3995eef4f357f031e06389db07f949987eeb Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 4 Jun 2024 13:27:44 -0700 Subject: [PATCH 322/536] chore(main): release 3.24.0 (#1928) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 19 +++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9a2c3149..c24725bef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.24.0](https://github.com/googleapis/python-bigquery/compare/v3.23.1...v3.24.0) (2024-06-04) + + +### Features + +* Add default timeout for Client.get_job() ([#1935](https://github.com/googleapis/python-bigquery/issues/1935)) ([9fbad76](https://github.com/googleapis/python-bigquery/commit/9fbad767cc228e02040436742d0cb6743d370b90)) +* Add support for map target type in Parquet options ([#1919](https://github.com/googleapis/python-bigquery/issues/1919)) ([c3f7b23](https://github.com/googleapis/python-bigquery/commit/c3f7b237383d4705ed6e720544728c4db61f6c83)) + + +### Bug Fixes + +* Create query job in job.result() if doesn't exist ([#1944](https://github.com/googleapis/python-bigquery/issues/1944)) ([8f5b4b7](https://github.com/googleapis/python-bigquery/commit/8f5b4b70423c277ffd559d2034bc0b2b5fb93169)) +* Retry `is_job_done` on `ConnectionError` ([#1930](https://github.com/googleapis/python-bigquery/issues/1930)) ([4f72723](https://github.com/googleapis/python-bigquery/commit/4f72723f539d35977bc52c5950f6e00889b5c7be)) + + +### Performance Improvements + +* If `page_size` or `max_results` is set on `QueryJob.result()`, use to download first page of results ([#1942](https://github.com/googleapis/python-bigquery/issues/1942)) ([3e7a48d](https://github.com/googleapis/python-bigquery/commit/3e7a48d36e3c7bf6abe1b5550097178f6ca6e174)) + ## [3.23.1](https://github.com/googleapis/python-bigquery/compare/v3.23.0...v3.23.1) (2024-05-21) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index a62f73ed4..79c15cf23 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.23.1" +__version__ = "3.24.0" From bfdeb3fdbc1d5b26fcd3d1433abfb0be49d12018 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 10 Jun 2024 11:49:02 -0700 Subject: [PATCH 323/536] feat: add prefer_bqstorage_client option for Connection (#1945) --- google/cloud/bigquery/dbapi/connection.py | 30 +++++++++++++++-------- tests/unit/test_dbapi_connection.py | 20 +++++++++++++++ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigquery/dbapi/connection.py b/google/cloud/bigquery/dbapi/connection.py index 66dee7dfb..a1a69b8fe 100644 --- a/google/cloud/bigquery/dbapi/connection.py +++ b/google/cloud/bigquery/dbapi/connection.py @@ -35,12 +35,18 @@ class Connection(object): A client that uses the faster BigQuery Storage API to fetch rows from BigQuery. If not passed, it is created using the same credentials as ``client`` (provided that BigQuery Storage dependencies are installed). - - If both clients are available, ``bqstorage_client`` is used for - fetching query results. + prefer_bqstorage_client (Optional[bool]): + Prefer the BigQuery Storage client over the REST client. If Storage + client isn't available, fall back to the REST client. Defaults to + ``True``. """ - def __init__(self, client=None, bqstorage_client=None): + def __init__( + self, + client=None, + bqstorage_client=None, + prefer_bqstorage_client=True, + ): if client is None: client = bigquery.Client() self._owns_client = True @@ -49,7 +55,10 @@ def __init__(self, client=None, bqstorage_client=None): # A warning is already raised by the BQ Storage client factory factory if # instantiation fails, or if the given BQ Storage client instance is outdated. - if bqstorage_client is None: + if not prefer_bqstorage_client: + bqstorage_client = None + self._owns_bqstorage_client = False + elif bqstorage_client is None: bqstorage_client = client._ensure_bqstorage_client() self._owns_bqstorage_client = bqstorage_client is not None else: @@ -95,7 +104,7 @@ def cursor(self): return new_cursor -def connect(client=None, bqstorage_client=None): +def connect(client=None, bqstorage_client=None, prefer_bqstorage_client=True): """Construct a DB-API connection to Google BigQuery. Args: @@ -108,11 +117,12 @@ def connect(client=None, bqstorage_client=None): A client that uses the faster BigQuery Storage API to fetch rows from BigQuery. If not passed, it is created using the same credentials as ``client`` (provided that BigQuery Storage dependencies are installed). - - If both clients are available, ``bqstorage_client`` is used for - fetching query results. + prefer_bqstorage_client (Optional[bool]): + Prefer the BigQuery Storage client over the REST client. If Storage + client isn't available, fall back to the REST client. Defaults to + ``True``. Returns: google.cloud.bigquery.dbapi.Connection: A new DB-API connection to BigQuery. """ - return Connection(client, bqstorage_client) + return Connection(client, bqstorage_client, prefer_bqstorage_client) diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index 4071e57e0..f5c77c448 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -122,6 +122,26 @@ def test_connect_w_both_clients(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) + def test_connect_prefer_bqstorage_client_false(self): + pytest.importorskip("google.cloud.bigquery_storage") + from google.cloud.bigquery.dbapi import connect + from google.cloud.bigquery.dbapi import Connection + + mock_client = self._mock_client() + mock_bqstorage_client = self._mock_bqstorage_client() + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client + + connection = connect( + client=mock_client, + bqstorage_client=mock_bqstorage_client, + prefer_bqstorage_client=False, + ) + + mock_client._ensure_bqstorage_client.assert_not_called() + self.assertIsInstance(connection, Connection) + self.assertIs(connection._client, mock_client) + self.assertIs(connection._bqstorage_client, None) + def test_raises_error_if_closed(self): from google.cloud.bigquery.dbapi.exceptions import ProgrammingError From 19bef8d2f902ddafd32dfb34641729e25639fbd7 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 12 Jun 2024 19:55:43 -0400 Subject: [PATCH 324/536] test: update the results of test based on change to hacker news data (#1949) * test: update the results of test based on change to hacker news data * Update tests/system/test_client.py --------- Co-authored-by: Lingqing Gan --- tests/system/test_client.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 862ef3245..95c679a14 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1788,20 +1788,35 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): # in the sorted order. expected_data = [ + [ + ("by", "pg"), + ("id", 1), + ( + "timestamp", + datetime.datetime( + 2006, 10, 9, 18, 21, 51, tzinfo=datetime.timezone.utc + ), + ), + ], [ ("by", "phyllis"), ("id", 2), - ("timestamp", datetime.datetime(2006, 10, 9, 18, 30, 28, tzinfo=UTC)), + ( + "timestamp", + datetime.datetime( + 2006, 10, 9, 18, 30, 28, tzinfo=datetime.timezone.utc + ), + ), ], [ ("by", "phyllis"), ("id", 3), - ("timestamp", datetime.datetime(2006, 10, 9, 18, 40, 33, tzinfo=UTC)), - ], - [ - ("by", "onebeerdave"), - ("id", 4), - ("timestamp", datetime.datetime(2006, 10, 9, 18, 47, 42, tzinfo=UTC)), + ( + "timestamp", + datetime.datetime( + 2006, 10, 9, 18, 40, 33, tzinfo=datetime.timezone.utc + ), + ), ], ] From 7d757b84da119f45647041196e4921556793404b Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 13 Jun 2024 13:35:20 +0200 Subject: [PATCH 325/536] chore(deps): update all dependencies (#1946) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements-test.txt | 2 +- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 12 ++++++------ samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 2 +- 10 files changed, 15 insertions(+), 15 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index a6c397822..8f0bfaad4 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.1; python_version >= '3.8' +pytest==8.2.2; python_version >= '3.8' mock==5.1.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index b35cc414c..25ed0977b 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.23.1 +google-cloud-bigquery==3.24.0 google-auth-oauthlib==1.2.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 64d436dcf..b35a54a76 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.2.1; python_version >= '3.8' +pytest==8.2.2; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index becaaf50a..e7c59ce4b 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==23.2.0 -certifi==2024.2.2 +certifi==2024.6.2 cffi===1.15.1; python_version == '3.7' cffi==1.16.0; python_version >= '3.8' charset-normalizer==3.3.2 @@ -14,15 +14,15 @@ geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==0.14.4; python_version >= '3.9' google-api-core==2.19.0 -google-auth==2.29.0 -google-cloud-bigquery==3.23.1 +google-auth==2.30.0 +google-cloud-bigquery==3.24.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.0 -googleapis-common-protos==1.63.0 +googleapis-common-protos==1.63.1 grpcio===1.62.2; python_version == '3.7' -grpcio==1.64.0; python_version >= '3.8' +grpcio==1.64.1; python_version >= '3.8' idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 @@ -49,7 +49,7 @@ rsa==4.9 Shapely==2.0.4 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.12.0; python_version >= '3.8' +typing-extensions==4.12.1; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' urllib3==2.2.1; python_version >= '3.8' diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index a6c397822..8f0bfaad4 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.1; python_version >= '3.8' +pytest==8.2.2; python_version >= '3.8' mock==5.1.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index e3a225b79..00f0b15d0 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.23.1 +google.cloud.bigquery==3.24.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index a6c397822..8f0bfaad4 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.1; python_version >= '3.8' +pytest==8.2.2; python_version >= '3.8' mock==5.1.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index f774ea183..91a4a87e6 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.23.1 +google-cloud-bigquery==3.24.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index bd1ba5028..b65023b00 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.1; python_version >= '3.8' +pytest==8.2.2; python_version >= '3.8' mock==5.1.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 89fe16387..054fa2658 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.23.1 +google-cloud-bigquery==3.24.0 From 5d10f1e94222e635cf008d83928555a628cdbb3f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 13 Jun 2024 22:34:45 +0200 Subject: [PATCH 326/536] chore(deps): update all dependencies (#1954) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e7c59ce4b..2b3e4713e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -19,14 +19,15 @@ google-cloud-bigquery==3.24.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 -google-resumable-media==2.7.0 +google-resumable-media==2.7.1 googleapis-common-protos==1.63.1 grpcio===1.62.2; python_version == '3.7' grpcio==1.64.1; python_version >= '3.8' idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 -packaging==24.0 +packaging===24.0; python_version == '3.7' +packaging==24.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' @@ -49,7 +50,7 @@ rsa==4.9 Shapely==2.0.4 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.12.1; python_version >= '3.8' +typing-extensions==4.12.2; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' urllib3==2.2.1; python_version >= '3.8' From 7e522eea776cd9a74f8078c4236f63d5ff11f20e Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Fri, 14 Jun 2024 10:22:49 -0700 Subject: [PATCH 327/536] feat: support load job option ColumnNameCharacterMap (#1952) * feat: support load job option ColumnNameCharacterMap * add unit test --- google/cloud/bigquery/job/load.py | 41 ++++++++++++++++++++++++++++++ tests/unit/job/test_load_config.py | 39 ++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 176435456..e56ce16f0 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -32,6 +32,26 @@ from google.cloud.bigquery.query import ConnectionProperty +class ColumnNameCharacterMap: + """Indicates the character map used for column names. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap + """ + + COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED = "COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED" + """Unspecified column name character map.""" + + STRICT = "STRICT" + """Support flexible column name and reject invalid column names.""" + + V1 = "V1" + """ Support alphanumeric + underscore characters and names must start with + a letter or underscore. Invalid column names will be normalized.""" + + V2 = "V2" + """Support flexible column name. Invalid column names will be normalized.""" + + class LoadJobConfig(_JobConfig): """Configuration options for load jobs. @@ -597,6 +617,27 @@ def parquet_options(self, value): else: self._del_sub_prop("parquetOptions") + @property + def column_name_character_map(self) -> str: + """Optional[google.cloud.bigquery.job.ColumnNameCharacterMap]: + Character map supported for column names in CSV/Parquet loads. Defaults + to STRICT and can be overridden by Project Config Service. Using this + option with unsupported load formats will result in an error. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.column_name_character_map + """ + return self._get_sub_prop( + "columnNameCharacterMap", + ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED, + ) + + @column_name_character_map.setter + def column_name_character_map(self, value: Optional[str]): + if value is None: + value = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED + self._set_sub_prop("columnNameCharacterMap", value) + class LoadJob(_AsyncJob): """Asynchronous job for loading data into a table. diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index e1fa2641f..becf3e959 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -843,3 +843,42 @@ def test_parquet_options_setter_clearing(self): config.parquet_options = None self.assertNotIn("parquetOptions", config._properties["load"]) + + def test_column_name_character_map_missing(self): + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = self._get_target_class()() + self.assertEqual( + config.column_name_character_map, + ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED, + ) + + def test_column_name_character_map_hit(self): + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = self._get_target_class()() + config._properties["load"]["columnNameCharacterMap"] = "STRICT" + self.assertEqual( + config.column_name_character_map, + ColumnNameCharacterMap.STRICT, + ) + + def test_column_name_character_map_setter(self): + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = self._get_target_class()() + config.column_name_character_map = "V1" + self.assertEqual( + config._properties["load"]["columnNameCharacterMap"], + ColumnNameCharacterMap.V1, + ) + + def test_column_name_character_map_none(self): + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = self._get_target_class()() + config.column_name_character_map = None + self.assertEqual( + config._properties["load"]["columnNameCharacterMap"], + ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED, + ) From 7d0fceefdf28278c1f2cdaab571de9b235320998 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 17 Jun 2024 10:20:56 -0700 Subject: [PATCH 328/536] fix: do not overwrite page_size with max_results when start_index is set (#1956) * fix: do not overwrite page_size with max_results when start_index is set * update test --- google/cloud/bigquery/job/query.py | 5 +++-- tests/unit/job/test_query.py | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 8049b748e..4ea5687e0 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1532,8 +1532,9 @@ def result( # type: ignore # (incompatible with supertype) # Setting max_results should be equivalent to setting page_size with # regards to allowing the user to tune how many results to download # while we wait for the query to finish. See internal issue: - # 344008814. - if page_size is None and max_results is not None: + # 344008814. But if start_index is set, user is trying to access a + # specific page, so we don't need to set page_size. See issue #1950. + if page_size is None and max_results is not None and start_index is None: page_size = max_results # When timeout has default sentinel value ``object()``, do not pass diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 5b69c98cf..4bbd31c73 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -1652,7 +1652,17 @@ def test_result_with_start_index(self): start_index = 1 - result = job.result(start_index=start_index) + # Verifies that page_size isn't overwritten by max_results when + # start_index is not None. See + # https://github.com/googleapis/python-bigquery/issues/1950 + page_size = 10 + max_results = 100 + + result = job.result( + page_size=page_size, + max_results=max_results, + start_index=start_index, + ) self.assertIsInstance(result, RowIterator) self.assertEqual(result.total_rows, 5) @@ -1665,6 +1675,9 @@ def test_result_with_start_index(self): self.assertEqual( tabledata_list_request[1]["query_params"]["startIndex"], start_index ) + self.assertEqual( + tabledata_list_request[1]["query_params"]["maxResults"], page_size + ) def test_result_error(self): from google.cloud import exceptions From e731b408ad97e12b7a90dd9ee8cd0251717c0f5b Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 10:05:28 -0700 Subject: [PATCH 329/536] chore(main): release 3.25.0 (#1947) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 13 +++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c24725bef..4a089b8b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.25.0](https://github.com/googleapis/python-bigquery/compare/v3.24.0...v3.25.0) (2024-06-17) + + +### Features + +* Add prefer_bqstorage_client option for Connection ([#1945](https://github.com/googleapis/python-bigquery/issues/1945)) ([bfdeb3f](https://github.com/googleapis/python-bigquery/commit/bfdeb3fdbc1d5b26fcd3d1433abfb0be49d12018)) +* Support load job option ColumnNameCharacterMap ([#1952](https://github.com/googleapis/python-bigquery/issues/1952)) ([7e522ee](https://github.com/googleapis/python-bigquery/commit/7e522eea776cd9a74f8078c4236f63d5ff11f20e)) + + +### Bug Fixes + +* Do not overwrite page_size with max_results when start_index is set ([#1956](https://github.com/googleapis/python-bigquery/issues/1956)) ([7d0fcee](https://github.com/googleapis/python-bigquery/commit/7d0fceefdf28278c1f2cdaab571de9b235320998)) + ## [3.24.0](https://github.com/googleapis/python-bigquery/compare/v3.23.1...v3.24.0) (2024-06-04) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 79c15cf23..fed077e26 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.24.0" +__version__ = "3.25.0" From 78ff5067a2695a464076aca09afe66aadbcbe63a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 20 Jun 2024 20:18:34 +0200 Subject: [PATCH 330/536] chore(deps): update dependency urllib3 to v2.2.2 [security] (#1959) Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 2b3e4713e..b6cc8912e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -53,4 +53,4 @@ typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' -urllib3==2.2.1; python_version >= '3.8' +urllib3==2.2.2; python_version >= '3.8' From b146060ae14df58c54cce42ca3cca42ac91c0b5e Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Sat, 22 Jun 2024 01:47:26 +0200 Subject: [PATCH 331/536] chore(deps): update all dependencies (#1955) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 25ed0977b..6a3d17c6f 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.24.0 +google-cloud-bigquery==3.25.0 google-auth-oauthlib==1.2.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index b6cc8912e..fd943cc83 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -15,7 +15,7 @@ geopandas===0.13.2; python_version == '3.8' geopandas==0.14.4; python_version >= '3.9' google-api-core==2.19.0 google-auth==2.30.0 -google-cloud-bigquery==3.24.0 +google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 @@ -31,7 +31,7 @@ packaging==24.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' -proto-plus==1.23.0 +proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==16.1.0; python_version >= '3.8' pyasn1===0.5.1; python_version == '3.7' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 00f0b15d0..a5b4e2aaa 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.24.0 +google.cloud.bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 91a4a87e6..78945d28b 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.24.0 +google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 054fa2658..9e181d963 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.24.0 +google-cloud-bigquery==3.25.0 From cecf01c9e5e7372ec9989c2a645473f911a4b372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 9 Jul 2024 15:07:44 -0500 Subject: [PATCH 332/536] chore: remove references to conda (#1971) --- noxfile.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/noxfile.py b/noxfile.py index 5f88e46a0..a2df2e094 100644 --- a/noxfile.py +++ b/noxfile.py @@ -339,14 +339,6 @@ def prerelease_deps(session): "--upgrade", "pyarrow", ) - session.install( - "--extra-index-url", - "https://pypi.anaconda.org/scipy-wheels-nightly/simple", - "--prefer-binary", - "--pre", - "--upgrade", - "pandas", - ) session.install( "--pre", "--upgrade", @@ -355,6 +347,7 @@ def prerelease_deps(session): "ipywidgets", "tqdm", "git+https://github.com/pypa/packaging.git", + "pandas", ) session.install( From ad1e130678f2eeadf395a8dcc611b0eb90d56c37 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 9 Jul 2024 22:49:22 +0200 Subject: [PATCH 333/536] chore(deps): update dependency certifi to v2024.7.4 [security] (#1968) Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index fd943cc83..add61f286 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==23.2.0 -certifi==2024.6.2 +certifi==2024.7.4 cffi===1.15.1; python_version == '3.7' cffi==1.16.0; python_version >= '3.8' charset-normalizer==3.3.2 From a958732aed7d9bd51ffde3dc0e6cae9ad7455b54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 10 Jul 2024 13:20:06 -0500 Subject: [PATCH 334/536] deps: bump min version of google-api-core and google-cloud-core to 2.x (#1972) Fixes constraints file to match setup.py --- setup.py | 16 +++++----------- testing/constraints-3.7.txt | 6 +++--- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/setup.py b/setup.py index ed9a6351b..db8e06113 100644 --- a/setup.py +++ b/setup.py @@ -29,18 +29,12 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - # NOTE: Maintainers, please do not require google-api-core>=2.x.x - # Until this issue is closed - # https://github.com/googleapis/google-cloud-python/issues/10566 - "google-api-core[grpc] >= 1.34.1, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,!=2.10.*", + "google-api-core[grpc] >= 2.11.1, <3.0.0dev", "google-auth >= 2.14.1, <3.0.0dev", - # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x - # Until this issue is closed - # https://github.com/googleapis/google-cloud-python/issues/10566 - "google-cloud-core >= 1.6.0, <3.0.0dev", - "google-resumable-media >= 0.6.0, < 3.0dev", + "google-cloud-core >= 2.4.1, <3.0.0dev", + "google-resumable-media >= 2.0.0, < 3.0dev", "packaging >= 20.0.0", - "python-dateutil >= 2.7.2, <3.0dev", + "python-dateutil >= 2.7.3, <3.0dev", "requests >= 2.21.0, < 3.0.0dev", ] pyarrow_dependency = "pyarrow >= 3.0.0" @@ -82,7 +76,7 @@ "opentelemetry-instrumentation >= 0.20b0", ], "bigquery_v2": [ - "proto-plus >= 1.15.0, <2.0.0dev", + "proto-plus >= 1.22.0, <2.0.0dev", "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. ], } diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index d64e06cc3..c09978d5d 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -7,13 +7,13 @@ # Then this file should have foo==1.14.0 db-dtypes==0.3.0 geopandas==0.9.0 -google-api-core==2.17.1 -google-auth==2.28.1 +google-api-core==2.11.1 +google-auth==2.14.1 google-cloud-bigquery-storage==2.24.0 google-cloud-core==2.4.1 google-cloud-testutils==1.4.0 google-crc32c==1.5.0 -google-resumable-media==2.7.0 +google-resumable-media==2.0.0 googleapis-common-protos==1.62.0 grpcio==1.47.0 grpcio-status==1.47.0 From 60128a522375823422f238312521a2ce356d9177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 10 Jul 2024 14:02:08 -0500 Subject: [PATCH 335/536] feat: use `bigquery-magics` package for the `%%bigquery` magic (#1965) * feat: use `bigquery-magics` package for the `%%bigquery` magic * ignore types on bigquery-magics package * Update samples/magics/noxfile_config.py Co-authored-by: Chalmer Lowe --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/__init__.py | 22 +- google/cloud/bigquery/magics/magics.py | 81 ++---- samples/magics/noxfile_config.py | 37 +++ samples/magics/query.py | 2 +- samples/magics/query_params_scalars.py | 2 +- samples/magics/requirements.txt | 1 + samples/notebooks/jupyter_tutorial_test.py | 2 +- samples/notebooks/requirements.txt | 1 + setup.py | 3 +- testing/constraints-3.7.txt | 1 + tests/system/test_magics.py | 5 +- tests/unit/test_magics.py | 291 +++++++++++++-------- 12 files changed, 262 insertions(+), 186 deletions(-) create mode 100644 samples/magics/noxfile_config.py diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index caf81d9aa..e80907ec9 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -27,6 +27,7 @@ - :class:`~google.cloud.bigquery.table.Table` represents a single "relation". """ +import warnings from google.cloud.bigquery import version as bigquery_version @@ -114,6 +115,11 @@ from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +try: + import bigquery_magics # type: ignore +except ImportError: + bigquery_magics = None + __all__ = [ "__version__", "Client", @@ -214,8 +220,16 @@ def load_ipython_extension(ipython): """Called by IPython when this module is loaded as an IPython extension.""" - from google.cloud.bigquery.magics.magics import _cell_magic - - ipython.register_magic_function( - _cell_magic, magic_kind="cell", magic_name="bigquery" + warnings.warn( + "%load_ext google.cloud.bigquery is deprecated. Install bigquery-magics package and use `%load_ext bigquery_magics`, instead.", + category=FutureWarning, ) + + if bigquery_magics is not None: + bigquery_magics.load_ipython_extension(ipython) + else: + from google.cloud.bigquery.magics.magics import _cell_magic + + ipython.register_magic_function( + _cell_magic, magic_kind="cell", magic_name="bigquery" + ) diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 6e6b21965..b153d959a 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -14,70 +14,11 @@ """IPython Magics -.. function:: %%bigquery - - IPython cell magic to run a query and display the result as a DataFrame - - .. code-block:: python - - %%bigquery [] [--project ] [--use_legacy_sql] - [--verbose] [--params ] - - - Parameters: - - * ```` (Optional[line argument]): - variable to store the query results. The results are not displayed if - this parameter is used. If an error occurs during the query execution, - the corresponding ``QueryJob`` instance (if available) is stored in - the variable instead. - * ``--destination_table`` (Optional[line argument]): - A dataset and table to store the query results. If table does not exists, - it will be created. If table already exists, its data will be overwritten. - Variable should be in a format .. - * ``--no_query_cache`` (Optional[line argument]): - Do not use cached query results. - * ``--project `` (Optional[line argument]): - Project to use for running the query. Defaults to the context - :attr:`~google.cloud.bigquery.magics.Context.project`. - * ``--use_bqstorage_api`` (Optional[line argument]): - [Deprecated] Not used anymore, as BigQuery Storage API is used by default. - * ``--use_rest_api`` (Optional[line argument]): - Use the BigQuery REST API instead of the Storage API. - * ``--use_legacy_sql`` (Optional[line argument]): - Runs the query using Legacy SQL syntax. Defaults to Standard SQL if - this argument not used. - * ``--verbose`` (Optional[line argument]): - If this flag is used, information including the query job ID and the - amount of time for the query to complete will not be cleared after the - query is finished. By default, this information will be displayed but - will be cleared after the query is finished. - * ``--params `` (Optional[line argument]): - If present, the argument following the ``--params`` flag must be - either: - - * :class:`str` - A JSON string representation of a dictionary in the - format ``{"param_name": "param_value"}`` (ex. ``{"num": 17}``). Use - of the parameter in the query should be indicated with - ``@param_name``. See ``In[5]`` in the Examples section below. - - * :class:`dict` reference - A reference to a ``dict`` in the format - ``{"param_name": "param_value"}``, where the value types must be JSON - serializable. The variable reference is indicated by a ``$`` before - the variable name (ex. ``$my_dict_var``). See ``In[6]`` and ``In[7]`` - in the Examples section below. - - * ```` (required, cell argument): - SQL query to run. If the query does not contain any whitespace (aside - from leading and trailing whitespace), it is assumed to represent a - fully-qualified table ID, and the latter's data will be fetched. +Install ``bigquery-magics`` and call ``%load_ext bigquery_magics`` to use the +``%%bigquery`` cell magic. - Returns: - A :class:`pandas.DataFrame` with the query results. - - .. note:: - All queries run using this magic will run using the context - :attr:`~google.cloud.bigquery.magics.Context.credentials`. +See the `BigQuery Magics reference documentation +`_. """ from __future__ import print_function @@ -109,6 +50,11 @@ from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.magics import line_arg_parser as lap +try: + import bigquery_magics # type: ignore +except ImportError: + bigquery_magics = None + IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) @@ -280,7 +226,14 @@ def progress_bar_type(self, value): self._progress_bar_type = value -context = Context() +# If bigquery_magics is available, we load that extension rather than this one. +# Ensure google.cloud.bigquery.magics.context setters are on the correct magics +# implementation in case the user has installed the package but hasn't updated +# their code. +if bigquery_magics is not None: + context = bigquery_magics.context +else: + context = Context() def _handle_error(error, destination_var=None): diff --git a/samples/magics/noxfile_config.py b/samples/magics/noxfile_config.py new file mode 100644 index 000000000..982751b8b --- /dev/null +++ b/samples/magics/noxfile_config.py @@ -0,0 +1,37 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be inported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": [ + "2.7", + ], + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT", + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/samples/magics/query.py b/samples/magics/query.py index 4d3b4418b..0ac947db0 100644 --- a/samples/magics/query.py +++ b/samples/magics/query.py @@ -24,7 +24,7 @@ def query() -> "pandas.DataFrame": ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + ip.extension_manager.load_extension("bigquery_magics") sample = """ # [START bigquery_jupyter_query] diff --git a/samples/magics/query_params_scalars.py b/samples/magics/query_params_scalars.py index e833ef93b..74f665acb 100644 --- a/samples/magics/query_params_scalars.py +++ b/samples/magics/query_params_scalars.py @@ -24,7 +24,7 @@ def query_with_parameters() -> "pandas.DataFrame": ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + ip.extension_manager.load_extension("bigquery_magics") sample = """ # [START bigquery_jupyter_query_params_scalars] diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index a5b4e2aaa..a1044c231 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,3 +1,4 @@ +bigquery_magics==0.1.0 db-dtypes==1.2.0 google.cloud.bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 diff --git a/samples/notebooks/jupyter_tutorial_test.py b/samples/notebooks/jupyter_tutorial_test.py index 9d42a4eda..2c2cf9390 100644 --- a/samples/notebooks/jupyter_tutorial_test.py +++ b/samples/notebooks/jupyter_tutorial_test.py @@ -60,7 +60,7 @@ def _strip_region_tags(sample_text: str) -> str: def test_jupyter_tutorial(ipython: "TerminalInteractiveShell") -> None: matplotlib.use("agg") ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + ip.extension_manager.load_extension("bigquery_magics") sample = """ # [START bigquery_jupyter_magic_gender_by_year] diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 78945d28b..3896a2aec 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,3 +1,4 @@ +bigquery-magics==0.1.0 db-dtypes==1.2.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 diff --git a/setup.py b/setup.py index db8e06113..9641fe695 100644 --- a/setup.py +++ b/setup.py @@ -66,8 +66,7 @@ ], "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <3.0.0dev"], "ipython": [ - "ipython>=7.23.1,!=8.1.0", - "ipykernel>=6.0.0", + "bigquery-magics >= 0.1.0", ], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index c09978d5d..fda7ce951 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -5,6 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 +bigquery-magics==0.1.0 db-dtypes==0.3.0 geopandas==0.9.0 google-api-core==2.11.1 diff --git a/tests/system/test_magics.py b/tests/system/test_magics.py index 3d761cd35..72d358a74 100644 --- a/tests/system/test_magics.py +++ b/tests/system/test_magics.py @@ -50,7 +50,10 @@ def test_bigquery_magic(ipython_interactive): current_process = psutil.Process() conn_count_start = len(current_process.connections()) - ip.extension_manager.load_extension("google.cloud.bigquery") + # Deprecated, but should still work in google-cloud-bigquery 3.x. + with pytest.warns(FutureWarning, match="bigquery_magics"): + ip.extension_manager.load_extension("google.cloud.bigquery") + sql = """ SELECT CONCAT( diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 4b1aaf14d..73b29df6b 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -155,9 +155,10 @@ def test_context_with_default_credentials(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_context_with_default_connection(): +def test_context_with_default_connection(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._credentials = None magics.context._project = None magics.context._connection = None @@ -218,9 +219,10 @@ def test_context_credentials_and_project_can_be_set_explicitly(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_context_with_custom_connection(): +def test_context_with_custom_connection(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None magics.context._credentials = None context_conn = magics.context._connection = make_connection( @@ -439,11 +441,9 @@ def test__create_dataset_if_necessary_not_exist(): @pytest.mark.usefixtures("ipython_interactive") def test_extension_load(): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") - # verify that the magic is registered and has the correct source - magic = ip.magics_manager.magics["cell"].get("bigquery") - assert magic.__module__ == "google.cloud.bigquery.magics.magics" + with pytest.warns(FutureWarning, match="bigquery_magics"): + bigquery.load_ipython_extension(ip) @pytest.mark.usefixtures("ipython_interactive") @@ -453,7 +453,8 @@ def test_extension_load(): ) def test_bigquery_magic_without_optional_arguments(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) mock_credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -494,9 +495,10 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_default_connection_user_agent(): +def test_bigquery_magic_default_connection_user_agent(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._connection = None credentials_mock = mock.create_autospec( @@ -519,9 +521,10 @@ def test_bigquery_magic_default_connection_user_agent(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_legacy_sql(): +def test_bigquery_magic_with_legacy_sql(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -538,9 +541,10 @@ def test_bigquery_magic_with_legacy_sql(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_result_saved_to_variable(ipython_ns_cleanup): +def test_bigquery_magic_with_result_saved_to_variable(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -571,9 +575,10 @@ def test_bigquery_magic_with_result_saved_to_variable(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_does_not_clear_display_in_verbose_mode(): +def test_bigquery_magic_does_not_clear_display_in_verbose_mode(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -592,9 +597,10 @@ def test_bigquery_magic_does_not_clear_display_in_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_clears_display_in_non_verbose_mode(): +def test_bigquery_magic_clears_display_in_non_verbose_mode(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -618,7 +624,8 @@ def test_bigquery_magic_clears_display_in_non_verbose_mode(): ) def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) mock_credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -686,7 +693,8 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): pandas = pytest.importorskip("pandas") ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) mock_credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -726,9 +734,10 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_w_max_results_invalid(): +def test_bigquery_magic_w_max_results_invalid(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -748,9 +757,10 @@ def test_bigquery_magic_w_max_results_invalid(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): +def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -782,9 +792,10 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_w_max_results_query_job_results_fails(): +def test_bigquery_magic_w_max_results_query_job_results_fails(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -819,9 +830,10 @@ def test_bigquery_magic_w_max_results_query_job_results_fails(): assert close_transports.called -def test_bigquery_magic_w_table_id_invalid(): +def test_bigquery_magic_w_table_id_invalid(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -848,9 +860,10 @@ def test_bigquery_magic_w_table_id_invalid(): assert "Traceback (most recent call last)" not in output -def test_bigquery_magic_w_missing_query(): +def test_bigquery_magic_w_missing_query(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -873,9 +886,10 @@ def test_bigquery_magic_w_missing_query(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): +def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None ipython_ns_cleanup.append((ip, "df")) @@ -915,9 +929,10 @@ def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_w_table_id_and_bqstorage_client(): +def test_bigquery_magic_w_table_id_and_bqstorage_client(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -959,9 +974,10 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_dryrun_option_sets_job_config(): +def test_bigquery_magic_dryrun_option_sets_job_config(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -980,9 +996,10 @@ def test_bigquery_magic_dryrun_option_sets_job_config(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_dryrun_option_returns_query_job(): +def test_bigquery_magic_dryrun_option_returns_query_job(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1004,9 +1021,12 @@ def test_bigquery_magic_dryrun_option_returns_query_job(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_dryrun_option_variable_error_message(ipython_ns_cleanup): +def test_bigquery_magic_dryrun_option_variable_error_message( + ipython_ns_cleanup, monkeypatch +): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1031,9 +1051,12 @@ def test_bigquery_magic_dryrun_option_variable_error_message(ipython_ns_cleanup) @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_dryrun_option_saves_query_job_to_variable(ipython_ns_cleanup): +def test_bigquery_magic_dryrun_option_saves_query_job_to_variable( + ipython_ns_cleanup, monkeypatch +): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1061,9 +1084,12 @@ def test_bigquery_magic_dryrun_option_saves_query_job_to_variable(ipython_ns_cle @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_saves_query_job_to_variable_on_error(ipython_ns_cleanup): +def test_bigquery_magic_saves_query_job_to_variable_on_error( + ipython_ns_cleanup, monkeypatch +): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1094,9 +1120,10 @@ def test_bigquery_magic_saves_query_job_to_variable_on_error(ipython_ns_cleanup) @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_w_maximum_bytes_billed_invalid(): +def test_bigquery_magic_w_maximum_bytes_billed_invalid(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -1118,9 +1145,12 @@ def test_bigquery_magic_w_maximum_bytes_billed_invalid(): ) @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, expected): +def test_bigquery_magic_w_maximum_bytes_billed_overrides_context( + param_value, expected, monkeypatch +): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None # Set the default maximum bytes billed, so we know it's overridable by the param. @@ -1158,9 +1188,10 @@ def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, ex @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): +def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None magics.context.default_query_job_config.maximum_bytes_billed = 1337 @@ -1195,9 +1226,10 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): +def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None magics.context.default_query_job_config = job.QueryJobConfig( @@ -1236,7 +1268,8 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_with_no_query_cache(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) conn = make_connection() monkeypatch.setattr(magics.context, "_connection", conn) monkeypatch.setattr(magics.context, "project", "project-from-context") @@ -1266,7 +1299,8 @@ def test_bigquery_magic_with_no_query_cache(monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_context_with_no_query_cache_from_context(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) conn = make_connection() monkeypatch.setattr(magics.context, "_connection", conn) monkeypatch.setattr(magics.context, "project", "project-from-context") @@ -1294,7 +1328,8 @@ def test_context_with_no_query_cache_from_context(monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None magics.context.progress_bar_type = "tqdm_gui" @@ -1338,9 +1373,10 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_progress_bar_type(): +def test_bigquery_magic_with_progress_bar_type(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.progress_bar_type = None run_query_patch = mock.patch( @@ -1358,9 +1394,10 @@ def test_bigquery_magic_with_progress_bar_type(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_project(): +def test_bigquery_magic_with_project(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -1382,9 +1419,10 @@ def test_bigquery_magic_with_project(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_bigquery_api_endpoint(ipython_ns_cleanup): +def test_bigquery_magic_with_bigquery_api_endpoint(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._connection = None run_query_patch = mock.patch( @@ -1404,9 +1442,10 @@ def test_bigquery_magic_with_bigquery_api_endpoint(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_bigquery_api_endpoint_context_dict(): +def test_bigquery_magic_with_bigquery_api_endpoint_context_dict(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._connection = None magics.context.bigquery_client_options = {} @@ -1427,9 +1466,10 @@ def test_bigquery_magic_with_bigquery_api_endpoint_context_dict(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_bqstorage_api_endpoint(ipython_ns_cleanup): +def test_bigquery_magic_with_bqstorage_api_endpoint(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._connection = None run_query_patch = mock.patch( @@ -1449,9 +1489,10 @@ def test_bigquery_magic_with_bqstorage_api_endpoint(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_bqstorage_api_endpoint_context_dict(): +def test_bigquery_magic_with_bqstorage_api_endpoint_context_dict(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._connection = None magics.context.bqstorage_client_options = {} @@ -1472,9 +1513,10 @@ def test_bigquery_magic_with_bqstorage_api_endpoint_context_dict(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_multiple_options(): +def test_bigquery_magic_with_multiple_options(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -1504,9 +1546,10 @@ def test_bigquery_magic_with_multiple_options(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_string_params(ipython_ns_cleanup): +def test_bigquery_magic_with_string_params(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1541,9 +1584,10 @@ def test_bigquery_magic_with_string_params(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params(ipython_ns_cleanup): +def test_bigquery_magic_with_dict_params(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1585,9 +1629,10 @@ def test_bigquery_magic_with_dict_params(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params_nonexisting(): +def test_bigquery_magic_with_dict_params_nonexisting(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1600,9 +1645,10 @@ def test_bigquery_magic_with_dict_params_nonexisting(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params_incorrect_syntax(): +def test_bigquery_magic_with_dict_params_incorrect_syntax(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1616,9 +1662,10 @@ def test_bigquery_magic_with_dict_params_incorrect_syntax(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params_duplicate(): +def test_bigquery_magic_with_dict_params_duplicate(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1634,9 +1681,10 @@ def test_bigquery_magic_with_dict_params_duplicate(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_option_value_incorrect(): +def test_bigquery_magic_with_option_value_incorrect(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1650,9 +1698,12 @@ def test_bigquery_magic_with_option_value_incorrect(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params_negative_value(ipython_ns_cleanup): +def test_bigquery_magic_with_dict_params_negative_value( + ipython_ns_cleanup, monkeypatch +): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1690,9 +1741,10 @@ def test_bigquery_magic_with_dict_params_negative_value(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup): +def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1730,9 +1782,10 @@ def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup): +def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1770,9 +1823,10 @@ def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_improperly_formatted_params(): +def test_bigquery_magic_with_improperly_formatted_params(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1788,9 +1842,12 @@ def test_bigquery_magic_with_improperly_formatted_params(): ) @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_valid_query_in_existing_variable(ipython_ns_cleanup, raw_sql): +def test_bigquery_magic_valid_query_in_existing_variable( + ipython_ns_cleanup, raw_sql, monkeypatch +): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1827,9 +1884,10 @@ def test_bigquery_magic_valid_query_in_existing_variable(ipython_ns_cleanup, raw @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_nonexisting_query_variable(): +def test_bigquery_magic_nonexisting_query_variable(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1851,9 +1909,10 @@ def test_bigquery_magic_nonexisting_query_variable(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_empty_query_variable_name(): +def test_bigquery_magic_empty_query_variable_name(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1873,9 +1932,10 @@ def test_bigquery_magic_empty_query_variable_name(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup): +def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup, monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1899,9 +1959,10 @@ def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_query_variable_not_identifier(): +def test_bigquery_magic_query_variable_not_identifier(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1922,9 +1983,10 @@ def test_bigquery_magic_query_variable_not_identifier(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_bigquery_magic_with_invalid_multiple_option_values(): +def test_bigquery_magic_with_invalid_multiple_option_values(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -1939,9 +2001,10 @@ def test_bigquery_magic_with_invalid_multiple_option_values(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_omits_tracebacks_from_error_message(): +def test_bigquery_magic_omits_tracebacks_from_error_message(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -1966,9 +2029,10 @@ def test_bigquery_magic_omits_tracebacks_from_error_message(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_w_destination_table_invalid_format(): +def test_bigquery_magic_w_destination_table_invalid_format(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context._project = None credentials_mock = mock.create_autospec( @@ -1994,9 +2058,10 @@ def test_bigquery_magic_w_destination_table_invalid_format(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_w_destination_table(): +def test_bigquery_magic_w_destination_table(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -2026,9 +2091,10 @@ def test_bigquery_magic_w_destination_table(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_create_dataset_fails(): +def test_bigquery_magic_create_dataset_fails(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) @@ -2056,9 +2122,10 @@ def test_bigquery_magic_create_dataset_fails(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_with_location(): +def test_bigquery_magic_with_location(monkeypatch): ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") + monkeypatch.setattr(bigquery, "bigquery_magics", None) + bigquery.load_ipython_extension(ip) magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) From 14986bb4e6260e409d231cd03c116af1f3d5c0ef Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 17 Jul 2024 17:21:49 -0400 Subject: [PATCH 336/536] chore: update templated files (#1975) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: update templated files * remove obsolete code in owlbot.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .flake8 | 2 +- .github/.OwlBot.lock.yaml | 3 +- .github/auto-label.yaml | 2 +- .kokoro/build.sh | 2 +- .kokoro/docker/docs/Dockerfile | 23 +- .kokoro/docker/docs/requirements.txt | 40 ++- .kokoro/populate-secrets.sh | 2 +- .kokoro/publish-docs.sh | 2 +- .kokoro/release.sh | 2 +- .kokoro/requirements.txt | 509 ++++++++++++++------------- .kokoro/test-samples-against-head.sh | 2 +- .kokoro/test-samples-impl.sh | 2 +- .kokoro/test-samples.sh | 2 +- .kokoro/trampoline.sh | 2 +- .kokoro/trampoline_v2.sh | 2 +- .pre-commit-config.yaml | 2 +- .trampolinerc | 2 +- MANIFEST.in | 2 +- docs/conf.py | 2 +- owlbot.py | 23 -- scripts/decrypt-secrets.sh | 2 +- scripts/readme-gen/readme_gen.py | 2 +- 22 files changed, 319 insertions(+), 313 deletions(-) diff --git a/.flake8 b/.flake8 index 87f6e408c..32986c792 100644 --- a/.flake8 +++ b/.flake8 @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 81f87c569..001b1b1ca 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5a4c19d17e597b92d786e569be101e636c9c2817731f80a5adec56b2aa8fe070 -# created: 2024-04-12T11:35:58.922854369Z + digest: sha256:52210e0e0559f5ea8c52be148b33504022e1faef4e95fbe4b32d68022af2fa7e diff --git a/.github/auto-label.yaml b/.github/auto-label.yaml index 8b37ee897..21786a4eb 100644 --- a/.github/auto-label.yaml +++ b/.github/auto-label.yaml @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/build.sh b/.kokoro/build.sh index f38bda804..e4da2e2a7 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile index bdaf39fe2..5205308b3 100644 --- a/.kokoro/docker/docs/Dockerfile +++ b/.kokoro/docker/docs/Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ubuntu:22.04 +from ubuntu:24.04 ENV DEBIAN_FRONTEND noninteractive @@ -40,7 +40,6 @@ RUN apt-get update \ libssl-dev \ libsqlite3-dev \ portaudio19-dev \ - python3-distutils \ redis-server \ software-properties-common \ ssh \ @@ -60,18 +59,22 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* \ && rm -f /var/cache/apt/archives/*.deb -###################### Install python 3.9.13 -# Download python 3.9.13 -RUN wget https://www.python.org/ftp/python/3.9.13/Python-3.9.13.tgz +###################### Install python 3.10.14 for docs/docfx session + +# Download python 3.10.14 +RUN wget https://www.python.org/ftp/python/3.10.14/Python-3.10.14.tgz # Extract files -RUN tar -xvf Python-3.9.13.tgz +RUN tar -xvf Python-3.10.14.tgz -# Install python 3.9.13 -RUN ./Python-3.9.13/configure --enable-optimizations +# Install python 3.10.14 +RUN ./Python-3.10.14/configure --enable-optimizations RUN make altinstall +RUN python3.10 -m venv /venv +ENV PATH /venv/bin:$PATH + ###################### Install pip RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ && python3 /tmp/get-pip.py \ @@ -84,4 +87,4 @@ RUN python3 -m pip COPY requirements.txt /requirements.txt RUN python3 -m pip install --require-hashes -r requirements.txt -CMD ["python3.8"] +CMD ["python3.10"] diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index 0e5d70f20..7129c7715 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -4,9 +4,9 @@ # # pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.2.3 \ - --hash=sha256:bf7900329262e481be5a15f56f19736b376df6f82ed27576fa893652c5de6c23 \ - --hash=sha256:c12355e0494c76a2a7b73e3a59b09024ca0ba1e279fb9ed6c1b82d5b74b6a70c +argcomplete==3.4.0 \ + --hash=sha256:69a79e083a716173e5532e0fa3bef45f793f4e61096cf52b5a42c0211c8b8aa5 \ + --hash=sha256:c2abcdfe1be8ace47ba777d4fce319eb13bf8ad9dace8d085dcad6eded88057f # via nox colorlog==6.8.2 \ --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ @@ -16,23 +16,27 @@ distlib==0.3.8 \ --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 # via virtualenv -filelock==3.13.1 \ - --hash=sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e \ - --hash=sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c +filelock==3.15.4 \ + --hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ + --hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7 # via virtualenv -nox==2024.3.2 \ - --hash=sha256:e53514173ac0b98dd47585096a55572fe504fecede58ced708979184d05440be \ - --hash=sha256:f521ae08a15adbf5e11f16cb34e8d0e6ea521e0b92868f684e91677deb974553 +nox==2024.4.15 \ + --hash=sha256:6492236efa15a460ecb98e7b67562a28b70da006ab0be164e8821177577c0565 \ + --hash=sha256:ecf6700199cdfa9e5ea0a41ff5e6ef4641d09508eda6edb89d9987864115817f # via -r requirements.in -packaging==24.0 \ - --hash=sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 \ - --hash=sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9 +packaging==24.1 \ + --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ + --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 # via nox -platformdirs==4.2.0 \ - --hash=sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068 \ - --hash=sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768 +platformdirs==4.2.2 \ + --hash=sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee \ + --hash=sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3 # via virtualenv -virtualenv==20.25.1 \ - --hash=sha256:961c026ac520bac5f69acb8ea063e8a4f071bcc9457b9c1f28f6b085c511583a \ - --hash=sha256:e08e13ecdca7a0bd53798f356d5831434afa5b07b93f0abdf0797b7a06ffe197 +tomli==2.0.1 \ + --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ + --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f + # via nox +virtualenv==20.26.3 \ + --hash=sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a \ + --hash=sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589 # via nox diff --git a/.kokoro/populate-secrets.sh b/.kokoro/populate-secrets.sh index 6f3972140..c435402f4 100755 --- a/.kokoro/populate-secrets.sh +++ b/.kokoro/populate-secrets.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC. +# Copyright 2024 Google LLC. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 9eafe0be3..38f083f05 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 078fc1c20..81cee716e 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 51f92b8e1..9622baf0b 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -4,21 +4,25 @@ # # pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.1.4 \ - --hash=sha256:72558ba729e4c468572609817226fb0a6e7e9a0a7d477b882be168c0b4a62b94 \ - --hash=sha256:fbe56f8cda08aa9a04b307d8482ea703e96a6a801611acb4be9bf3942017989f +argcomplete==3.4.0 \ + --hash=sha256:69a79e083a716173e5532e0fa3bef45f793f4e61096cf52b5a42c0211c8b8aa5 \ + --hash=sha256:c2abcdfe1be8ace47ba777d4fce319eb13bf8ad9dace8d085dcad6eded88057f # via nox -attrs==23.1.0 \ - --hash=sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04 \ - --hash=sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015 +attrs==23.2.0 \ + --hash=sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30 \ + --hash=sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1 # via gcp-releasetool -cachetools==5.3.2 \ - --hash=sha256:086ee420196f7b2ab9ca2db2520aca326318b68fe5ba8bc4d49cca91add450f2 \ - --hash=sha256:861f35a13a451f94e301ce2bec7cac63e881232ccce7ed67fab9b5df4d3beaa1 +backports-tarfile==1.2.0 \ + --hash=sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34 \ + --hash=sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991 + # via jaraco-context +cachetools==5.3.3 \ + --hash=sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945 \ + --hash=sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105 # via google-auth -certifi==2023.7.22 \ - --hash=sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082 \ - --hash=sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9 +certifi==2024.7.4 \ + --hash=sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b \ + --hash=sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90 # via requests cffi==1.16.0 \ --hash=sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc \ @@ -87,90 +91,90 @@ click==8.0.4 \ # -r requirements.in # gcp-docuploader # gcp-releasetool -colorlog==6.7.0 \ - --hash=sha256:0d33ca236784a1ba3ff9c532d4964126d8a2c44f1f0cb1d2b0728196f512f662 \ - --hash=sha256:bd94bd21c1e13fac7bd3153f4bc3a7dc0eb0974b8bc2fdf1a989e474f6e582e5 +colorlog==6.8.2 \ + --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ + --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 # via # gcp-docuploader # nox -cryptography==42.0.5 \ - --hash=sha256:0270572b8bd2c833c3981724b8ee9747b3ec96f699a9665470018594301439ee \ - --hash=sha256:111a0d8553afcf8eb02a4fea6ca4f59d48ddb34497aa8706a6cf536f1a5ec576 \ - --hash=sha256:16a48c23a62a2f4a285699dba2e4ff2d1cff3115b9df052cdd976a18856d8e3d \ - --hash=sha256:1b95b98b0d2af784078fa69f637135e3c317091b615cd0905f8b8a087e86fa30 \ - --hash=sha256:1f71c10d1e88467126f0efd484bd44bca5e14c664ec2ede64c32f20875c0d413 \ - --hash=sha256:2424ff4c4ac7f6b8177b53c17ed5d8fa74ae5955656867f5a8affaca36a27abb \ - --hash=sha256:2bce03af1ce5a5567ab89bd90d11e7bbdff56b8af3acbbec1faded8f44cb06da \ - --hash=sha256:329906dcc7b20ff3cad13c069a78124ed8247adcac44b10bea1130e36caae0b4 \ - --hash=sha256:37dd623507659e08be98eec89323469e8c7b4c1407c85112634ae3dbdb926fdd \ - --hash=sha256:3eaafe47ec0d0ffcc9349e1708be2aaea4c6dd4978d76bf6eb0cb2c13636c6fc \ - --hash=sha256:5e6275c09d2badf57aea3afa80d975444f4be8d3bc58f7f80d2a484c6f9485c8 \ - --hash=sha256:6fe07eec95dfd477eb9530aef5bead34fec819b3aaf6c5bd6d20565da607bfe1 \ - --hash=sha256:7367d7b2eca6513681127ebad53b2582911d1736dc2ffc19f2c3ae49997496bc \ - --hash=sha256:7cde5f38e614f55e28d831754e8a3bacf9ace5d1566235e39d91b35502d6936e \ - --hash=sha256:9481ffe3cf013b71b2428b905c4f7a9a4f76ec03065b05ff499bb5682a8d9ad8 \ - --hash=sha256:98d8dc6d012b82287f2c3d26ce1d2dd130ec200c8679b6213b3c73c08b2b7940 \ - --hash=sha256:a011a644f6d7d03736214d38832e030d8268bcff4a41f728e6030325fea3e400 \ - --hash=sha256:a2913c5375154b6ef2e91c10b5720ea6e21007412f6437504ffea2109b5a33d7 \ - --hash=sha256:a30596bae9403a342c978fb47d9b0ee277699fa53bbafad14706af51fe543d16 \ - --hash=sha256:b03c2ae5d2f0fc05f9a2c0c997e1bc18c8229f392234e8a0194f202169ccd278 \ - --hash=sha256:b6cd2203306b63e41acdf39aa93b86fb566049aeb6dc489b70e34bcd07adca74 \ - --hash=sha256:b7ffe927ee6531c78f81aa17e684e2ff617daeba7f189f911065b2ea2d526dec \ - --hash=sha256:b8cac287fafc4ad485b8a9b67d0ee80c66bf3574f655d3b97ef2e1082360faf1 \ - --hash=sha256:ba334e6e4b1d92442b75ddacc615c5476d4ad55cc29b15d590cc6b86efa487e2 \ - --hash=sha256:ba3e4a42397c25b7ff88cdec6e2a16c2be18720f317506ee25210f6d31925f9c \ - --hash=sha256:c41fb5e6a5fe9ebcd58ca3abfeb51dffb5d83d6775405305bfa8715b76521922 \ - --hash=sha256:cd2030f6650c089aeb304cf093f3244d34745ce0cfcc39f20c6fbfe030102e2a \ - --hash=sha256:cd65d75953847815962c84a4654a84850b2bb4aed3f26fadcc1c13892e1e29f6 \ - --hash=sha256:e4985a790f921508f36f81831817cbc03b102d643b5fcb81cd33df3fa291a1a1 \ - --hash=sha256:e807b3188f9eb0eaa7bbb579b462c5ace579f1cedb28107ce8b48a9f7ad3679e \ - --hash=sha256:f12764b8fffc7a123f641d7d049d382b73f96a34117e0b637b80643169cec8ac \ - --hash=sha256:f8837fe1d6ac4a8052a9a8ddab256bc006242696f03368a4009be7ee3075cdb7 +cryptography==42.0.8 \ + --hash=sha256:013629ae70b40af70c9a7a5db40abe5d9054e6f4380e50ce769947b73bf3caad \ + --hash=sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583 \ + --hash=sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b \ + --hash=sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c \ + --hash=sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1 \ + --hash=sha256:343728aac38decfdeecf55ecab3264b015be68fc2816ca800db649607aeee648 \ + --hash=sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949 \ + --hash=sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba \ + --hash=sha256:5a94eccb2a81a309806027e1670a358b99b8fe8bfe9f8d329f27d72c094dde8c \ + --hash=sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9 \ + --hash=sha256:7016f837e15b0a1c119d27ecd89b3515f01f90a8615ed5e9427e30d9cdbfed3d \ + --hash=sha256:81884c4d096c272f00aeb1f11cf62ccd39763581645b0812e99a91505fa48e0c \ + --hash=sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e \ + --hash=sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2 \ + --hash=sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d \ + --hash=sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7 \ + --hash=sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70 \ + --hash=sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2 \ + --hash=sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7 \ + --hash=sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14 \ + --hash=sha256:ba4f0a211697362e89ad822e667d8d340b4d8d55fae72cdd619389fb5912eefe \ + --hash=sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e \ + --hash=sha256:c9bb2ae11bfbab395bdd072985abde58ea9860ed84e59dbc0463a5d0159f5b71 \ + --hash=sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961 \ + --hash=sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7 \ + --hash=sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c \ + --hash=sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28 \ + --hash=sha256:dec9b018df185f08483f294cae6ccac29e7a6e0678996587363dc352dc65c842 \ + --hash=sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902 \ + --hash=sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801 \ + --hash=sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a \ + --hash=sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e # via # -r requirements.in # gcp-releasetool # secretstorage -distlib==0.3.7 \ - --hash=sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057 \ - --hash=sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8 +distlib==0.3.8 \ + --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ + --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 # via virtualenv -docutils==0.20.1 \ - --hash=sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6 \ - --hash=sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b +docutils==0.21.2 \ + --hash=sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f \ + --hash=sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 # via readme-renderer -filelock==3.13.1 \ - --hash=sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e \ - --hash=sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c +filelock==3.15.4 \ + --hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ + --hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7 # via virtualenv gcp-docuploader==0.6.5 \ --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea # via -r requirements.in -gcp-releasetool==2.0.0 \ - --hash=sha256:3d73480b50ba243f22d7c7ec08b115a30e1c7817c4899781840c26f9c55b8277 \ - --hash=sha256:7aa9fd935ec61e581eb8458ad00823786d91756c25e492f372b2b30962f3c28f +gcp-releasetool==2.0.1 \ + --hash=sha256:34314a910c08e8911d9c965bd44f8f2185c4f556e737d719c33a41f6a610de96 \ + --hash=sha256:b0d5863c6a070702b10883d37c4bdfd74bf930fe417f36c0c965d3b7c779ae62 # via -r requirements.in -google-api-core==2.12.0 \ - --hash=sha256:c22e01b1e3c4dcd90998494879612c38d0a3411d1f7b679eb89e2abe3ce1f553 \ - --hash=sha256:ec6054f7d64ad13b41e43d96f735acbd763b0f3b695dabaa2d579673f6a6e160 +google-api-core==2.19.1 \ + --hash=sha256:f12a9b8309b5e21d92483bbd47ce2c445861ec7d269ef6784ecc0ea8c1fa6125 \ + --hash=sha256:f4695f1e3650b316a795108a76a1c416e6afb036199d1c1f1f110916df479ffd # via # google-cloud-core # google-cloud-storage -google-auth==2.23.4 \ - --hash=sha256:79905d6b1652187def79d491d6e23d0cbb3a21d3c7ba0dbaa9c8a01906b13ff3 \ - --hash=sha256:d4bbc92fe4b8bfd2f3e8d88e5ba7085935da208ee38a134fc280e7ce682a05f2 +google-auth==2.31.0 \ + --hash=sha256:042c4702efa9f7d3c48d3a69341c209381b125faa6dbf3ebe56bc7e40ae05c23 \ + --hash=sha256:87805c36970047247c8afe614d4e3af8eceafc1ebba0c679fe75ddd1d575e871 # via # gcp-releasetool # google-api-core # google-cloud-core # google-cloud-storage -google-cloud-core==2.3.3 \ - --hash=sha256:37b80273c8d7eee1ae816b3a20ae43585ea50506cb0e60f3cf5be5f87f1373cb \ - --hash=sha256:fbd11cad3e98a7e5b0343dc07cb1039a5ffd7a5bb96e1f1e27cee4bda4a90863 +google-cloud-core==2.4.1 \ + --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ + --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 # via google-cloud-storage -google-cloud-storage==2.13.0 \ - --hash=sha256:ab0bf2e1780a1b74cf17fccb13788070b729f50c252f0c94ada2aae0ca95437d \ - --hash=sha256:f62dc4c7b6cd4360d072e3deb28035fbdad491ac3d9b0b1815a12daea10f37c7 +google-cloud-storage==2.17.0 \ + --hash=sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388 \ + --hash=sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1 # via gcp-docuploader google-crc32c==1.5.0 \ --hash=sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a \ @@ -244,28 +248,36 @@ google-crc32c==1.5.0 \ # via # google-cloud-storage # google-resumable-media -google-resumable-media==2.6.0 \ - --hash=sha256:972852f6c65f933e15a4a210c2b96930763b47197cdf4aa5f5bea435efb626e7 \ - --hash=sha256:fc03d344381970f79eebb632a3c18bb1828593a2dc5572b5f90115ef7d11e81b +google-resumable-media==2.7.1 \ + --hash=sha256:103ebc4ba331ab1bfdac0250f8033627a2cd7cde09e7ccff9181e31ba4315b2c \ + --hash=sha256:eae451a7b2e2cdbaaa0fd2eb00cc8a1ee5e95e16b55597359cbc3d27d7d90e33 # via google-cloud-storage -googleapis-common-protos==1.61.0 \ - --hash=sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0 \ - --hash=sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b +googleapis-common-protos==1.63.2 \ + --hash=sha256:27a2499c7e8aff199665b22741997e485eccc8645aa9176c7c988e6fae507945 \ + --hash=sha256:27c5abdffc4911f28101e635de1533fb4cfd2c37fbaa9174587c799fac90aa87 # via google-api-core idna==3.7 \ --hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \ --hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 # via requests -importlib-metadata==6.8.0 \ - --hash=sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb \ - --hash=sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743 +importlib-metadata==8.0.0 \ + --hash=sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f \ + --hash=sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812 # via # -r requirements.in # keyring # twine -jaraco-classes==3.3.0 \ - --hash=sha256:10afa92b6743f25c0cf5f37c6bb6e18e2c5bb84a16527ccfc0040ea377e7aaeb \ - --hash=sha256:c063dd08e89217cee02c8d5e5ec560f2c8ce6cdc2fcdc2e68f7b2e5547ed3621 +jaraco-classes==3.4.0 \ + --hash=sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd \ + --hash=sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790 + # via keyring +jaraco-context==5.3.0 \ + --hash=sha256:3e16388f7da43d384a1a7cd3452e72e14732ac9fe459678773a3608a812bf266 \ + --hash=sha256:c2f67165ce1f9be20f32f650f25d8edfc1646a8aeee48ae06fb35f90763576d2 + # via keyring +jaraco-functools==4.0.1 \ + --hash=sha256:3b24ccb921d6b593bdceb56ce14799204f473976e2a9d4b15b04d0f2c2326664 \ + --hash=sha256:d33fa765374c0611b52f8b3a795f8900869aa88c84769d4d1746cd68fb28c3e8 # via keyring jeepney==0.8.0 \ --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ @@ -273,13 +285,13 @@ jeepney==0.8.0 \ # via # keyring # secretstorage -jinja2==3.1.3 \ - --hash=sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa \ - --hash=sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90 +jinja2==3.1.4 \ + --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ + --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d # via gcp-releasetool -keyring==24.2.0 \ - --hash=sha256:4901caaf597bfd3bbd78c9a0c7c4c29fcd8310dab2cffefe749e916b6527acd6 \ - --hash=sha256:ca0746a19ec421219f4d713f848fa297a661a8a8c1504867e55bfb5e09091509 +keyring==25.2.1 \ + --hash=sha256:2458681cdefc0dbc0b7eb6cf75d0b98e59f9ad9b2d4edd319d18f68bdca95e50 \ + --hash=sha256:daaffd42dbda25ddafb1ad5fec4024e5bbcfe424597ca1ca452b299861e49f1b # via # gcp-releasetool # twine @@ -287,146 +299,153 @@ markdown-it-py==3.0.0 \ --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb # via rich -markupsafe==2.1.3 \ - --hash=sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e \ - --hash=sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e \ - --hash=sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431 \ - --hash=sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686 \ - --hash=sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c \ - --hash=sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559 \ - --hash=sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc \ - --hash=sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb \ - --hash=sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939 \ - --hash=sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c \ - --hash=sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0 \ - --hash=sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4 \ - --hash=sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9 \ - --hash=sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575 \ - --hash=sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba \ - --hash=sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d \ - --hash=sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd \ - --hash=sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3 \ - --hash=sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00 \ - --hash=sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155 \ - --hash=sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac \ - --hash=sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52 \ - --hash=sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f \ - --hash=sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8 \ - --hash=sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b \ - --hash=sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007 \ - --hash=sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24 \ - --hash=sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea \ - --hash=sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198 \ - --hash=sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0 \ - --hash=sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee \ - --hash=sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be \ - --hash=sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2 \ - --hash=sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1 \ - --hash=sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707 \ - --hash=sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6 \ - --hash=sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c \ - --hash=sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58 \ - --hash=sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823 \ - --hash=sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779 \ - --hash=sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636 \ - --hash=sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c \ - --hash=sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad \ - --hash=sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee \ - --hash=sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc \ - --hash=sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2 \ - --hash=sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48 \ - --hash=sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7 \ - --hash=sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e \ - --hash=sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b \ - --hash=sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa \ - --hash=sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5 \ - --hash=sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e \ - --hash=sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb \ - --hash=sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9 \ - --hash=sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57 \ - --hash=sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc \ - --hash=sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc \ - --hash=sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2 \ - --hash=sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11 +markupsafe==2.1.5 \ + --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \ + --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \ + --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \ + --hash=sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 \ + --hash=sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532 \ + --hash=sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f \ + --hash=sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617 \ + --hash=sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df \ + --hash=sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4 \ + --hash=sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906 \ + --hash=sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f \ + --hash=sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4 \ + --hash=sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8 \ + --hash=sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371 \ + --hash=sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2 \ + --hash=sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465 \ + --hash=sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52 \ + --hash=sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6 \ + --hash=sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169 \ + --hash=sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad \ + --hash=sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2 \ + --hash=sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0 \ + --hash=sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029 \ + --hash=sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f \ + --hash=sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a \ + --hash=sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced \ + --hash=sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5 \ + --hash=sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c \ + --hash=sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf \ + --hash=sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9 \ + --hash=sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb \ + --hash=sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad \ + --hash=sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3 \ + --hash=sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1 \ + --hash=sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46 \ + --hash=sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc \ + --hash=sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a \ + --hash=sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee \ + --hash=sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900 \ + --hash=sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5 \ + --hash=sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea \ + --hash=sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f \ + --hash=sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5 \ + --hash=sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e \ + --hash=sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a \ + --hash=sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f \ + --hash=sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50 \ + --hash=sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a \ + --hash=sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b \ + --hash=sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4 \ + --hash=sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff \ + --hash=sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2 \ + --hash=sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46 \ + --hash=sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b \ + --hash=sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf \ + --hash=sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5 \ + --hash=sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5 \ + --hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \ + --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \ + --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68 # via jinja2 mdurl==0.1.2 \ --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba # via markdown-it-py -more-itertools==10.1.0 \ - --hash=sha256:626c369fa0eb37bac0291bce8259b332fd59ac792fa5497b59837309cd5b114a \ - --hash=sha256:64e0735fcfdc6f3464ea133afe8ea4483b1c5fe3a3d69852e6503b43a0b222e6 - # via jaraco-classes -nh3==0.2.14 \ - --hash=sha256:116c9515937f94f0057ef50ebcbcc10600860065953ba56f14473ff706371873 \ - --hash=sha256:18415df36db9b001f71a42a3a5395db79cf23d556996090d293764436e98e8ad \ - --hash=sha256:203cac86e313cf6486704d0ec620a992c8bc164c86d3a4fd3d761dd552d839b5 \ - --hash=sha256:2b0be5c792bd43d0abef8ca39dd8acb3c0611052ce466d0401d51ea0d9aa7525 \ - --hash=sha256:377aaf6a9e7c63962f367158d808c6a1344e2b4f83d071c43fbd631b75c4f0b2 \ - --hash=sha256:525846c56c2bcd376f5eaee76063ebf33cf1e620c1498b2a40107f60cfc6054e \ - --hash=sha256:5529a3bf99402c34056576d80ae5547123f1078da76aa99e8ed79e44fa67282d \ - --hash=sha256:7771d43222b639a4cd9e341f870cee336b9d886de1ad9bec8dddab22fe1de450 \ - --hash=sha256:88c753efbcdfc2644a5012938c6b9753f1c64a5723a67f0301ca43e7b85dcf0e \ - --hash=sha256:93a943cfd3e33bd03f77b97baa11990148687877b74193bf777956b67054dcc6 \ - --hash=sha256:9be2f68fb9a40d8440cbf34cbf40758aa7f6093160bfc7fb018cce8e424f0c3a \ - --hash=sha256:a0c509894fd4dccdff557068e5074999ae3b75f4c5a2d6fb5415e782e25679c4 \ - --hash=sha256:ac8056e937f264995a82bf0053ca898a1cb1c9efc7cd68fa07fe0060734df7e4 \ - --hash=sha256:aed56a86daa43966dd790ba86d4b810b219f75b4bb737461b6886ce2bde38fd6 \ - --hash=sha256:e8986f1dd3221d1e741fda0a12eaa4a273f1d80a35e31a1ffe579e7c621d069e \ - --hash=sha256:f99212a81c62b5f22f9e7c3e347aa00491114a5647e1f13bbebd79c3e5f08d75 +more-itertools==10.3.0 \ + --hash=sha256:e5d93ef411224fbcef366a6e8ddc4c5781bc6359d43412a65dd5964e46111463 \ + --hash=sha256:ea6a02e24a9161e51faad17a8782b92a0df82c12c1c8886fec7f0c3fa1a1b320 + # via + # jaraco-classes + # jaraco-functools +nh3==0.2.18 \ + --hash=sha256:0411beb0589eacb6734f28d5497ca2ed379eafab8ad8c84b31bb5c34072b7164 \ + --hash=sha256:14c5a72e9fe82aea5fe3072116ad4661af5cf8e8ff8fc5ad3450f123e4925e86 \ + --hash=sha256:19aaba96e0f795bd0a6c56291495ff59364f4300d4a39b29a0abc9cb3774a84b \ + --hash=sha256:34c03fa78e328c691f982b7c03d4423bdfd7da69cd707fe572f544cf74ac23ad \ + --hash=sha256:36c95d4b70530b320b365659bb5034341316e6a9b30f0b25fa9c9eff4c27a204 \ + --hash=sha256:3a157ab149e591bb638a55c8c6bcb8cdb559c8b12c13a8affaba6cedfe51713a \ + --hash=sha256:42c64511469005058cd17cc1537578eac40ae9f7200bedcfd1fc1a05f4f8c200 \ + --hash=sha256:5f36b271dae35c465ef5e9090e1fdaba4a60a56f0bb0ba03e0932a66f28b9189 \ + --hash=sha256:6955369e4d9f48f41e3f238a9e60f9410645db7e07435e62c6a9ea6135a4907f \ + --hash=sha256:7b7c2a3c9eb1a827d42539aa64091640bd275b81e097cd1d8d82ef91ffa2e811 \ + --hash=sha256:8ce0f819d2f1933953fca255db2471ad58184a60508f03e6285e5114b6254844 \ + --hash=sha256:94a166927e53972a9698af9542ace4e38b9de50c34352b962f4d9a7d4c927af4 \ + --hash=sha256:a7f1b5b2c15866f2db413a3649a8fe4fd7b428ae58be2c0f6bca5eefd53ca2be \ + --hash=sha256:c8b3a1cebcba9b3669ed1a84cc65bf005728d2f0bc1ed2a6594a992e817f3a50 \ + --hash=sha256:de3ceed6e661954871d6cd78b410213bdcb136f79aafe22aa7182e028b8c7307 \ + --hash=sha256:f0eca9ca8628dbb4e916ae2491d72957fdd35f7a5d326b7032a345f111ac07fe # via readme-renderer -nox==2023.4.22 \ - --hash=sha256:0b1adc619c58ab4fa57d6ab2e7823fe47a32e70202f287d78474adcc7bda1891 \ - --hash=sha256:46c0560b0dc609d7d967dc99e22cb463d3c4caf54a5fda735d6c11b5177e3a9f +nox==2024.4.15 \ + --hash=sha256:6492236efa15a460ecb98e7b67562a28b70da006ab0be164e8821177577c0565 \ + --hash=sha256:ecf6700199cdfa9e5ea0a41ff5e6ef4641d09508eda6edb89d9987864115817f # via -r requirements.in -packaging==23.2 \ - --hash=sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5 \ - --hash=sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7 +packaging==24.1 \ + --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ + --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 # via # gcp-releasetool # nox -pkginfo==1.9.6 \ - --hash=sha256:4b7a555a6d5a22169fcc9cf7bfd78d296b0361adad412a346c1226849af5e546 \ - --hash=sha256:8fd5896e8718a4372f0ea9cc9d96f6417c9b986e23a4d116dda26b62cc29d046 +pkginfo==1.10.0 \ + --hash=sha256:5df73835398d10db79f8eecd5cd86b1f6d29317589ea70796994d49399af6297 \ + --hash=sha256:889a6da2ed7ffc58ab5b900d888ddce90bce912f2d2de1dc1c26f4cb9fe65097 # via twine -platformdirs==3.11.0 \ - --hash=sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3 \ - --hash=sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e +platformdirs==4.2.2 \ + --hash=sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee \ + --hash=sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3 # via virtualenv -protobuf==4.25.3 \ - --hash=sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4 \ - --hash=sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8 \ - --hash=sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c \ - --hash=sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d \ - --hash=sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4 \ - --hash=sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa \ - --hash=sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c \ - --hash=sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019 \ - --hash=sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9 \ - --hash=sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c \ - --hash=sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2 +proto-plus==1.24.0 \ + --hash=sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445 \ + --hash=sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12 + # via google-api-core +protobuf==5.27.2 \ + --hash=sha256:0e341109c609749d501986b835f667c6e1e24531096cff9d34ae411595e26505 \ + --hash=sha256:176c12b1f1c880bf7a76d9f7c75822b6a2bc3db2d28baa4d300e8ce4cde7409b \ + --hash=sha256:354d84fac2b0d76062e9b3221f4abbbacdfd2a4d8af36bab0474f3a0bb30ab38 \ + --hash=sha256:4fadd8d83e1992eed0248bc50a4a6361dc31bcccc84388c54c86e530b7f58863 \ + --hash=sha256:54330f07e4949d09614707c48b06d1a22f8ffb5763c159efd5c0928326a91470 \ + --hash=sha256:610e700f02469c4a997e58e328cac6f305f649826853813177e6290416e846c6 \ + --hash=sha256:7fc3add9e6003e026da5fc9e59b131b8f22b428b991ccd53e2af8071687b4fce \ + --hash=sha256:9e8f199bf7f97bd7ecebffcae45ebf9527603549b2b562df0fbc6d4d688f14ca \ + --hash=sha256:a109916aaac42bff84702fb5187f3edadbc7c97fc2c99c5ff81dd15dcce0d1e5 \ + --hash=sha256:b848dbe1d57ed7c191dfc4ea64b8b004a3f9ece4bf4d0d80a367b76df20bf36e \ + --hash=sha256:f3ecdef226b9af856075f28227ff2c90ce3a594d092c39bee5513573f25e2714 # via # gcp-docuploader # gcp-releasetool # google-api-core # googleapis-common-protos -pyasn1==0.5.0 \ - --hash=sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57 \ - --hash=sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde + # proto-plus +pyasn1==0.6.0 \ + --hash=sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c \ + --hash=sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473 # via # pyasn1-modules # rsa -pyasn1-modules==0.3.0 \ - --hash=sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c \ - --hash=sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d +pyasn1-modules==0.4.0 \ + --hash=sha256:831dbcea1b177b28c9baddf4c6d1013c24c3accd14a1873fffaa6a2e905f17b6 \ + --hash=sha256:be04f15b66c206eed667e0bb5ab27e2b1855ea54a842e5037738099e8ca4ae0b # via google-auth -pycparser==2.21 \ - --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 \ - --hash=sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206 +pycparser==2.22 \ + --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \ + --hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc # via cffi -pygments==2.16.1 \ - --hash=sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692 \ - --hash=sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29 +pygments==2.18.0 \ + --hash=sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199 \ + --hash=sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a # via # readme-renderer # rich @@ -434,20 +453,20 @@ pyjwt==2.8.0 \ --hash=sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de \ --hash=sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320 # via gcp-releasetool -pyperclip==1.8.2 \ - --hash=sha256:105254a8b04934f0bc84e9c24eb360a591aaf6535c9def5f29d92af107a9bf57 +pyperclip==1.9.0 \ + --hash=sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310 # via gcp-releasetool -python-dateutil==2.8.2 \ - --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ - --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 +python-dateutil==2.9.0.post0 \ + --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \ + --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 # via gcp-releasetool -readme-renderer==42.0 \ - --hash=sha256:13d039515c1f24de668e2c93f2e877b9dbe6c6c32328b90a40a49d8b2b85f36d \ - --hash=sha256:2d55489f83be4992fe4454939d1a051c33edbab778e82761d060c9fc6b308cd1 +readme-renderer==44.0 \ + --hash=sha256:2fbca89b81a08526aadf1357a8c2ae889ec05fb03f5da67f9769c9a592166151 \ + --hash=sha256:8712034eabbfa6805cacf1402b4eeb2a73028f72d1166d6f5cb7f9c047c5d1e1 # via twine -requests==2.31.0 \ - --hash=sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f \ - --hash=sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1 +requests==2.32.3 \ + --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ + --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 # via # gcp-releasetool # google-api-core @@ -462,9 +481,9 @@ rfc3986==2.0.0 \ --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c # via twine -rich==13.6.0 \ - --hash=sha256:2b38e2fe9ca72c9a00170a1a2d20c63c790d0e10ef1fe35eba76e1e7b1d7d245 \ - --hash=sha256:5c14d22737e6d5084ef4771b62d5d4363165b403455a30a1c8ca39dc7b644bef +rich==13.7.1 \ + --hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \ + --hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432 # via twine rsa==4.9 \ --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ @@ -480,35 +499,39 @@ six==1.16.0 \ # via # gcp-docuploader # python-dateutil -twine==4.0.2 \ - --hash=sha256:929bc3c280033347a00f847236564d1c52a3e61b1ac2516c97c48f3ceab756d8 \ - --hash=sha256:9e102ef5fdd5a20661eb88fad46338806c3bd32cf1db729603fe3697b1bc83c8 +tomli==2.0.1 \ + --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ + --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f + # via nox +twine==5.1.1 \ + --hash=sha256:215dbe7b4b94c2c50a7315c0275d2258399280fbb7d04182c7e55e24b5f93997 \ + --hash=sha256:9aa0825139c02b3434d913545c7b847a21c835e11597f5255842d457da2322db # via -r requirements.in -typing-extensions==4.8.0 \ - --hash=sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0 \ - --hash=sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef +typing-extensions==4.12.2 \ + --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ + --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 # via -r requirements.in -urllib3==2.0.7 \ - --hash=sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84 \ - --hash=sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e +urllib3==2.2.2 \ + --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \ + --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168 # via # requests # twine -virtualenv==20.24.6 \ - --hash=sha256:02ece4f56fbf939dbbc33c0715159951d6bf14aaf5457b092e4548e1382455af \ - --hash=sha256:520d056652454c5098a00c0f073611ccbea4c79089331f60bf9d7ba247bb7381 +virtualenv==20.26.3 \ + --hash=sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a \ + --hash=sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589 # via nox -wheel==0.41.3 \ - --hash=sha256:488609bc63a29322326e05560731bf7bfea8e48ad646e1f5e40d366607de0942 \ - --hash=sha256:4d4987ce51a49370ea65c0bfd2234e8ce80a12780820d9dc462597a6e60d0841 +wheel==0.43.0 \ + --hash=sha256:465ef92c69fa5c5da2d1cf8ac40559a8c940886afcef87dcf14b9470862f1d85 \ + --hash=sha256:55c570405f142630c6b9f72fe09d9b67cf1477fcf543ae5b8dcb1f5b7377da81 # via -r requirements.in -zipp==3.17.0 \ - --hash=sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31 \ - --hash=sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0 +zipp==3.19.2 \ + --hash=sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19 \ + --hash=sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==69.2.0 \ - --hash=sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e \ - --hash=sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c +setuptools==70.2.0 \ + --hash=sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05 \ + --hash=sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1 # via -r requirements.in diff --git a/.kokoro/test-samples-against-head.sh b/.kokoro/test-samples-against-head.sh index 63ac41dfa..e9d8bd79a 100755 --- a/.kokoro/test-samples-against-head.sh +++ b/.kokoro/test-samples-against-head.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index 5a0f5fab6..55910c8ba 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh index 50b35a48c..7933d8201 100755 --- a/.kokoro/test-samples.sh +++ b/.kokoro/test-samples.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/trampoline.sh b/.kokoro/trampoline.sh index d85b1f267..48f796997 100755 --- a/.kokoro/trampoline.sh +++ b/.kokoro/trampoline.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh index 59a7cf3a9..35fa52923 100755 --- a/.kokoro/trampoline_v2.sh +++ b/.kokoro/trampoline_v2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6a8e16950..1d74695f7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.trampolinerc b/.trampolinerc index a7dfeb42c..008015237 100644 --- a/.trampolinerc +++ b/.trampolinerc @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/MANIFEST.in b/MANIFEST.in index e0a667053..d6814cd60 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/docs/conf.py b/docs/conf.py index d0468e25a..826298090 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/owlbot.py b/owlbot.py index 778cc3e53..07805d11a 100644 --- a/owlbot.py +++ b/owlbot.py @@ -101,29 +101,6 @@ # Add .pytype to .gitignore s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype") -# Add pytype config to setup.cfg -s.replace( - "setup.cfg", - r"universal = 1", - textwrap.dedent( - """ \\g<0> - - [pytype] - python_version = 3.8 - inputs = - google/cloud/ - exclude = - tests/ - google/cloud/bigquery_v2/ # Legacy proto-based types. - output = .pytype/ - disable = - # There's some issue with finding some pyi files, thus disabling. - # The issue https://github.com/google/pytype/issues/150 is closed, but the - # error still occurs for some reason. - pyi-error""" - ), -) - s.shell.run(["nox", "-s", "blacken"], hide_output=False) for noxfile in REPO_ROOT.glob("samples/**/noxfile.py"): s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) diff --git a/scripts/decrypt-secrets.sh b/scripts/decrypt-secrets.sh index 0018b421d..120b0ddc4 100755 --- a/scripts/decrypt-secrets.sh +++ b/scripts/decrypt-secrets.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2023 Google LLC All rights reserved. +# Copyright 2024 Google LLC All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/readme-gen/readme_gen.py b/scripts/readme-gen/readme_gen.py index 1acc11983..8f5e248a0 100644 --- a/scripts/readme-gen/readme_gen.py +++ b/scripts/readme-gen/readme_gen.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 57bf873474382cc2cb34243b704bc928fa1b64c6 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 17 Jul 2024 17:35:07 -0400 Subject: [PATCH 337/536] fix: Allow protobuf 5.x; require protobuf >=3.20.2; proto-plus >=1.22.3 (#1976) * fix: Allow protobuf 5.x; require protobuf >=3.20.2; proto-plus >=1.22.3 * Update constraints --------- Co-authored-by: Lingqing Gan --- setup.py | 4 ++-- testing/constraints-3.7.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 9641fe695..617685543 100644 --- a/setup.py +++ b/setup.py @@ -75,8 +75,8 @@ "opentelemetry-instrumentation >= 0.20b0", ], "bigquery_v2": [ - "proto-plus >= 1.22.0, <2.0.0dev", - "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. + "proto-plus >= 1.22.3, <2.0.0dev", + "protobuf>=3.20.2,<6.0.0dev,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. ], } diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index fda7ce951..55e63449f 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -26,8 +26,8 @@ opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 packaging==20.0.0 pandas==1.1.0 -proto-plus==1.22.0 -protobuf==3.19.5 +proto-plus==1.22.3 +protobuf==3.20.2 pyarrow==3.0.0 python-dateutil==2.7.3 requests==2.21.0 From ba61a8ab0da541ba1940211875d7ea2e9e17dfa8 Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 19 Jul 2024 13:13:40 -0700 Subject: [PATCH 338/536] docs: add short mode query sample & test (#1978) * docs: add short mode query sample & test --- samples/client_query_shortmode.py | 53 ++++++++++++++++++++ samples/tests/test_client_query_shortmode.py | 26 ++++++++++ 2 files changed, 79 insertions(+) create mode 100644 samples/client_query_shortmode.py create mode 100644 samples/tests/test_client_query_shortmode.py diff --git a/samples/client_query_shortmode.py b/samples/client_query_shortmode.py new file mode 100644 index 000000000..50446dc48 --- /dev/null +++ b/samples/client_query_shortmode.py @@ -0,0 +1,53 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_shortmode() -> None: + # [START bigquery_query_shortquery] + # This example demonstrates issuing a query that may be run in short query mode. + # + # To enable the short query mode preview feature, the QUERY_PREVIEW_ENABLED + # environmental variable should be set to `TRUE`. + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + query = """ + SELECT + name, + gender, + SUM(number) AS total + FROM + bigquery-public-data.usa_names.usa_1910_2013 + GROUP BY + name, gender + ORDER BY + total DESC + LIMIT 10 + """ + # Run the query. The returned `rows` iterator can return information about + # how the query was executed as well as the result data. + rows = client.query_and_wait(query) + + if rows.job_id is not None: + print("Query was run with job state. Job ID: {}".format(rows.job_id)) + else: + print("Query was run in short mode. Query ID: {}".format(rows.query_id)) + + print("The query data:") + for row in rows: + # Row values can be accessed by field name or index. + print("name={}, gender={}, total={}".format(row[0], row[1], row["total"])) + # [END bigquery_query_shortquery] diff --git a/samples/tests/test_client_query_shortmode.py b/samples/tests/test_client_query_shortmode.py new file mode 100644 index 000000000..41132f24c --- /dev/null +++ b/samples/tests/test_client_query_shortmode.py @@ -0,0 +1,26 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from .. import client_query_shortmode + +if typing.TYPE_CHECKING: + import pytest + + +def test_client_query_shortmode(capsys: "pytest.CaptureFixture[str]") -> None: + client_query_shortmode.client_query_shortmode() + out, err = capsys.readouterr() + assert "Query was run" in out From 4383cfe7c7571ffaa3efd5e45ca33c6ff978f274 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 25 Jul 2024 20:16:55 +0200 Subject: [PATCH 339/536] chore(deps): update all dependencies (#1982) --- samples/desktopapp/requirements-test.txt | 2 +- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 14 +++++++------- samples/magics/requirements-test.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 8f0bfaad4..68f9039cc 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.2; python_version >= '3.8' +pytest==8.3.2; python_version >= '3.8' mock==5.1.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 6a3d17c6f..dafb60b2a 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ google-cloud-bigquery==3.25.0 -google-auth-oauthlib==1.2.0 +google-auth-oauthlib==1.2.1 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index b35a54a76..335236a14 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.2.2; python_version >= '3.8' +pytest==8.3.2; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index add61f286..ccd9f4f2e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,17 +12,17 @@ Fiona==1.9.6 geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' -geopandas==0.14.4; python_version >= '3.9' -google-api-core==2.19.0 -google-auth==2.30.0 +geopandas==1.0.1; python_version >= '3.9' +google-api-core==2.19.1 +google-auth==2.32.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.1 -googleapis-common-protos==1.63.1 +googleapis-common-protos==1.63.2 grpcio===1.62.2; python_version == '3.7' -grpcio==1.64.1; python_version >= '3.8' +grpcio==1.65.1; python_version >= '3.8' idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 @@ -33,7 +33,7 @@ pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==16.1.0; python_version >= '3.8' +pyarrow==17.0.0; python_version >= '3.8' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' @@ -47,7 +47,7 @@ PyYAML==6.0.1 requests==2.31.0; python_version == '3.7' requests==2.32.3; python_version >= '3.8' rsa==4.9 -Shapely==2.0.4 +Shapely==2.0.5 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 8f0bfaad4..68f9039cc 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.2; python_version >= '3.8' +pytest==8.3.2; python_version >= '3.8' mock==5.1.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 8f0bfaad4..68f9039cc 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.2; python_version >= '3.8' +pytest==8.3.2; python_version >= '3.8' mock==5.1.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 3896a2aec..81fa3782c 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -7,7 +7,7 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.9.0; python_version >= '3.9' +matplotlib==3.9.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index b65023b00..083b20271 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.2; python_version >= '3.8' +pytest==8.3.2; python_version >= '3.8' mock==5.1.0 From b7eb8815c42a47306a5dfc7207882ba45423bc83 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 9 Aug 2024 14:47:03 -0500 Subject: [PATCH 340/536] chore(python): fix docs build (#1984) Source-Link: https://github.com/googleapis/synthtool/commit/bef813d194de29ddf3576eda60148b6b3dcc93d9 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:94bb690db96e6242b2567a4860a94d48fa48696d092e51b0884a1a2c0a79a407 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 3 ++- .kokoro/docker/docs/Dockerfile | 9 ++++----- .kokoro/publish-docs.sh | 20 ++++++++++---------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 001b1b1ca..6d064ddb9 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:52210e0e0559f5ea8c52be148b33504022e1faef4e95fbe4b32d68022af2fa7e + digest: sha256:94bb690db96e6242b2567a4860a94d48fa48696d092e51b0884a1a2c0a79a407 +# created: 2024-07-31T14:52:44.926548819Z diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile index 5205308b3..e5410e296 100644 --- a/.kokoro/docker/docs/Dockerfile +++ b/.kokoro/docker/docs/Dockerfile @@ -72,19 +72,18 @@ RUN tar -xvf Python-3.10.14.tgz RUN ./Python-3.10.14/configure --enable-optimizations RUN make altinstall -RUN python3.10 -m venv /venv -ENV PATH /venv/bin:$PATH +ENV PATH /usr/local/bin/python3.10:$PATH ###################### Install pip RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ - && python3 /tmp/get-pip.py \ + && python3.10 /tmp/get-pip.py \ && rm /tmp/get-pip.py # Test pip -RUN python3 -m pip +RUN python3.10 -m pip # Install build requirements COPY requirements.txt /requirements.txt -RUN python3 -m pip install --require-hashes -r requirements.txt +RUN python3.10 -m pip install --require-hashes -r requirements.txt CMD ["python3.10"] diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 38f083f05..233205d58 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -21,18 +21,18 @@ export PYTHONUNBUFFERED=1 export PATH="${HOME}/.local/bin:${PATH}" # Install nox -python3 -m pip install --require-hashes -r .kokoro/requirements.txt -python3 -m nox --version +python3.10 -m pip install --require-hashes -r .kokoro/requirements.txt +python3.10 -m nox --version # build docs nox -s docs # create metadata -python3 -m docuploader create-metadata \ +python3.10 -m docuploader create-metadata \ --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ - --version=$(python3 setup.py --version) \ + --version=$(python3.10 setup.py --version) \ --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ - --distribution-name=$(python3 setup.py --name) \ + --distribution-name=$(python3.10 setup.py --name) \ --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) @@ -40,18 +40,18 @@ python3 -m docuploader create-metadata \ cat docs.metadata # upload docs -python3 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}" +python3.10 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}" # docfx yaml files nox -s docfx # create metadata. -python3 -m docuploader create-metadata \ +python3.10 -m docuploader create-metadata \ --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ - --version=$(python3 setup.py --version) \ + --version=$(python3.10 setup.py --version) \ --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ - --distribution-name=$(python3 setup.py --name) \ + --distribution-name=$(python3.10 setup.py --name) \ --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) @@ -59,4 +59,4 @@ python3 -m docuploader create-metadata \ cat docs.metadata # upload docs -python3 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" +python3.10 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" From d0bb87a3c43544a838da40ef3c581b4c2aa91506 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 13 Aug 2024 21:47:33 +0200 Subject: [PATCH 341/536] chore(deps): update all dependencies (#1983) Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ccd9f4f2e..1a1cf4b04 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ -attrs==23.2.0 +attrs==24.1.0 certifi==2024.7.4 cffi===1.15.1; python_version == '3.7' cffi==1.16.0; python_version >= '3.8' @@ -22,7 +22,7 @@ google-crc32c==1.5.0 google-resumable-media==2.7.1 googleapis-common-protos==1.63.2 grpcio===1.62.2; python_version == '3.7' -grpcio==1.65.1; python_version >= '3.8' +grpcio==1.65.4; python_version >= '3.8' idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 From 8f5a41d283a965ca161019588d3a3b2947b04b5b Mon Sep 17 00:00:00 2001 From: Suzy Mueller Date: Tue, 13 Aug 2024 15:24:24 -0700 Subject: [PATCH 342/536] fix: add warning when encountering unknown field types (#1989) * fix: add warning when encountering unknown field types The types returned for currently unsupported field types may change in the future, when support is added. Warn users that the types they are using are not yet supported. * fix: add warning for unknown subfield types as well * fix: remove unused warnings * fix: remove leftover debugging code * move test case closer to related test * add comments * fix formatting * fix test_table and use warnings.warn instead of pytest.warn * add explicit warning about behavior subject to change in the future add warning for write and warn about future behavior changes * add default converter for _SCALAR_VALUE_TO_JSON_PARAM * factor out shared warning * fix test case and make coverage happy * add unit test to StructQueryParameter class --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/_helpers.py | 38 +++++++++++---- google/cloud/bigquery/_pandas_helpers.py | 4 +- google/cloud/bigquery/query.py | 20 ++++---- tests/unit/test__helpers.py | 62 +++++++++++++++++++++++- tests/unit/test_query.py | 19 ++++++++ tests/unit/test_table.py | 6 +-- 6 files changed, 123 insertions(+), 26 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 5ee5e1850..1eda80712 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -21,6 +21,7 @@ import math import re import os +import warnings from typing import Optional, Union from dateutil import relativedelta @@ -297,12 +298,7 @@ def _record_from_json(value, field): record = {} record_iter = zip(field.fields, value["f"]) for subfield, cell in record_iter: - converter = _CELLDATA_FROM_JSON[subfield.field_type] - if subfield.mode == "REPEATED": - value = [converter(item["v"], subfield) for item in cell["v"]] - else: - value = converter(cell["v"], subfield) - record[subfield.name] = value + record[subfield.name] = _field_from_json(cell["v"], subfield) return record @@ -382,7 +378,11 @@ def _field_to_index_mapping(schema): def _field_from_json(resource, field): - converter = _CELLDATA_FROM_JSON.get(field.field_type, lambda value, _: value) + def default_converter(value, field): + _warn_unknown_field_type(field) + return value + + converter = _CELLDATA_FROM_JSON.get(field.field_type, default_converter) if field.mode == "REPEATED": return [converter(item["v"], field) for item in resource] else: @@ -484,6 +484,11 @@ def _json_to_json(value): return json.dumps(value) +def _string_to_json(value): + """NOOP string -> string coercion""" + return value + + def _timestamp_to_json_parameter(value): """Coerce 'value' to an JSON-compatible representation. @@ -596,6 +601,7 @@ def _range_field_to_json(range_element_type, value): "DATE": _date_to_json, "TIME": _time_to_json, "JSON": _json_to_json, + "STRING": _string_to_json, # Make sure DECIMAL and BIGDECIMAL are handled, even though # requests for them should be converted to NUMERIC. Better safe # than sorry. @@ -609,6 +615,15 @@ def _range_field_to_json(range_element_type, value): _SCALAR_VALUE_TO_JSON_PARAM["TIMESTAMP"] = _timestamp_to_json_parameter +def _warn_unknown_field_type(field): + warnings.warn( + "Unknown type '{}' for field '{}'. Behavior reading and writing this type is not officially supported and may change in the future.".format( + field.field_type, field.name + ), + FutureWarning, + ) + + def _scalar_field_to_json(field, row_value): """Maps a field and value to a JSON-safe value. @@ -621,9 +636,12 @@ def _scalar_field_to_json(field, row_value): Returns: Any: A JSON-serializable object. """ - converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) - if converter is None: # STRING doesn't need converting - return row_value + + def default_converter(value): + _warn_unknown_field_type(field) + return value + + converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type, default_converter) return converter(row_value) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 8395478fb..c21a02569 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -204,7 +204,9 @@ def bq_to_arrow_field(bq_field, array_type=None): metadata=metadata, ) - warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name)) + warnings.warn( + "Unable to determine Arrow type for field '{}'.".format(bq_field.name) + ) return None diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 9c59056fd..f1090a7dc 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -591,9 +591,8 @@ def to_api_repr(self) -> dict: Dict: JSON mapping """ value = self.value - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) - if converter is not None: - value = converter(value) # type: ignore + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_, lambda value: value) + value = converter(value) # type: ignore resource: Dict[str, Any] = { "parameterType": {"type": self.type_}, "parameterValue": {"value": value}, @@ -748,9 +747,10 @@ def to_api_repr(self) -> dict: else: a_type = self.array_type.to_api_repr() - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"]) - if converter is not None: - values = [converter(value) for value in values] # type: ignore + converter = _SCALAR_VALUE_TO_JSON_PARAM.get( + a_type["type"], lambda value: value + ) + values = [converter(value) for value in values] # type: ignore a_values = [{"value": value} for value in values] resource = { @@ -792,7 +792,7 @@ def __repr__(self): class StructQueryParameter(_AbstractQueryParameter): - """Named / positional query parameters for struct values. + """Name / positional query parameters for struct values. Args: name (Optional[str]): @@ -897,10 +897,8 @@ def to_api_repr(self) -> dict: values[name] = repr_["parameterValue"] else: s_types[name] = {"name": name, "type": {"type": type_}} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) - if converter is not None: - value = converter(value) # type: ignore - values[name] = {"value": value} + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_, lambda value: value) + values[name] = {"value": converter(value)} resource = { "parameterType": { diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 1bf21479f..0a307498f 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -17,6 +17,7 @@ import decimal import json import os +import warnings import pytest import packaging import unittest @@ -640,6 +641,17 @@ def test_w_single_scalar_column(self): row = {"f": [{"v": "1"}]} self.assertEqual(self._call_fut(row, schema=[col]), (1,)) + def test_w_unknown_type(self): + # SELECT 1 AS col + col = _Field("REQUIRED", "col", "UNKNOWN") + row = {"f": [{"v": "1"}]} + with warnings.catch_warnings(record=True) as warned: + self.assertEqual(self._call_fut(row, schema=[col]), ("1",)) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("UNKNOWN" in str(warning)) + self.assertTrue("col" in str(warning)) + def test_w_single_scalar_geography_column(self): # SELECT 1 AS col col = _Field("REQUIRED", "geo", "GEOGRAPHY") @@ -660,6 +672,17 @@ def test_w_single_array_column(self): row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]} self.assertEqual(self._call_fut(row, schema=[col]), ([1, 2, 3],)) + def test_w_unknown_type_repeated(self): + # SELECT 1 AS col + col = _Field("REPEATED", "col", "UNKNOWN") + row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]} + with warnings.catch_warnings(record=True) as warned: + self.assertEqual(self._call_fut(row, schema=[col]), (["1", "2", "3"],)) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("UNKNOWN" in str(warning)) + self.assertTrue("col" in str(warning)) + def test_w_struct_w_nested_array_column(self): # SELECT ([1, 2], 3, [4, 5]) as col first = _Field("REPEATED", "first", "INTEGER") @@ -684,6 +707,39 @@ def test_w_struct_w_nested_array_column(self): ({"first": [1, 2], "second": 3, "third": [4, 5]},), ) + def test_w_unknown_type_subfield(self): + # SELECT [(1, 2, 3), (4, 5, 6)] as col + first = _Field("REPEATED", "first", "UNKNOWN1") + second = _Field("REQUIRED", "second", "UNKNOWN2") + third = _Field("REPEATED", "third", "INTEGER") + col = _Field("REQUIRED", "col", "RECORD", fields=[first, second, third]) + row = { + "f": [ + { + "v": { + "f": [ + {"v": [{"v": "1"}, {"v": "2"}]}, + {"v": "3"}, + {"v": [{"v": "4"}, {"v": "5"}]}, + ] + } + } + ] + } + with warnings.catch_warnings(record=True) as warned: + self.assertEqual( + self._call_fut(row, schema=[col]), + ({"first": ["1", "2"], "second": "3", "third": [4, 5]},), + ) + self.assertEqual(len(warned), 2) # 1 warning per unknown field. + warned = [str(warning) for warning in warned] + self.assertTrue( + any(["first" in warning and "UNKNOWN1" in warning for warning in warned]) + ) + self.assertTrue( + any(["second" in warning and "UNKNOWN2" in warning for warning in warned]) + ) + def test_w_array_of_struct(self): # SELECT [(1, 2, 3), (4, 5, 6)] as col first = _Field("REQUIRED", "first", "INTEGER") @@ -1076,8 +1132,12 @@ def _call_fut(self, field, value): def test_w_unknown_field_type(self): field = _make_field("UNKNOWN") original = object() - converted = self._call_fut(field, original) + with warnings.catch_warnings(record=True) as warned: + converted = self._call_fut(field, original) self.assertIs(converted, original) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("UNKNOWN" in str(warning)) def test_w_known_field_type(self): field = _make_field("INT64") diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 7c36eb75b..40ef080f7 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -1780,6 +1780,25 @@ def test_to_api_repr_w_nested_struct(self): param = self._make_one("foo", scalar_1, sub) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_unknown_type(self): + EXPECTED = { + "name": "foo", + "parameterType": { + "type": "STRUCT", + "structTypes": [ + {"name": "bar", "type": {"type": "INT64"}}, + {"name": "baz", "type": {"type": "UNKNOWN_TYPE"}}, + ], + }, + "parameterValue": { + "structValues": {"bar": {"value": "123"}, "baz": {"value": "abc"}} + }, + } + sub_1 = _make_subparam("bar", "INT64", 123) + sub_2 = _make_subparam("baz", "UNKNOWN_TYPE", "abc") + param = self._make_one("foo", sub_1, sub_2) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test___eq___wrong_type(self): field = self._make_one("test", _make_subparam("bar", "STRING", "abc")) other = object() diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 7a97c7b78..d6febcfb1 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2751,9 +2751,9 @@ def test_to_arrow_w_unknown_type(self): self.assertEqual(ages, [33, 29]) self.assertEqual(sports, ["volleyball", "basketball"]) - self.assertEqual(len(warned), 1) - warning = warned[0] - self.assertTrue("sport" in str(warning)) + # Expect warning from both the arrow conversion, and the json deserialization. + self.assertEqual(len(warned), 2) + self.assertTrue(all("sport" in str(warning) for warning in warned)) def test_to_arrow_w_empty_table(self): pyarrow = pytest.importorskip( From e7e0fcf9d5db708d4d4cadd3437ba6d883cf186a Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 14 Aug 2024 12:30:51 -0400 Subject: [PATCH 343/536] Bug: revises Exception type (#1994) * revises Exception type * updates error choices --- google/cloud/bigquery/job/base.py | 2 +- tests/unit/test_job_retry.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 6f9726181..f165fd036 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -49,7 +49,7 @@ "notImplemented": http.client.NOT_IMPLEMENTED, "policyViolation": http.client.FORBIDDEN, "quotaExceeded": http.client.FORBIDDEN, - "rateLimitExceeded": http.client.FORBIDDEN, + "rateLimitExceeded": http.client.TOO_MANY_REQUESTS, "resourceInUse": http.client.BAD_REQUEST, "resourcesExceeded": http.client.BAD_REQUEST, "responseTooLarge": http.client.FORBIDDEN, diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index 298ab9a56..958986052 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -442,7 +442,7 @@ def api_request(method, path, query_params=None, data=None, **kw): orig_job_id = job.job_id job_retry = dict(job_retry=None) if job_retry_on_query == "Result" else {} - with pytest.raises(google.api_core.exceptions.Forbidden): + with pytest.raises(google.api_core.exceptions.TooManyRequests): job.result(**job_retry) assert job.job_id == orig_job_id From bd83cfd2eb25cec58d59af8048f5188d748b083d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 14 Aug 2024 14:15:35 -0400 Subject: [PATCH 344/536] fix: add docfx to the presubmit configuration and delete docs-presubmit (#1995) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * adjusts location of checks related to docfx/docs * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .kokoro/presubmit/presubmit.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg index ce3953120..ac4cc5847 100644 --- a/.kokoro/presubmit/presubmit.cfg +++ b/.kokoro/presubmit/presubmit.cfg @@ -3,5 +3,5 @@ # Disable system tests. env_vars: { key: "NOX_SESSION" - value: "unit_noextras unit cover docs" + value: "unit_noextras unit cover docs docfx" } From fabb2baebdc8a046c4fa00f9e1a65495430b927d Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 16 Aug 2024 21:24:33 +0200 Subject: [PATCH 345/536] chore(deps): update all dependencies (#1993) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * pin PyYAML===6.0.1 for python 3.7 * fix the version pinning --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 13 +++++++------ samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 1a1cf4b04..f388c8248 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,25 +1,25 @@ -attrs==24.1.0 +attrs==24.2.0 certifi==2024.7.4 cffi===1.15.1; python_version == '3.7' -cffi==1.16.0; python_version >= '3.8' +cffi==1.17.0; python_version >= '3.8' charset-normalizer==3.3.2 click==8.1.7 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.2.0 +db-dtypes==1.3.0 Fiona==1.9.6 geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' google-api-core==2.19.1 -google-auth==2.32.0 +google-auth==2.33.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 -google-resumable-media==2.7.1 +google-resumable-media==2.7.2 googleapis-common-protos==1.63.2 grpcio===1.62.2; python_version == '3.7' grpcio==1.65.4; python_version >= '3.8' @@ -43,7 +43,8 @@ pycparser==2.22; python_version >= '3.8' pyparsing==3.1.2 python-dateutil==2.9.0.post0 pytz==2024.1 -PyYAML==6.0.1 +PyYAML===6.0.1; python_version == '3.7' +PyYAML==6.0.2; python_version >= '3.8' requests==2.31.0; python_version == '3.7' requests==2.32.3; python_version >= '3.8' rsa==4.9 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index a1044c231..956b168dd 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ bigquery_magics==0.1.0 -db-dtypes==1.2.0 +db-dtypes==1.3.0 google.cloud.bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 81fa3782c..42b1243eb 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ bigquery-magics==0.1.0 -db-dtypes==1.2.0 +db-dtypes==1.3.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' @@ -7,7 +7,7 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.9.1; python_version >= '3.9' +matplotlib==3.9.2; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' From edcb79ca69dba30d8102abebb9d53bc76e4882ee Mon Sep 17 00:00:00 2001 From: Misha Behersky Date: Mon, 19 Aug 2024 21:15:10 +0300 Subject: [PATCH 346/536] fix: do not set job timeout extra property if None (#1987) Co-authored-by: Lingqing Gan --- google/cloud/bigquery/job/base.py | 7 +++++-- tests/unit/job/test_base.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index f165fd036..e5f68c843 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -218,8 +218,11 @@ def job_timeout_ms(self, value): err.__traceback__ ) - """ Docs indicate a string is expected by the API """ - self._properties["jobTimeoutMs"] = str(value) + if value is not None: + # docs indicate a string is expected by the API + self._properties["jobTimeoutMs"] = str(value) + else: + self._properties.pop("jobTimeoutMs", None) @property def labels(self): diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index a7337afd2..2d2f0c13c 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -1320,3 +1320,21 @@ def test_job_timeout_ms(self): # Confirm that integers get converted to strings. job_config.job_timeout_ms = 5000 assert job_config.job_timeout_ms == "5000" # int is converted to string + + def test_job_timeout_is_none_when_set_none(self): + job_config = self._make_one() + job_config.job_timeout_ms = None + # Confirm value is None and not literal string 'None' + assert job_config.job_timeout_ms is None + + def test_job_timeout_properties(self): + # Make sure any value stored in properties is erased + # when setting job_timeout to None. + job_config = self._make_one() + job_config.job_timeout_ms = 4200 + assert job_config.job_timeout_ms == "4200" + assert job_config._properties.get("jobTimeoutMs") == "4200" + + job_config.job_timeout_ms = None + assert job_config.job_timeout_ms is None + assert "jobTimeoutMs" not in job_config._properties From 5352870283ca7d4652aefc73f12645bcf6e1363c Mon Sep 17 00:00:00 2001 From: Yilin Xu Date: Tue, 20 Aug 2024 06:21:01 -0700 Subject: [PATCH 347/536] fix: set pyarrow field nullable to False for a BigQuery field in REPEATED mode (#1999) Co-authored-by: Lingqing Gan --- google/cloud/bigquery/_pandas_helpers.py | 2 +- tests/unit/test__pandas_helpers.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index c21a02569..210ab4875 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -200,7 +200,7 @@ def bq_to_arrow_field(bq_field, array_type=None): # local NULL values. Arrow will gladly interpret these NULL values # as non-NULL and give you an arbitrary value. See: # https://github.com/googleapis/python-bigquery/issues/1692 - nullable=True, + nullable=False if bq_field.mode.upper() == "REPEATED" else True, metadata=metadata, ) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 58d2b73b3..203cc1d1c 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -2002,6 +2002,23 @@ def test_bq_to_arrow_field_type_override(module_under_test): ) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_field_set_repeated_nullable_false(module_under_test): + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("name", "STRING", mode="REPEATED") + ).nullable + is False + ) + + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("name", "STRING", mode="NULLABLE") + ).nullable + is True + ) + + @pytest.mark.parametrize( "field_type, metadata", [ From 1bfc761e15fae000f2a983e90dfc838ca9af4c3e Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 27 Aug 2024 20:17:04 +0200 Subject: [PATCH 348/536] chore(deps): update all dependencies (#2002) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/geography/requirements.txt | 8 ++++---- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index f388c8248..892c1524e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -14,7 +14,7 @@ geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' google-api-core==2.19.1 -google-auth==2.33.0 +google-auth==2.34.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 @@ -22,8 +22,8 @@ google-crc32c==1.5.0 google-resumable-media==2.7.2 googleapis-common-protos==1.63.2 grpcio===1.62.2; python_version == '3.7' -grpcio==1.65.4; python_version >= '3.8' -idna==3.7 +grpcio==1.66.0; python_version >= '3.8' +idna==3.8 munch==4.0.0 mypy-extensions==1.0.0 packaging===24.0; python_version == '3.7' @@ -48,7 +48,7 @@ PyYAML==6.0.2; python_version >= '3.8' requests==2.31.0; python_version == '3.7' requests==2.32.3; python_version >= '3.8' rsa==4.9 -Shapely==2.0.5 +Shapely==2.0.6 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 956b168dd..b08ecad7e 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -bigquery_magics==0.1.0 +bigquery_magics==0.1.1 db-dtypes==1.3.0 google.cloud.bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 42b1243eb..0467676fc 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,4 +1,4 @@ -bigquery-magics==0.1.0 +bigquery-magics==0.1.1 db-dtypes==1.3.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 From ea69fe315592f3a73417f175b1fe4543203cb716 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 28 Aug 2024 01:08:51 +0200 Subject: [PATCH 349/536] chore(deps): update all dependencies (#2004) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 892c1524e..25dd4b319 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -20,7 +20,7 @@ google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.2 -googleapis-common-protos==1.63.2 +googleapis-common-protos==1.65.0 grpcio===1.62.2; python_version == '3.7' grpcio==1.66.0; python_version >= '3.8' idna==3.8 @@ -40,7 +40,7 @@ pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.0; python_version >= '3.8' pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' -pyparsing==3.1.2 +pyparsing==3.1.4 python-dateutil==2.9.0.post0 pytz==2024.1 PyYAML===6.0.1; python_version == '3.7' From 7af65236c928b105299f158084029cf45438e56e Mon Sep 17 00:00:00 2001 From: shollyman Date: Wed, 4 Sep 2024 12:09:39 -0700 Subject: [PATCH 350/536] testing: remove testing identity override (#2011) * testing: remove testing identity override This PR removes a stale reference to a membership group in samples tests. --- samples/snippets/view.py | 2 +- samples/snippets/view_test.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/samples/snippets/view.py b/samples/snippets/view.py index 94f406890..30e719c79 100644 --- a/samples/snippets/view.py +++ b/samples/snippets/view.py @@ -147,7 +147,7 @@ def grant_access( # Make an API request to get the view dataset ACLs. view_dataset = client.get_dataset(view_dataset_id) - analyst_group_email = "data_analysts@example.com" + analyst_group_email = "example-analyst-group@google.com" # [END bigquery_grant_view_access] # To facilitate testing, we replace values with alternatives # provided by the testing harness. diff --git a/samples/snippets/view_test.py b/samples/snippets/view_test.py index dfa1cdeee..d46595695 100644 --- a/samples/snippets/view_test.py +++ b/samples/snippets/view_test.py @@ -114,7 +114,6 @@ def test_view( project_id, dataset_id, table_id = view_id.split(".") overrides: view.OverridesDict = { - "analyst_group_email": "cloud-dpes-bigquery@google.com", "view_dataset_id": view_dataset_id, "source_dataset_id": source_dataset_id, "view_reference": { @@ -127,5 +126,5 @@ def test_view( assert len(view_dataset.access_entries) != 0 assert len(source_dataset.access_entries) != 0 out, _ = capsys.readouterr() - assert "cloud-dpes-bigquery@google.com" in out + assert "example-analyst-group@google.com" in out assert table_id in out From 3ab5e95984ad521027a4e1efd9f16767403e668d Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 5 Sep 2024 11:28:49 -0700 Subject: [PATCH 351/536] feat: include LegacyPandasError in init imports (#2014) --- google/cloud/bigquery/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index e80907ec9..26d03286f 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -44,6 +44,7 @@ from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlTypeNames from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery.exceptions import LegacyPandasError from google.cloud.bigquery.exceptions import LegacyPyarrowError from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions From f0a41618f10e754863617e9efa32707814ca895d Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 10 Sep 2024 21:13:15 +0200 Subject: [PATCH 352/536] chore(deps): update all dependencies (#2005) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * add version constraint for google-crc32c --------- Co-authored-by: Owl Bot Co-authored-by: Leah Cole --- samples/geography/requirements.txt | 13 +++++++------ samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements.txt | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 25dd4b319..cfb27cca9 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,7 +1,7 @@ attrs==24.2.0 -certifi==2024.7.4 +certifi==2024.8.30 cffi===1.15.1; python_version == '3.7' -cffi==1.17.0; python_version >= '3.8' +cffi==1.17.1; python_version >= '3.8' charset-normalizer==3.3.2 click==8.1.7 click-plugins==1.1.1 @@ -13,16 +13,17 @@ geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.19.1 +google-api-core==2.19.2 google-auth==2.34.0 google-cloud-bigquery==3.25.0 -google-cloud-bigquery-storage==2.25.0 +google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 -google-crc32c==1.5.0 +google-crc32c==1.5.0; python_version < '3.9' +google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.65.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.66.0; python_version >= '3.8' +grpcio==1.66.1; python_version >= '3.8' idna==3.8 munch==4.0.0 mypy-extensions==1.0.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index b08ecad7e..f18db407e 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,7 +1,7 @@ -bigquery_magics==0.1.1 +bigquery_magics==0.2.0 db-dtypes==1.3.0 google.cloud.bigquery==3.25.0 -google-cloud-bigquery-storage==2.25.0 +google-cloud-bigquery-storage==2.26.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 0467676fc..0b906c4ea 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,7 +1,7 @@ -bigquery-magics==0.1.1 +bigquery-magics==0.2.0 db-dtypes==1.3.0 google-cloud-bigquery==3.25.0 -google-cloud-bigquery-storage==2.25.0 +google-cloud-bigquery-storage==2.26.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' From 325519afc80133aabe81ca069f2b891ef990acb6 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 11 Sep 2024 18:11:59 +0200 Subject: [PATCH 353/536] chore(deps): update all dependencies (#2017) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements-test.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/magics/requirements-test.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 68f9039cc..1640e1a95 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 335236a14..1ccebd9cd 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 68f9039cc..1640e1a95 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 68f9039cc..1640e1a95 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 083b20271..bb0b2a6bf 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 From 1b4cca0a3cc788a4570705572d5f04172f6b4b24 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 11 Sep 2024 11:35:42 -0700 Subject: [PATCH 354/536] docs: improve QueryJobConfig.destination docstring (#2016) * docs: improve QueryJobConfig.destination docstring * add space --- google/cloud/bigquery/job/query.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 4ea5687e0..ca2448eaa 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -476,6 +476,11 @@ def destination(self): ID, each separated by ``.``. For example: ``your-project.your_dataset.your_table``. + .. note:: + + Only table ID is passed to the backend, so any configuration + in `~google.cloud.bigquery.table.Table` is discarded. + See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_table """ From 847feb48c26e96fdcb1393458f370c79d4c92fed Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 11 Sep 2024 17:16:06 -0400 Subject: [PATCH 355/536] chore: adds Python 3.7/3.8 EOL pending deprecation warning (#2007) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * adds pending deprecation warning * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * revise code to put version function in version helpers * Update noxfile.py * Update google/cloud/bigquery/__init__.py --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- google/cloud/bigquery/__init__.py | 13 ++++++++++++ google/cloud/bigquery/_versions_helpers.py | 14 +++++++++++++ noxfile.py | 23 +++++++++++++++++++--- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 26d03286f..caf75333a 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -115,12 +115,25 @@ from google.cloud.bigquery.table import TimePartitioningType from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery import _versions_helpers try: import bigquery_magics # type: ignore except ImportError: bigquery_magics = None +sys_major, sys_minor, sys_micro = _versions_helpers.extract_runtime_version() + +if sys_major == 3 and sys_minor in (7, 8): + warnings.warn( + "The python-bigquery library will stop supporting Python 3.7 " + "and Python 3.8 in a future major release expected in Q4 2024. " + f"Your Python version is {sys_major}.{sys_minor}.{sys_micro}. We " + "recommend that you update soon to ensure ongoing support. For " + "more details, see: [Google Cloud Client Libraries Supported Python Versions policy](https://cloud.google.com/python/docs/supported-python-versions)", + PendingDeprecationWarning, + ) + __all__ = [ "__version__", "Client", diff --git a/google/cloud/bigquery/_versions_helpers.py b/google/cloud/bigquery/_versions_helpers.py index 72d4c921d..cfbf70a8e 100644 --- a/google/cloud/bigquery/_versions_helpers.py +++ b/google/cloud/bigquery/_versions_helpers.py @@ -14,6 +14,7 @@ """Shared helper functions for verifying versions of installed modules.""" +import sys from typing import Any import packaging.version @@ -248,3 +249,16 @@ def try_import(self, raise_if_error: bool = False) -> Any: and PYARROW_VERSIONS.try_import() is not None and PYARROW_VERSIONS.installed_version >= _MIN_PYARROW_VERSION_RANGE ) + + +def extract_runtime_version(): + # Retrieve the version information + version_info = sys.version_info + + # Extract the major, minor, and micro components + major = version_info.major + minor = version_info.minor + micro = version_info.micro + + # Display the version number in a clear format + return major, minor, micro diff --git a/noxfile.py b/noxfile.py index a2df2e094..2376309ff 100644 --- a/noxfile.py +++ b/noxfile.py @@ -116,6 +116,7 @@ def default(session, install_extras=True): session.run( "py.test", "--quiet", + "-W default::PendingDeprecationWarning", "--cov=google/cloud/bigquery", "--cov=tests/unit", "--cov-append", @@ -231,6 +232,7 @@ def system(session): session.run( "py.test", "--quiet", + "-W default::PendingDeprecationWarning", os.path.join("tests", "system"), *session.posargs, ) @@ -299,6 +301,7 @@ def snippets(session): session.run( "py.test", "samples", + "-W default::PendingDeprecationWarning", "--ignore=samples/desktopapp", "--ignore=samples/magics", "--ignore=samples/geography", @@ -401,9 +404,23 @@ def prerelease_deps(session): session.run("python", "-m", "pip", "freeze") # Run all tests, except a few samples tests which require extra dependencies. - session.run("py.test", "tests/unit") - session.run("py.test", "tests/system") - session.run("py.test", "samples/tests") + session.run( + "py.test", + "tests/unit", + "-W default::PendingDeprecationWarning", + ) + + session.run( + "py.test", + "tests/system", + "-W default::PendingDeprecationWarning", + ) + + session.run( + "py.test", + "samples/tests", + "-W default::PendingDeprecationWarning", + ) @nox.session(python=DEFAULT_PYTHON_VERSION) From 255472359f3ed6b6cee06039ebe9059607fd9894 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 12 Sep 2024 16:44:54 +0200 Subject: [PATCH 356/536] chore(deps): update all dependencies (#2018) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- samples/geography/requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index cfb27cca9..350419781 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -18,7 +18,7 @@ google-auth==2.34.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 -google-crc32c==1.5.0; python_version < '3.9' +google-crc32c===1.5.0; python_version < '3.9' google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.65.0 @@ -36,14 +36,14 @@ proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==17.0.0; python_version >= '3.8' pyasn1===0.5.1; python_version == '3.7' -pyasn1==0.6.0; python_version >= '3.8' +pyasn1==0.6.1; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' -pyasn1-modules==0.4.0; python_version >= '3.8' +pyasn1-modules==0.4.1; python_version >= '3.8' pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' pyparsing==3.1.4 python-dateutil==2.9.0.post0 -pytz==2024.1 +pytz==2024.2 PyYAML===6.0.1; python_version == '3.7' PyYAML==6.0.2; python_version >= '3.8' requests==2.31.0; python_version == '3.7' From b561aaf6bb744300ca668b37e8cb047dc3d428be Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 17 Sep 2024 11:18:05 -0400 Subject: [PATCH 357/536] build(python): release script update (#2024) Source-Link: https://github.com/googleapis/synthtool/commit/71a72973dddbc66ea64073b53eda49f0d22e0942 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:e8dcfd7cbfd8beac3a3ff8d3f3185287ea0625d859168cc80faccfc9a7a00455 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/release.sh | 2 +- .kokoro/release/common.cfg | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 6d064ddb9..597e0c326 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:94bb690db96e6242b2567a4860a94d48fa48696d092e51b0884a1a2c0a79a407 -# created: 2024-07-31T14:52:44.926548819Z + digest: sha256:e8dcfd7cbfd8beac3a3ff8d3f3185287ea0625d859168cc80faccfc9a7a00455 +# created: 2024-09-16T21:04:09.091105552Z diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 81cee716e..453d6f702 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -23,7 +23,7 @@ python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source / export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-1") +TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-2") cd github/python-bigquery python3 setup.py sdist bdist_wheel twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index cb8bbaa2e..43b5a1f27 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -28,7 +28,7 @@ before_action { fetch_keystore { keystore_resource { keystore_config_id: 73713 - keyname: "google-cloud-pypi-token-keystore-1" + keyname: "google-cloud-pypi-token-keystore-2" } } } From ef8e92787941ed23b9b2b5ce7c956bcb3754b995 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 20 Sep 2024 23:21:58 +0200 Subject: [PATCH 358/536] chore(deps): update all dependencies (#2025) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 350419781..aa2ccfc28 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -24,7 +24,7 @@ google-resumable-media==2.7.2 googleapis-common-protos==1.65.0 grpcio===1.62.2; python_version == '3.7' grpcio==1.66.1; python_version >= '3.8' -idna==3.8 +idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 packaging===24.0; python_version == '3.7' From ba99b12215995448998fccb6691423f4555a73bf Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 24 Sep 2024 19:46:21 +0200 Subject: [PATCH 359/536] chore(deps): update all dependencies (#2029) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index aa2ccfc28..e51d3d8c9 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -13,8 +13,8 @@ geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.19.2 -google-auth==2.34.0 +google-api-core==2.20.0 +google-auth==2.35.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 @@ -31,7 +31,7 @@ packaging===24.0; python_version == '3.7' packaging==24.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.2; python_version >= '3.9' +pandas==2.2.3; python_version >= '3.9' proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==17.0.0; python_version >= '3.8' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index f18db407e..c1aac4bac 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -7,4 +7,4 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.2; python_version >= '3.9' +pandas==2.2.3; python_version >= '3.9' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 0b906c4ea..c25253e96 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -10,4 +10,4 @@ matplotlib===3.7.4; python_version == '3.8' matplotlib==3.9.2; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.2; python_version >= '3.9' +pandas==2.2.3; python_version >= '3.9' From a76af359525ef3c49c958663f81fd24c9d35e1e7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:47:57 -0400 Subject: [PATCH 360/536] chore(deps): bump fiona from 1.9.6 to 1.10.0 in /samples/geography (#2027) * chore(deps): bump fiona from 1.9.6 to 1.10.0 in /samples/geography Bumps [fiona](https://github.com/Toblerity/Fiona) from 1.9.6 to 1.10.0. - [Release notes](https://github.com/Toblerity/Fiona/releases) - [Changelog](https://github.com/Toblerity/Fiona/blob/main/CHANGES.txt) - [Commits](https://github.com/Toblerity/Fiona/compare/1.9.6...1.10.0) --- updated-dependencies: - dependency-name: fiona dependency-type: direct:production ... Signed-off-by: dependabot[bot] * pin fiona to 1.9.6 for python 3.7 --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e51d3d8c9..cc0f3ad17 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -8,13 +8,14 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.3.0 -Fiona==1.9.6 +Fiona===1.9.6; python_version == '3.7' +Fiona==1.10.0; python_version >= '3.8' geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.20.0 -google-auth==2.35.0 +google-api-core==2.19.2 +google-auth==2.34.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 @@ -31,7 +32,7 @@ packaging===24.0; python_version == '3.7' packaging==24.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.3; python_version >= '3.9' +pandas==2.2.2; python_version >= '3.9' proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==17.0.0; python_version >= '3.8' From cad34f1afe20bc430c631ba9c2b69e442281d08d Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 26 Sep 2024 11:48:45 -0700 Subject: [PATCH 361/536] chore(main): release 3.26.0 (#1973) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 28 ++++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a089b8b4..5de99a6ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,34 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.26.0](https://github.com/googleapis/python-bigquery/compare/v3.25.0...v3.26.0) (2024-09-25) + + +### Features + +* Include LegacyPandasError in init imports ([#2014](https://github.com/googleapis/python-bigquery/issues/2014)) ([3ab5e95](https://github.com/googleapis/python-bigquery/commit/3ab5e95984ad521027a4e1efd9f16767403e668d)) +* Use `bigquery-magics` package for the `%%bigquery` magic ([#1965](https://github.com/googleapis/python-bigquery/issues/1965)) ([60128a5](https://github.com/googleapis/python-bigquery/commit/60128a522375823422f238312521a2ce356d9177)) + + +### Bug Fixes + +* Add docfx to the presubmit configuration and delete docs-presubmit ([#1995](https://github.com/googleapis/python-bigquery/issues/1995)) ([bd83cfd](https://github.com/googleapis/python-bigquery/commit/bd83cfd2eb25cec58d59af8048f5188d748b083d)) +* Add warning when encountering unknown field types ([#1989](https://github.com/googleapis/python-bigquery/issues/1989)) ([8f5a41d](https://github.com/googleapis/python-bigquery/commit/8f5a41d283a965ca161019588d3a3b2947b04b5b)) +* Allow protobuf 5.x; require protobuf >=3.20.2; proto-plus >=1.22.3 ([#1976](https://github.com/googleapis/python-bigquery/issues/1976)) ([57bf873](https://github.com/googleapis/python-bigquery/commit/57bf873474382cc2cb34243b704bc928fa1b64c6)) +* Do not set job timeout extra property if None ([#1987](https://github.com/googleapis/python-bigquery/issues/1987)) ([edcb79c](https://github.com/googleapis/python-bigquery/commit/edcb79ca69dba30d8102abebb9d53bc76e4882ee)) +* Set pyarrow field nullable to False for a BigQuery field in REPEATED mode ([#1999](https://github.com/googleapis/python-bigquery/issues/1999)) ([5352870](https://github.com/googleapis/python-bigquery/commit/5352870283ca7d4652aefc73f12645bcf6e1363c)) + + +### Dependencies + +* Bump min version of google-api-core and google-cloud-core to 2.x ([#1972](https://github.com/googleapis/python-bigquery/issues/1972)) ([a958732](https://github.com/googleapis/python-bigquery/commit/a958732aed7d9bd51ffde3dc0e6cae9ad7455b54)) + + +### Documentation + +* Add short mode query sample & test ([#1978](https://github.com/googleapis/python-bigquery/issues/1978)) ([ba61a8a](https://github.com/googleapis/python-bigquery/commit/ba61a8ab0da541ba1940211875d7ea2e9e17dfa8)) +* Improve QueryJobConfig.destination docstring ([#2016](https://github.com/googleapis/python-bigquery/issues/2016)) ([1b4cca0](https://github.com/googleapis/python-bigquery/commit/1b4cca0a3cc788a4570705572d5f04172f6b4b24)) + ## [3.25.0](https://github.com/googleapis/python-bigquery/compare/v3.24.0...v3.25.0) (2024-06-17) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index fed077e26..ebc911253 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.25.0" +__version__ = "3.26.0" From 02706e26034570d0307ae47bf7c968945678eeac Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 26 Sep 2024 22:08:28 +0200 Subject: [PATCH 362/536] chore(deps): update all dependencies (#2031) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index c1aac4bac..4652fcdf2 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -bigquery_magics==0.2.0 +bigquery_magics==0.3.0 db-dtypes==1.3.0 google.cloud.bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index c25253e96..c4b75f3db 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,4 +1,4 @@ -bigquery-magics==0.2.0 +bigquery-magics==0.3.0 db-dtypes==1.3.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 From e29b987956e5d80541ad9a573e902938a1373bda Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 1 Oct 2024 23:02:18 +0200 Subject: [PATCH 363/536] chore(deps): update all dependencies (#2033) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 12 ++++++------ samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index dafb60b2a..383829d7d 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.25.0 +google-cloud-bigquery==3.26.0 google-auth-oauthlib==1.2.1 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index cc0f3ad17..1089dc195 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -9,14 +9,14 @@ cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.3.0 Fiona===1.9.6; python_version == '3.7' -Fiona==1.10.0; python_version >= '3.8' +Fiona==1.10.1; python_version >= '3.8' geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.19.2 -google-auth==2.34.0 -google-cloud-bigquery==3.25.0 +google-api-core==2.20.0 +google-auth==2.35.0 +google-cloud-bigquery==3.26.0 google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 google-crc32c===1.5.0; python_version < '3.9' @@ -32,7 +32,7 @@ packaging===24.0; python_version == '3.7' packaging==24.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.2; python_version >= '3.9' +pandas==2.2.3; python_version >= '3.9' proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==17.0.0; python_version >= '3.8' @@ -56,4 +56,4 @@ typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' -urllib3==2.2.2; python_version >= '3.8' +urllib3==2.2.3; python_version >= '3.8' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 4652fcdf2..6386fb6d2 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.3.0 +bigquery_magics==0.4.0 db-dtypes==1.3.0 -google.cloud.bigquery==3.25.0 +google.cloud.bigquery==3.26.0 google-cloud-bigquery-storage==2.26.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index c4b75f3db..7463e1afc 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ -bigquery-magics==0.3.0 +bigquery-magics==0.4.0 db-dtypes==1.3.0 -google-cloud-bigquery==3.25.0 +google-cloud-bigquery==3.26.0 google-cloud-bigquery-storage==2.26.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 9e181d963..65ce0be9f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.25.0 +google-cloud-bigquery==3.26.0 From 1d8d0a0b0359ae6da5b99fd3fa8cb016b74b8a6c Mon Sep 17 00:00:00 2001 From: Jeff Quinlan-Galper Date: Wed, 9 Oct 2024 03:25:31 -0700 Subject: [PATCH 364/536] Fix typo in legacy docs (#2037) uspported -> supported Co-authored-by: Chalmer Lowe --- docs/bigquery/legacy_proto_types.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/bigquery/legacy_proto_types.rst b/docs/bigquery/legacy_proto_types.rst index bc1e93715..36e9984b9 100644 --- a/docs/bigquery/legacy_proto_types.rst +++ b/docs/bigquery/legacy_proto_types.rst @@ -3,7 +3,7 @@ Legacy proto-based Types for Google Cloud Bigquery v2 API .. warning:: These types are provided for backward compatibility only, and are not maintained - anymore. They might also differ from the types uspported on the backend. It is + anymore. They might also differ from the types supported on the backend. It is therefore strongly advised to migrate to the types found in :doc:`standard_sql`. Also see the :doc:`3.0.0 Migration Guide<../UPGRADING>` for more information. From 7372ad659fd3316a602e90f224e9a3304d4c1419 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 10 Oct 2024 05:32:25 -0400 Subject: [PATCH 365/536] feat: updates to allow users to set max_stream_count (#2039) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a function `determine_requested_streams()` to compare `preserve_order` and the new argument `max_stream_count` to determine how many streams to request. ``` preserve_order (bool): Whether to preserve the order of streams. If True, this limits the number of streams to one (more than one cannot guarantee order). max_stream_count (Union[int, None]]): The maximum number of streams allowed. Must be a non-negative number or None, where None indicates the value is unset. If `max_stream_count` is set, it overrides `preserve_order`. ``` Fixes #2030 🦕 --- google/cloud/bigquery/_pandas_helpers.py | 118 +++++++++++++++++++---- tests/unit/test__pandas_helpers.py | 31 ++++++ 2 files changed, 130 insertions(+), 19 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 210ab4875..bf7d10c0f 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -21,13 +21,14 @@ import logging import queue import warnings -from typing import Any, Union +from typing import Any, Union, Optional, Callable, Generator, List from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema + try: import pandas # type: ignore @@ -75,7 +76,7 @@ def _to_wkb(v): _to_wkb = _to_wkb() try: - from google.cloud.bigquery_storage import ArrowSerializationOptions + from google.cloud.bigquery_storage_v1.types import ArrowSerializationOptions except ImportError: _ARROW_COMPRESSION_SUPPORT = False else: @@ -816,18 +817,54 @@ def _nowait(futures): def _download_table_bqstorage( - project_id, - table, - bqstorage_client, - preserve_order=False, - selected_fields=None, - page_to_item=None, - max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, -): - """Use (faster, but billable) BQ Storage API to construct DataFrame.""" + project_id: str, + table: Any, + bqstorage_client: Any, + preserve_order: bool = False, + selected_fields: Optional[List[Any]] = None, + page_to_item: Optional[Callable] = None, + max_queue_size: Any = _MAX_QUEUE_SIZE_DEFAULT, + max_stream_count: Optional[int] = None, +) -> Generator[Any, None, None]: + """Downloads a BigQuery table using the BigQuery Storage API. + + This method uses the faster, but potentially more expensive, BigQuery + Storage API to download a table as a Pandas DataFrame. It supports + parallel downloads and optional data transformations. + + Args: + project_id (str): The ID of the Google Cloud project containing + the table. + table (Any): The BigQuery table to download. + bqstorage_client (Any): An + authenticated BigQuery Storage API client. + preserve_order (bool, optional): Whether to preserve the order + of the rows as they are read from BigQuery. If True this limits + the number of streams to one and overrides `max_stream_count`. + Defaults to False. + selected_fields (Optional[List[SchemaField]]): + A list of BigQuery schema fields to select for download. If None, + all fields are downloaded. Defaults to None. + page_to_item (Optional[Callable]): An optional callable + function that takes a page of data from the BigQuery Storage API + max_stream_count (Optional[int]): The maximum number of + concurrent streams to use for downloading data. If `preserve_order` + is True, the requested streams are limited to 1 regardless of the + `max_stream_count` value. If 0 or None, then the number of + requested streams will be unbounded. Defaults to None. + + Yields: + pandas.DataFrame: Pandas DataFrames, one for each chunk of data + downloaded from BigQuery. + + Raises: + ValueError: If attempting to read from a specific partition or snapshot. + + Note: + This method requires the `google-cloud-bigquery-storage` library + to be installed. + """ - # Passing a BQ Storage client in implies that the BigQuery Storage library - # is available and can be imported. from google.cloud import bigquery_storage if "$" in table.table_id: @@ -837,10 +874,11 @@ def _download_table_bqstorage( if "@" in table.table_id: raise ValueError("Reading from a specific snapshot is not currently supported.") - requested_streams = 1 if preserve_order else 0 + requested_streams = determine_requested_streams(preserve_order, max_stream_count) - requested_session = bigquery_storage.types.ReadSession( - table=table.to_bqstorage(), data_format=bigquery_storage.types.DataFormat.ARROW + requested_session = bigquery_storage.types.stream.ReadSession( + table=table.to_bqstorage(), + data_format=bigquery_storage.types.stream.DataFormat.ARROW, ) if selected_fields is not None: for field in selected_fields: @@ -848,7 +886,8 @@ def _download_table_bqstorage( if _ARROW_COMPRESSION_SUPPORT: requested_session.read_options.arrow_serialization_options.buffer_compression = ( - ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + # CompressionCodec(1) -> LZ4_FRAME + ArrowSerializationOptions.CompressionCodec(1) ) session = bqstorage_client.create_read_session( @@ -884,7 +923,7 @@ def _download_table_bqstorage( elif max_queue_size is None: max_queue_size = 0 # unbounded - worker_queue = queue.Queue(maxsize=max_queue_size) + worker_queue: queue.Queue[int] = queue.Queue(maxsize=max_queue_size) with concurrent.futures.ThreadPoolExecutor(max_workers=total_streams) as pool: try: @@ -910,7 +949,7 @@ def _download_table_bqstorage( # we want to block on the queue's get method, instead. This # prevents the queue from filling up, because the main thread # has smaller gaps in time between calls to the queue's get - # method. For a detailed explaination, see: + # method. For a detailed explanation, see: # https://friendliness.dev/2019/06/18/python-nowait/ done, not_done = _nowait(not_done) for future in done: @@ -949,6 +988,7 @@ def download_arrow_bqstorage( preserve_order=False, selected_fields=None, max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, + max_stream_count=None, ): return _download_table_bqstorage( project_id, @@ -958,6 +998,7 @@ def download_arrow_bqstorage( selected_fields=selected_fields, page_to_item=_bqstorage_page_to_arrow, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) @@ -970,6 +1011,7 @@ def download_dataframe_bqstorage( preserve_order=False, selected_fields=None, max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, + max_stream_count=None, ): page_to_item = functools.partial(_bqstorage_page_to_dataframe, column_names, dtypes) return _download_table_bqstorage( @@ -980,6 +1022,7 @@ def download_dataframe_bqstorage( selected_fields=selected_fields, page_to_item=page_to_item, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) @@ -1024,3 +1067,40 @@ def verify_pandas_imports(): raise ValueError(_NO_PANDAS_ERROR) from pandas_import_exception if db_dtypes is None: raise ValueError(_NO_DB_TYPES_ERROR) from db_dtypes_import_exception + + +def determine_requested_streams( + preserve_order: bool, + max_stream_count: Union[int, None], +) -> int: + """Determines the value of requested_streams based on the values of + `preserve_order` and `max_stream_count`. + + Args: + preserve_order (bool): Whether to preserve the order of streams. If True, + this limits the number of streams to one. `preserve_order` takes + precedence over `max_stream_count`. + max_stream_count (Union[int, None]]): The maximum number of streams + allowed. Must be a non-negative number or None, where None indicates + the value is unset. NOTE: if `preserve_order` is also set, it takes + precedence over `max_stream_count`, thus to ensure that `max_stream_count` + is used, ensure that `preserve_order` is None. + + Returns: + (int) The appropriate value for requested_streams. + """ + + if preserve_order: + # If preserve order is set, it takes precendence. + # Limit the requested streams to 1, to ensure that order + # is preserved) + return 1 + + elif max_stream_count is not None: + # If preserve_order is not set, only then do we consider max_stream_count + if max_stream_count <= -1: + raise ValueError("max_stream_count must be non-negative OR None") + return max_stream_count + + # Default to zero requested streams (unbounded). + return 0 diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 203cc1d1c..3a5fddacc 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -18,6 +18,7 @@ import functools import operator import queue +from typing import Union from unittest import mock import warnings @@ -46,6 +47,7 @@ from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema +from google.cloud.bigquery._pandas_helpers import determine_requested_streams pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() @@ -2053,3 +2055,32 @@ def test_verify_pandas_imports_no_db_dtypes(module_under_test, monkeypatch): monkeypatch.setattr(module_under_test, "db_dtypes", None) with pytest.raises(ValueError, match="Please install the 'db-dtypes' package"): module_under_test.verify_pandas_imports() + + +@pytest.mark.parametrize( + "preserve_order, max_stream_count, expected_requested_streams", + [ + # If preserve_order is set/True, it takes precedence: + (True, 10, 1), # use 1 + (True, None, 1), # use 1 + # If preserve_order is not set check max_stream_count: + (False, 10, 10), # max_stream_count (X) takes precedence + (False, None, 0), # Unbounded (0) when both are unset + ], +) +def test_determine_requested_streams( + preserve_order: bool, + max_stream_count: Union[int, None], + expected_requested_streams: int, +): + """Tests various combinations of preserve_order and max_stream_count.""" + actual_requested_streams = determine_requested_streams( + preserve_order, max_stream_count + ) + assert actual_requested_streams == expected_requested_streams + + +def test_determine_requested_streams_invalid_max_stream_count(): + """Tests that a ValueError is raised if max_stream_count is negative.""" + with pytest.raises(ValueError): + determine_requested_streams(preserve_order=False, max_stream_count=-1) From 7b03d61b8bc848fa2fd2722ca5e2c628e2f76eac Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 1 Nov 2024 10:18:55 -0400 Subject: [PATCH 366/536] build: use multiScm for Kokoro release builds (#2049) Source-Link: https://github.com/googleapis/synthtool/commit/0da16589204e7f61911f64fcb30ac2d3b6e59b31 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:5cddfe2fb5019bbf78335bc55f15bc13e18354a56b3ff46e1834f8e540807f05 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 +- .github/release-trigger.yml | 1 + .kokoro/docker/docs/requirements.txt | 42 +- .kokoro/docs/common.cfg | 2 +- .kokoro/release.sh | 2 +- .kokoro/release/common.cfg | 8 +- .kokoro/requirements.txt | 610 +++++++++---------- .kokoro/samples/python3.13/common.cfg | 40 ++ .kokoro/samples/python3.13/continuous.cfg | 6 + .kokoro/samples/python3.13/periodic-head.cfg | 11 + .kokoro/samples/python3.13/periodic.cfg | 6 + .kokoro/samples/python3.13/presubmit.cfg | 6 + .kokoro/test-samples-impl.sh | 3 +- CONTRIBUTING.rst | 6 +- samples/desktopapp/noxfile.py | 2 +- samples/geography/noxfile.py | 2 +- samples/magics/noxfile.py | 2 +- samples/notebooks/noxfile.py | 2 +- samples/snippets/noxfile.py | 2 +- 19 files changed, 398 insertions(+), 359 deletions(-) create mode 100644 .kokoro/samples/python3.13/common.cfg create mode 100644 .kokoro/samples/python3.13/continuous.cfg create mode 100644 .kokoro/samples/python3.13/periodic-head.cfg create mode 100644 .kokoro/samples/python3.13/periodic.cfg create mode 100644 .kokoro/samples/python3.13/presubmit.cfg diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 597e0c326..7672b49b6 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:e8dcfd7cbfd8beac3a3ff8d3f3185287ea0625d859168cc80faccfc9a7a00455 -# created: 2024-09-16T21:04:09.091105552Z + digest: sha256:5cddfe2fb5019bbf78335bc55f15bc13e18354a56b3ff46e1834f8e540807f05 +# created: 2024-10-31T01:41:07.349286254Z diff --git a/.github/release-trigger.yml b/.github/release-trigger.yml index d4ca94189..4bb79e58e 100644 --- a/.github/release-trigger.yml +++ b/.github/release-trigger.yml @@ -1 +1,2 @@ enabled: true +multiScmName: diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index 7129c7715..66eacc82f 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -4,39 +4,39 @@ # # pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.4.0 \ - --hash=sha256:69a79e083a716173e5532e0fa3bef45f793f4e61096cf52b5a42c0211c8b8aa5 \ - --hash=sha256:c2abcdfe1be8ace47ba777d4fce319eb13bf8ad9dace8d085dcad6eded88057f +argcomplete==3.5.1 \ + --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ + --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 # via nox colorlog==6.8.2 \ --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 # via nox -distlib==0.3.8 \ - --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ - --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 +distlib==0.3.9 \ + --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ + --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 # via virtualenv -filelock==3.15.4 \ - --hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ - --hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7 +filelock==3.16.1 \ + --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ + --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 # via virtualenv -nox==2024.4.15 \ - --hash=sha256:6492236efa15a460ecb98e7b67562a28b70da006ab0be164e8821177577c0565 \ - --hash=sha256:ecf6700199cdfa9e5ea0a41ff5e6ef4641d09508eda6edb89d9987864115817f +nox==2024.10.9 \ + --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ + --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 # via -r requirements.in packaging==24.1 \ --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 # via nox -platformdirs==4.2.2 \ - --hash=sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee \ - --hash=sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3 +platformdirs==4.3.6 \ + --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ + --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv -tomli==2.0.1 \ - --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ - --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f +tomli==2.0.2 \ + --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ + --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed # via nox -virtualenv==20.26.3 \ - --hash=sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a \ - --hash=sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589 +virtualenv==20.26.6 \ + --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ + --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 # via nox diff --git a/.kokoro/docs/common.cfg b/.kokoro/docs/common.cfg index 41b86fc29..76ae5f13b 100644 --- a/.kokoro/docs/common.cfg +++ b/.kokoro/docs/common.cfg @@ -63,4 +63,4 @@ before_action { keyname: "docuploader_service_account" } } -} \ No newline at end of file +} diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 453d6f702..65deb5ed3 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -23,7 +23,7 @@ python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source / export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-2") +TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-3") cd github/python-bigquery python3 setup.py sdist bdist_wheel twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index 43b5a1f27..6f57163f5 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -28,17 +28,11 @@ before_action { fetch_keystore { keystore_resource { keystore_config_id: 73713 - keyname: "google-cloud-pypi-token-keystore-2" + keyname: "google-cloud-pypi-token-keystore-3" } } } -# Tokens needed to report release status back to GitHub -env_vars: { - key: "SECRET_MANAGER_KEYS" - value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" -} - # Store the packages we uploaded to PyPI. That way, we have a record of exactly # what we published, which we can use to generate SBOMs and attestations. action { diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 9622baf0b..006d8ef93 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -4,79 +4,94 @@ # # pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.4.0 \ - --hash=sha256:69a79e083a716173e5532e0fa3bef45f793f4e61096cf52b5a42c0211c8b8aa5 \ - --hash=sha256:c2abcdfe1be8ace47ba777d4fce319eb13bf8ad9dace8d085dcad6eded88057f +argcomplete==3.5.1 \ + --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ + --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 # via nox -attrs==23.2.0 \ - --hash=sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30 \ - --hash=sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1 +attrs==24.2.0 \ + --hash=sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346 \ + --hash=sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2 # via gcp-releasetool backports-tarfile==1.2.0 \ --hash=sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34 \ --hash=sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991 # via jaraco-context -cachetools==5.3.3 \ - --hash=sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945 \ - --hash=sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105 +cachetools==5.5.0 \ + --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ + --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a # via google-auth -certifi==2024.7.4 \ - --hash=sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b \ - --hash=sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90 +certifi==2024.8.30 \ + --hash=sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8 \ + --hash=sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9 # via requests -cffi==1.16.0 \ - --hash=sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc \ - --hash=sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a \ - --hash=sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417 \ - --hash=sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab \ - --hash=sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520 \ - --hash=sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36 \ - --hash=sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743 \ - --hash=sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8 \ - --hash=sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed \ - --hash=sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684 \ - --hash=sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56 \ - --hash=sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324 \ - --hash=sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d \ - --hash=sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235 \ - --hash=sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e \ - --hash=sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088 \ - --hash=sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000 \ - --hash=sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7 \ - --hash=sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e \ - --hash=sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673 \ - --hash=sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c \ - --hash=sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe \ - --hash=sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2 \ - --hash=sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098 \ - --hash=sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8 \ - --hash=sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a \ - --hash=sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0 \ - --hash=sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b \ - --hash=sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896 \ - --hash=sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e \ - --hash=sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9 \ - --hash=sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2 \ - --hash=sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b \ - --hash=sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6 \ - --hash=sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404 \ - --hash=sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f \ - --hash=sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0 \ - --hash=sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4 \ - --hash=sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc \ - --hash=sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936 \ - --hash=sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba \ - --hash=sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872 \ - --hash=sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb \ - --hash=sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614 \ - --hash=sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1 \ - --hash=sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d \ - --hash=sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969 \ - --hash=sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b \ - --hash=sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4 \ - --hash=sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627 \ - --hash=sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956 \ - --hash=sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357 +cffi==1.17.1 \ + --hash=sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8 \ + --hash=sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2 \ + --hash=sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1 \ + --hash=sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15 \ + --hash=sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36 \ + --hash=sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824 \ + --hash=sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8 \ + --hash=sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36 \ + --hash=sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17 \ + --hash=sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf \ + --hash=sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc \ + --hash=sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3 \ + --hash=sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed \ + --hash=sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702 \ + --hash=sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1 \ + --hash=sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8 \ + --hash=sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903 \ + --hash=sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6 \ + --hash=sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d \ + --hash=sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b \ + --hash=sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e \ + --hash=sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be \ + --hash=sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c \ + --hash=sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683 \ + --hash=sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9 \ + --hash=sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c \ + --hash=sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8 \ + --hash=sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1 \ + --hash=sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4 \ + --hash=sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655 \ + --hash=sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67 \ + --hash=sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595 \ + --hash=sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0 \ + --hash=sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65 \ + --hash=sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41 \ + --hash=sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6 \ + --hash=sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401 \ + --hash=sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6 \ + --hash=sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3 \ + --hash=sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16 \ + --hash=sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93 \ + --hash=sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e \ + --hash=sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4 \ + --hash=sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964 \ + --hash=sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c \ + --hash=sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576 \ + --hash=sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0 \ + --hash=sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3 \ + --hash=sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662 \ + --hash=sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3 \ + --hash=sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff \ + --hash=sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5 \ + --hash=sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd \ + --hash=sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f \ + --hash=sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5 \ + --hash=sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14 \ + --hash=sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d \ + --hash=sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9 \ + --hash=sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7 \ + --hash=sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382 \ + --hash=sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a \ + --hash=sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e \ + --hash=sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a \ + --hash=sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4 \ + --hash=sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99 \ + --hash=sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87 \ + --hash=sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b # via cryptography charset-normalizer==2.1.1 \ --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 \ @@ -97,72 +112,67 @@ colorlog==6.8.2 \ # via # gcp-docuploader # nox -cryptography==42.0.8 \ - --hash=sha256:013629ae70b40af70c9a7a5db40abe5d9054e6f4380e50ce769947b73bf3caad \ - --hash=sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583 \ - --hash=sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b \ - --hash=sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c \ - --hash=sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1 \ - --hash=sha256:343728aac38decfdeecf55ecab3264b015be68fc2816ca800db649607aeee648 \ - --hash=sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949 \ - --hash=sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba \ - --hash=sha256:5a94eccb2a81a309806027e1670a358b99b8fe8bfe9f8d329f27d72c094dde8c \ - --hash=sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9 \ - --hash=sha256:7016f837e15b0a1c119d27ecd89b3515f01f90a8615ed5e9427e30d9cdbfed3d \ - --hash=sha256:81884c4d096c272f00aeb1f11cf62ccd39763581645b0812e99a91505fa48e0c \ - --hash=sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e \ - --hash=sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2 \ - --hash=sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d \ - --hash=sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7 \ - --hash=sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70 \ - --hash=sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2 \ - --hash=sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7 \ - --hash=sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14 \ - --hash=sha256:ba4f0a211697362e89ad822e667d8d340b4d8d55fae72cdd619389fb5912eefe \ - --hash=sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e \ - --hash=sha256:c9bb2ae11bfbab395bdd072985abde58ea9860ed84e59dbc0463a5d0159f5b71 \ - --hash=sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961 \ - --hash=sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7 \ - --hash=sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c \ - --hash=sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28 \ - --hash=sha256:dec9b018df185f08483f294cae6ccac29e7a6e0678996587363dc352dc65c842 \ - --hash=sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902 \ - --hash=sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801 \ - --hash=sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a \ - --hash=sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e +cryptography==43.0.1 \ + --hash=sha256:014f58110f53237ace6a408b5beb6c427b64e084eb451ef25a28308270086494 \ + --hash=sha256:1bbcce1a551e262dfbafb6e6252f1ae36a248e615ca44ba302df077a846a8806 \ + --hash=sha256:203e92a75716d8cfb491dc47c79e17d0d9207ccffcbcb35f598fbe463ae3444d \ + --hash=sha256:27e613d7077ac613e399270253259d9d53872aaf657471473ebfc9a52935c062 \ + --hash=sha256:2bd51274dcd59f09dd952afb696bf9c61a7a49dfc764c04dd33ef7a6b502a1e2 \ + --hash=sha256:38926c50cff6f533f8a2dae3d7f19541432610d114a70808f0926d5aaa7121e4 \ + --hash=sha256:511f4273808ab590912a93ddb4e3914dfd8a388fed883361b02dea3791f292e1 \ + --hash=sha256:58d4e9129985185a06d849aa6df265bdd5a74ca6e1b736a77959b498e0505b85 \ + --hash=sha256:5b43d1ea6b378b54a1dc99dd8a2b5be47658fe9a7ce0a58ff0b55f4b43ef2b84 \ + --hash=sha256:61ec41068b7b74268fa86e3e9e12b9f0c21fcf65434571dbb13d954bceb08042 \ + --hash=sha256:666ae11966643886c2987b3b721899d250855718d6d9ce41b521252a17985f4d \ + --hash=sha256:68aaecc4178e90719e95298515979814bda0cbada1256a4485414860bd7ab962 \ + --hash=sha256:7c05650fe8023c5ed0d46793d4b7d7e6cd9c04e68eabe5b0aeea836e37bdcec2 \ + --hash=sha256:80eda8b3e173f0f247f711eef62be51b599b5d425c429b5d4ca6a05e9e856baa \ + --hash=sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d \ + --hash=sha256:88cce104c36870d70c49c7c8fd22885875d950d9ee6ab54df2745f83ba0dc365 \ + --hash=sha256:9d3cdb25fa98afdd3d0892d132b8d7139e2c087da1712041f6b762e4f807cc96 \ + --hash=sha256:a575913fb06e05e6b4b814d7f7468c2c660e8bb16d8d5a1faf9b33ccc569dd47 \ + --hash=sha256:ac119bb76b9faa00f48128b7f5679e1d8d437365c5d26f1c2c3f0da4ce1b553d \ + --hash=sha256:c1332724be35d23a854994ff0b66530119500b6053d0bd3363265f7e5e77288d \ + --hash=sha256:d03a475165f3134f773d1388aeb19c2d25ba88b6a9733c5c590b9ff7bbfa2e0c \ + --hash=sha256:d75601ad10b059ec832e78823b348bfa1a59f6b8d545db3a24fd44362a1564cb \ + --hash=sha256:de41fd81a41e53267cb020bb3a7212861da53a7d39f863585d13ea11049cf277 \ + --hash=sha256:e710bf40870f4db63c3d7d929aa9e09e4e7ee219e703f949ec4073b4294f6172 \ + --hash=sha256:ea25acb556320250756e53f9e20a4177515f012c9eaea17eb7587a8c4d8ae034 \ + --hash=sha256:f98bf604c82c416bc829e490c700ca1553eafdf2912a91e23a79d97d9801372a \ + --hash=sha256:fba1007b3ef89946dbbb515aeeb41e30203b004f0b4b00e5e16078b518563289 # via # -r requirements.in # gcp-releasetool # secretstorage -distlib==0.3.8 \ - --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ - --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 +distlib==0.3.9 \ + --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ + --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 # via virtualenv docutils==0.21.2 \ --hash=sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f \ --hash=sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 # via readme-renderer -filelock==3.15.4 \ - --hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ - --hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7 +filelock==3.16.1 \ + --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ + --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 # via virtualenv gcp-docuploader==0.6.5 \ --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea # via -r requirements.in -gcp-releasetool==2.0.1 \ - --hash=sha256:34314a910c08e8911d9c965bd44f8f2185c4f556e737d719c33a41f6a610de96 \ - --hash=sha256:b0d5863c6a070702b10883d37c4bdfd74bf930fe417f36c0c965d3b7c779ae62 +gcp-releasetool==2.1.1 \ + --hash=sha256:25639269f4eae510094f9dbed9894977e1966933211eb155a451deebc3fc0b30 \ + --hash=sha256:845f4ded3d9bfe8cc7fdaad789e83f4ea014affa77785259a7ddac4b243e099e # via -r requirements.in -google-api-core==2.19.1 \ - --hash=sha256:f12a9b8309b5e21d92483bbd47ce2c445861ec7d269ef6784ecc0ea8c1fa6125 \ - --hash=sha256:f4695f1e3650b316a795108a76a1c416e6afb036199d1c1f1f110916df479ffd +google-api-core==2.21.0 \ + --hash=sha256:4a152fd11a9f774ea606388d423b68aa7e6d6a0ffe4c8266f74979613ec09f81 \ + --hash=sha256:6869eacb2a37720380ba5898312af79a4d30b8bca1548fb4093e0697dc4bdf5d # via # google-cloud-core # google-cloud-storage -google-auth==2.31.0 \ - --hash=sha256:042c4702efa9f7d3c48d3a69341c209381b125faa6dbf3ebe56bc7e40ae05c23 \ - --hash=sha256:87805c36970047247c8afe614d4e3af8eceafc1ebba0c679fe75ddd1d575e871 +google-auth==2.35.0 \ + --hash=sha256:25df55f327ef021de8be50bad0dfd4a916ad0de96da86cd05661c9297723ad3f \ + --hash=sha256:f4c64ed4e01e8e8b646ef34c018f8bf3338df0c8e37d8b3bba40e7f574a3278a # via # gcp-releasetool # google-api-core @@ -172,97 +182,56 @@ google-cloud-core==2.4.1 \ --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 # via google-cloud-storage -google-cloud-storage==2.17.0 \ - --hash=sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388 \ - --hash=sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1 +google-cloud-storage==2.18.2 \ + --hash=sha256:97a4d45c368b7d401ed48c4fdfe86e1e1cb96401c9e199e419d289e2c0370166 \ + --hash=sha256:aaf7acd70cdad9f274d29332673fcab98708d0e1f4dceb5a5356aaef06af4d99 # via gcp-docuploader -google-crc32c==1.5.0 \ - --hash=sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a \ - --hash=sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876 \ - --hash=sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c \ - --hash=sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289 \ - --hash=sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298 \ - --hash=sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02 \ - --hash=sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f \ - --hash=sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2 \ - --hash=sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a \ - --hash=sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb \ - --hash=sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210 \ - --hash=sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5 \ - --hash=sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee \ - --hash=sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c \ - --hash=sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a \ - --hash=sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314 \ - --hash=sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd \ - --hash=sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65 \ - --hash=sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37 \ - --hash=sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4 \ - --hash=sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13 \ - --hash=sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894 \ - --hash=sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31 \ - --hash=sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e \ - --hash=sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709 \ - --hash=sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740 \ - --hash=sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc \ - --hash=sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d \ - --hash=sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c \ - --hash=sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c \ - --hash=sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d \ - --hash=sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906 \ - --hash=sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61 \ - --hash=sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57 \ - --hash=sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c \ - --hash=sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a \ - --hash=sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438 \ - --hash=sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946 \ - --hash=sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7 \ - --hash=sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96 \ - --hash=sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091 \ - --hash=sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae \ - --hash=sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d \ - --hash=sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88 \ - --hash=sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2 \ - --hash=sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd \ - --hash=sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541 \ - --hash=sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728 \ - --hash=sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178 \ - --hash=sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968 \ - --hash=sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346 \ - --hash=sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8 \ - --hash=sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93 \ - --hash=sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7 \ - --hash=sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273 \ - --hash=sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462 \ - --hash=sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94 \ - --hash=sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd \ - --hash=sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e \ - --hash=sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57 \ - --hash=sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b \ - --hash=sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9 \ - --hash=sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a \ - --hash=sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100 \ - --hash=sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325 \ - --hash=sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183 \ - --hash=sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556 \ - --hash=sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4 +google-crc32c==1.6.0 \ + --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ + --hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ + --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ + --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ + --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ + --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ + --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ + --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ + --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ + --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ + --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ + --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ + --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ + --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ + --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ + --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ + --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ + --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ + --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ + --hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ + --hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ + --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ + --hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ + --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ + --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ + --hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ + --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 # via # google-cloud-storage # google-resumable-media -google-resumable-media==2.7.1 \ - --hash=sha256:103ebc4ba331ab1bfdac0250f8033627a2cd7cde09e7ccff9181e31ba4315b2c \ - --hash=sha256:eae451a7b2e2cdbaaa0fd2eb00cc8a1ee5e95e16b55597359cbc3d27d7d90e33 +google-resumable-media==2.7.2 \ + --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ + --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 # via google-cloud-storage -googleapis-common-protos==1.63.2 \ - --hash=sha256:27a2499c7e8aff199665b22741997e485eccc8645aa9176c7c988e6fae507945 \ - --hash=sha256:27c5abdffc4911f28101e635de1533fb4cfd2c37fbaa9174587c799fac90aa87 +googleapis-common-protos==1.65.0 \ + --hash=sha256:2972e6c496f435b92590fd54045060867f3fe9be2c82ab148fc8885035479a63 \ + --hash=sha256:334a29d07cddc3aa01dee4988f9afd9b2916ee2ff49d6b757155dc0d197852c0 # via google-api-core -idna==3.7 \ - --hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \ - --hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 +idna==3.10 \ + --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ + --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 # via requests -importlib-metadata==8.0.0 \ - --hash=sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f \ - --hash=sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812 +importlib-metadata==8.5.0 \ + --hash=sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b \ + --hash=sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7 # via # -r requirements.in # keyring @@ -271,13 +240,13 @@ jaraco-classes==3.4.0 \ --hash=sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd \ --hash=sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790 # via keyring -jaraco-context==5.3.0 \ - --hash=sha256:3e16388f7da43d384a1a7cd3452e72e14732ac9fe459678773a3608a812bf266 \ - --hash=sha256:c2f67165ce1f9be20f32f650f25d8edfc1646a8aeee48ae06fb35f90763576d2 +jaraco-context==6.0.1 \ + --hash=sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3 \ + --hash=sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4 # via keyring -jaraco-functools==4.0.1 \ - --hash=sha256:3b24ccb921d6b593bdceb56ce14799204f473976e2a9d4b15b04d0f2c2326664 \ - --hash=sha256:d33fa765374c0611b52f8b3a795f8900869aa88c84769d4d1746cd68fb28c3e8 +jaraco-functools==4.1.0 \ + --hash=sha256:70f7e0e2ae076498e212562325e805204fc092d7b4c17e0e86c959e249701a9d \ + --hash=sha256:ad159f13428bc4acbf5541ad6dec511f91573b90fba04df61dafa2a1231cf649 # via keyring jeepney==0.8.0 \ --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ @@ -289,9 +258,9 @@ jinja2==3.1.4 \ --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d # via gcp-releasetool -keyring==25.2.1 \ - --hash=sha256:2458681cdefc0dbc0b7eb6cf75d0b98e59f9ad9b2d4edd319d18f68bdca95e50 \ - --hash=sha256:daaffd42dbda25ddafb1ad5fec4024e5bbcfe424597ca1ca452b299861e49f1b +keyring==25.4.1 \ + --hash=sha256:5426f817cf7f6f007ba5ec722b1bcad95a75b27d780343772ad76b17cb47b0bf \ + --hash=sha256:b07ebc55f3e8ed86ac81dd31ef14e81ace9dd9c3d4b5d77a6e9a2016d0d71a1b # via # gcp-releasetool # twine @@ -299,75 +268,76 @@ markdown-it-py==3.0.0 \ --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb # via rich -markupsafe==2.1.5 \ - --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \ - --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \ - --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \ - --hash=sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 \ - --hash=sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532 \ - --hash=sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f \ - --hash=sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617 \ - --hash=sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df \ - --hash=sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4 \ - --hash=sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906 \ - --hash=sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f \ - --hash=sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4 \ - --hash=sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8 \ - --hash=sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371 \ - --hash=sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2 \ - --hash=sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465 \ - --hash=sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52 \ - --hash=sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6 \ - --hash=sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169 \ - --hash=sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad \ - --hash=sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2 \ - --hash=sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0 \ - --hash=sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029 \ - --hash=sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f \ - --hash=sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a \ - --hash=sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced \ - --hash=sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5 \ - --hash=sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c \ - --hash=sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf \ - --hash=sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9 \ - --hash=sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb \ - --hash=sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad \ - --hash=sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3 \ - --hash=sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1 \ - --hash=sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46 \ - --hash=sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc \ - --hash=sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a \ - --hash=sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee \ - --hash=sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900 \ - --hash=sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5 \ - --hash=sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea \ - --hash=sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f \ - --hash=sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5 \ - --hash=sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e \ - --hash=sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a \ - --hash=sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f \ - --hash=sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50 \ - --hash=sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a \ - --hash=sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b \ - --hash=sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4 \ - --hash=sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff \ - --hash=sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2 \ - --hash=sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46 \ - --hash=sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b \ - --hash=sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf \ - --hash=sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5 \ - --hash=sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5 \ - --hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \ - --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \ - --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68 +markupsafe==3.0.1 \ + --hash=sha256:0778de17cff1acaeccc3ff30cd99a3fd5c50fc58ad3d6c0e0c4c58092b859396 \ + --hash=sha256:0f84af7e813784feb4d5e4ff7db633aba6c8ca64a833f61d8e4eade234ef0c38 \ + --hash=sha256:17b2aea42a7280db02ac644db1d634ad47dcc96faf38ab304fe26ba2680d359a \ + --hash=sha256:242d6860f1fd9191aef5fae22b51c5c19767f93fb9ead4d21924e0bcb17619d8 \ + --hash=sha256:244dbe463d5fb6d7ce161301a03a6fe744dac9072328ba9fc82289238582697b \ + --hash=sha256:26627785a54a947f6d7336ce5963569b5d75614619e75193bdb4e06e21d447ad \ + --hash=sha256:2a4b34a8d14649315c4bc26bbfa352663eb51d146e35eef231dd739d54a5430a \ + --hash=sha256:2ae99f31f47d849758a687102afdd05bd3d3ff7dbab0a8f1587981b58a76152a \ + --hash=sha256:312387403cd40699ab91d50735ea7a507b788091c416dd007eac54434aee51da \ + --hash=sha256:3341c043c37d78cc5ae6e3e305e988532b072329639007fd408a476642a89fd6 \ + --hash=sha256:33d1c36b90e570ba7785dacd1faaf091203d9942bc036118fab8110a401eb1a8 \ + --hash=sha256:3e683ee4f5d0fa2dde4db77ed8dd8a876686e3fc417655c2ece9a90576905344 \ + --hash=sha256:3ffb4a8e7d46ed96ae48805746755fadd0909fea2306f93d5d8233ba23dda12a \ + --hash=sha256:40621d60d0e58aa573b68ac5e2d6b20d44392878e0bfc159012a5787c4e35bc8 \ + --hash=sha256:40f1e10d51c92859765522cbd79c5c8989f40f0419614bcdc5015e7b6bf97fc5 \ + --hash=sha256:45d42d132cff577c92bfba536aefcfea7e26efb975bd455db4e6602f5c9f45e7 \ + --hash=sha256:48488d999ed50ba8d38c581d67e496f955821dc183883550a6fbc7f1aefdc170 \ + --hash=sha256:4935dd7883f1d50e2ffecca0aa33dc1946a94c8f3fdafb8df5c330e48f71b132 \ + --hash=sha256:4c2d64fdba74ad16138300815cfdc6ab2f4647e23ced81f59e940d7d4a1469d9 \ + --hash=sha256:4c8817557d0de9349109acb38b9dd570b03cc5014e8aabf1cbddc6e81005becd \ + --hash=sha256:4ffaaac913c3f7345579db4f33b0020db693f302ca5137f106060316761beea9 \ + --hash=sha256:5a4cb365cb49b750bdb60b846b0c0bc49ed62e59a76635095a179d440540c346 \ + --hash=sha256:62fada2c942702ef8952754abfc1a9f7658a4d5460fabe95ac7ec2cbe0d02abc \ + --hash=sha256:67c519635a4f64e495c50e3107d9b4075aec33634272b5db1cde839e07367589 \ + --hash=sha256:6a54c43d3ec4cf2a39f4387ad044221c66a376e58c0d0e971d47c475ba79c6b5 \ + --hash=sha256:7044312a928a66a4c2a22644147bc61a199c1709712069a344a3fb5cfcf16915 \ + --hash=sha256:730d86af59e0e43ce277bb83970530dd223bf7f2a838e086b50affa6ec5f9295 \ + --hash=sha256:800100d45176652ded796134277ecb13640c1a537cad3b8b53da45aa96330453 \ + --hash=sha256:80fcbf3add8790caddfab6764bde258b5d09aefbe9169c183f88a7410f0f6dea \ + --hash=sha256:82b5dba6eb1bcc29cc305a18a3c5365d2af06ee71b123216416f7e20d2a84e5b \ + --hash=sha256:852dc840f6d7c985603e60b5deaae1d89c56cb038b577f6b5b8c808c97580f1d \ + --hash=sha256:8ad4ad1429cd4f315f32ef263c1342166695fad76c100c5d979c45d5570ed58b \ + --hash=sha256:8ae369e84466aa70f3154ee23c1451fda10a8ee1b63923ce76667e3077f2b0c4 \ + --hash=sha256:93e8248d650e7e9d49e8251f883eed60ecbc0e8ffd6349e18550925e31bd029b \ + --hash=sha256:973a371a55ce9ed333a3a0f8e0bcfae9e0d637711534bcb11e130af2ab9334e7 \ + --hash=sha256:9ba25a71ebf05b9bb0e2ae99f8bc08a07ee8e98c612175087112656ca0f5c8bf \ + --hash=sha256:a10860e00ded1dd0a65b83e717af28845bb7bd16d8ace40fe5531491de76b79f \ + --hash=sha256:a4792d3b3a6dfafefdf8e937f14906a51bd27025a36f4b188728a73382231d91 \ + --hash=sha256:a7420ceda262dbb4b8d839a4ec63d61c261e4e77677ed7c66c99f4e7cb5030dd \ + --hash=sha256:ad91738f14eb8da0ff82f2acd0098b6257621410dcbd4df20aaa5b4233d75a50 \ + --hash=sha256:b6a387d61fe41cdf7ea95b38e9af11cfb1a63499af2759444b99185c4ab33f5b \ + --hash=sha256:b954093679d5750495725ea6f88409946d69cfb25ea7b4c846eef5044194f583 \ + --hash=sha256:bbde71a705f8e9e4c3e9e33db69341d040c827c7afa6789b14c6e16776074f5a \ + --hash=sha256:beeebf760a9c1f4c07ef6a53465e8cfa776ea6a2021eda0d0417ec41043fe984 \ + --hash=sha256:c91b394f7601438ff79a4b93d16be92f216adb57d813a78be4446fe0f6bc2d8c \ + --hash=sha256:c97ff7fedf56d86bae92fa0a646ce1a0ec7509a7578e1ed238731ba13aabcd1c \ + --hash=sha256:cb53e2a99df28eee3b5f4fea166020d3ef9116fdc5764bc5117486e6d1211b25 \ + --hash=sha256:cbf445eb5628981a80f54087f9acdbf84f9b7d862756110d172993b9a5ae81aa \ + --hash=sha256:d06b24c686a34c86c8c1fba923181eae6b10565e4d80bdd7bc1c8e2f11247aa4 \ + --hash=sha256:d98e66a24497637dd31ccab090b34392dddb1f2f811c4b4cd80c230205c074a3 \ + --hash=sha256:db15ce28e1e127a0013dfb8ac243a8e392db8c61eae113337536edb28bdc1f97 \ + --hash=sha256:db842712984e91707437461930e6011e60b39136c7331e971952bb30465bc1a1 \ + --hash=sha256:e24bfe89c6ac4c31792793ad9f861b8f6dc4546ac6dc8f1c9083c7c4f2b335cd \ + --hash=sha256:e81c52638315ff4ac1b533d427f50bc0afc746deb949210bc85f05d4f15fd772 \ + --hash=sha256:e9393357f19954248b00bed7c56f29a25c930593a77630c719653d51e7669c2a \ + --hash=sha256:ee3941769bd2522fe39222206f6dd97ae83c442a94c90f2b7a25d847d40f4729 \ + --hash=sha256:f31ae06f1328595d762c9a2bf29dafd8621c7d3adc130cbb46278079758779ca \ + --hash=sha256:f94190df587738280d544971500b9cafc9b950d32efcb1fba9ac10d84e6aa4e6 \ + --hash=sha256:fa7d686ed9883f3d664d39d5a8e74d3c5f63e603c2e3ff0abcba23eac6542635 \ + --hash=sha256:fb532dd9900381d2e8f48172ddc5a59db4c445a11b9fab40b3b786da40d3b56b \ + --hash=sha256:fe32482b37b4b00c7a52a07211b479653b7fe4f22b2e481b9a9b099d8a430f2f # via jinja2 mdurl==0.1.2 \ --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba # via markdown-it-py -more-itertools==10.3.0 \ - --hash=sha256:e5d93ef411224fbcef366a6e8ddc4c5781bc6359d43412a65dd5964e46111463 \ - --hash=sha256:ea6a02e24a9161e51faad17a8782b92a0df82c12c1c8886fec7f0c3fa1a1b320 +more-itertools==10.5.0 \ + --hash=sha256:037b0d3203ce90cca8ab1defbbdac29d5f993fc20131f3664dc8d6acfa872aef \ + --hash=sha256:5482bfef7849c25dc3c6dd53a6173ae4795da2a41a80faea6700d9f5846c5da6 # via # jaraco-classes # jaraco-functools @@ -389,9 +359,9 @@ nh3==0.2.18 \ --hash=sha256:de3ceed6e661954871d6cd78b410213bdcb136f79aafe22aa7182e028b8c7307 \ --hash=sha256:f0eca9ca8628dbb4e916ae2491d72957fdd35f7a5d326b7032a345f111ac07fe # via readme-renderer -nox==2024.4.15 \ - --hash=sha256:6492236efa15a460ecb98e7b67562a28b70da006ab0be164e8821177577c0565 \ - --hash=sha256:ecf6700199cdfa9e5ea0a41ff5e6ef4641d09508eda6edb89d9987864115817f +nox==2024.10.9 \ + --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ + --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 # via -r requirements.in packaging==24.1 \ --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ @@ -403,41 +373,41 @@ pkginfo==1.10.0 \ --hash=sha256:5df73835398d10db79f8eecd5cd86b1f6d29317589ea70796994d49399af6297 \ --hash=sha256:889a6da2ed7ffc58ab5b900d888ddce90bce912f2d2de1dc1c26f4cb9fe65097 # via twine -platformdirs==4.2.2 \ - --hash=sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee \ - --hash=sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3 +platformdirs==4.3.6 \ + --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ + --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv proto-plus==1.24.0 \ --hash=sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445 \ --hash=sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12 # via google-api-core -protobuf==5.27.2 \ - --hash=sha256:0e341109c609749d501986b835f667c6e1e24531096cff9d34ae411595e26505 \ - --hash=sha256:176c12b1f1c880bf7a76d9f7c75822b6a2bc3db2d28baa4d300e8ce4cde7409b \ - --hash=sha256:354d84fac2b0d76062e9b3221f4abbbacdfd2a4d8af36bab0474f3a0bb30ab38 \ - --hash=sha256:4fadd8d83e1992eed0248bc50a4a6361dc31bcccc84388c54c86e530b7f58863 \ - --hash=sha256:54330f07e4949d09614707c48b06d1a22f8ffb5763c159efd5c0928326a91470 \ - --hash=sha256:610e700f02469c4a997e58e328cac6f305f649826853813177e6290416e846c6 \ - --hash=sha256:7fc3add9e6003e026da5fc9e59b131b8f22b428b991ccd53e2af8071687b4fce \ - --hash=sha256:9e8f199bf7f97bd7ecebffcae45ebf9527603549b2b562df0fbc6d4d688f14ca \ - --hash=sha256:a109916aaac42bff84702fb5187f3edadbc7c97fc2c99c5ff81dd15dcce0d1e5 \ - --hash=sha256:b848dbe1d57ed7c191dfc4ea64b8b004a3f9ece4bf4d0d80a367b76df20bf36e \ - --hash=sha256:f3ecdef226b9af856075f28227ff2c90ce3a594d092c39bee5513573f25e2714 +protobuf==5.28.2 \ + --hash=sha256:2c69461a7fcc8e24be697624c09a839976d82ae75062b11a0972e41fd2cd9132 \ + --hash=sha256:35cfcb15f213449af7ff6198d6eb5f739c37d7e4f1c09b5d0641babf2cc0c68f \ + --hash=sha256:52235802093bd8a2811abbe8bf0ab9c5f54cca0a751fdd3f6ac2a21438bffece \ + --hash=sha256:59379674ff119717404f7454647913787034f03fe7049cbef1d74a97bb4593f0 \ + --hash=sha256:5e8a95246d581eef20471b5d5ba010d55f66740942b95ba9b872d918c459452f \ + --hash=sha256:87317e9bcda04a32f2ee82089a204d3a2f0d3c8aeed16568c7daf4756e4f1fe0 \ + --hash=sha256:8ddc60bf374785fb7cb12510b267f59067fa10087325b8e1855b898a0d81d276 \ + --hash=sha256:a8b9403fc70764b08d2f593ce44f1d2920c5077bf7d311fefec999f8c40f78b7 \ + --hash=sha256:c0ea0123dac3399a2eeb1a1443d82b7afc9ff40241433296769f7da42d142ec3 \ + --hash=sha256:ca53faf29896c526863366a52a8f4d88e69cd04ec9571ed6082fa117fac3ab36 \ + --hash=sha256:eeea10f3dc0ac7e6b4933d32db20662902b4ab81bf28df12218aa389e9c2102d # via # gcp-docuploader # gcp-releasetool # google-api-core # googleapis-common-protos # proto-plus -pyasn1==0.6.0 \ - --hash=sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c \ - --hash=sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473 +pyasn1==0.6.1 \ + --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ + --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 # via # pyasn1-modules # rsa -pyasn1-modules==0.4.0 \ - --hash=sha256:831dbcea1b177b28c9baddf4c6d1013c24c3accd14a1873fffaa6a2e905f17b6 \ - --hash=sha256:be04f15b66c206eed667e0bb5ab27e2b1855ea54a842e5037738099e8ca4ae0b +pyasn1-modules==0.4.1 \ + --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ + --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c # via google-auth pycparser==2.22 \ --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \ @@ -449,9 +419,9 @@ pygments==2.18.0 \ # via # readme-renderer # rich -pyjwt==2.8.0 \ - --hash=sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de \ - --hash=sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320 +pyjwt==2.9.0 \ + --hash=sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850 \ + --hash=sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c # via gcp-releasetool pyperclip==1.9.0 \ --hash=sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310 @@ -481,9 +451,9 @@ rfc3986==2.0.0 \ --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c # via twine -rich==13.7.1 \ - --hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \ - --hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432 +rich==13.9.2 \ + --hash=sha256:51a2c62057461aaf7152b4d611168f93a9fc73068f8ded2790f29fe2b5366d0c \ + --hash=sha256:8c82a3d3f8dcfe9e734771313e606b39d8247bb6b826e196f4914b333b743cf1 # via twine rsa==4.9 \ --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ @@ -499,9 +469,9 @@ six==1.16.0 \ # via # gcp-docuploader # python-dateutil -tomli==2.0.1 \ - --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ - --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f +tomli==2.0.2 \ + --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ + --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed # via nox twine==5.1.1 \ --hash=sha256:215dbe7b4b94c2c50a7315c0275d2258399280fbb7d04182c7e55e24b5f93997 \ @@ -510,28 +480,30 @@ twine==5.1.1 \ typing-extensions==4.12.2 \ --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 - # via -r requirements.in -urllib3==2.2.2 \ - --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \ - --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168 + # via + # -r requirements.in + # rich +urllib3==2.2.3 \ + --hash=sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac \ + --hash=sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9 # via # requests # twine -virtualenv==20.26.3 \ - --hash=sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a \ - --hash=sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589 +virtualenv==20.26.6 \ + --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ + --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 # via nox -wheel==0.43.0 \ - --hash=sha256:465ef92c69fa5c5da2d1cf8ac40559a8c940886afcef87dcf14b9470862f1d85 \ - --hash=sha256:55c570405f142630c6b9f72fe09d9b67cf1477fcf543ae5b8dcb1f5b7377da81 +wheel==0.44.0 \ + --hash=sha256:2376a90c98cc337d18623527a97c31797bd02bad0033d41547043a1cbfbe448f \ + --hash=sha256:a29c3f2817e95ab89aa4660681ad547c0e9547f20e75b0562fe7723c9a2a9d49 # via -r requirements.in -zipp==3.19.2 \ - --hash=sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19 \ - --hash=sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c +zipp==3.20.2 \ + --hash=sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350 \ + --hash=sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==70.2.0 \ - --hash=sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05 \ - --hash=sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1 +setuptools==75.1.0 \ + --hash=sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2 \ + --hash=sha256:d59a21b17a275fb872a9c3dae73963160ae079f1049ed956880cd7c09b120538 # via -r requirements.in diff --git a/.kokoro/samples/python3.13/common.cfg b/.kokoro/samples/python3.13/common.cfg new file mode 100644 index 000000000..ee9688995 --- /dev/null +++ b/.kokoro/samples/python3.13/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.13" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-313" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" diff --git a/.kokoro/samples/python3.13/continuous.cfg b/.kokoro/samples/python3.13/continuous.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.13/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.13/periodic-head.cfg b/.kokoro/samples/python3.13/periodic-head.cfg new file mode 100644 index 000000000..5aa01bab5 --- /dev/null +++ b/.kokoro/samples/python3.13/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.13/periodic.cfg b/.kokoro/samples/python3.13/periodic.cfg new file mode 100644 index 000000000..71cd1e597 --- /dev/null +++ b/.kokoro/samples/python3.13/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/.kokoro/samples/python3.13/presubmit.cfg b/.kokoro/samples/python3.13/presubmit.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.13/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index 55910c8ba..53e365bc4 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -33,7 +33,8 @@ export PYTHONUNBUFFERED=1 env | grep KOKORO # Install nox -python3.9 -m pip install --upgrade --quiet nox +# `virtualenv==20.26.6` is added for Python 3.7 compatibility +python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6 # Use secrets acessor service account to get secrets if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 7be61e6b6..1900c5e36 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8, 3.9, 3.10, 3.11 and 3.12 on both UNIX and Windows. + 3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests. - To run a single unit test:: - $ nox -s unit-3.12 -- -k + $ nox -s unit-3.13 -- -k .. note:: @@ -227,6 +227,7 @@ We support: - `Python 3.10`_ - `Python 3.11`_ - `Python 3.12`_ +- `Python 3.13`_ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ @@ -234,6 +235,7 @@ We support: .. _Python 3.10: https://docs.python.org/3.10/ .. _Python 3.11: https://docs.python.org/3.11/ .. _Python 3.12: https://docs.python.org/3.12/ +.. _Python 3.13: https://docs.python.org/3.13/ Supported versions can be found in our ``noxfile.py`` `config`_. diff --git a/samples/desktopapp/noxfile.py b/samples/desktopapp/noxfile.py index 3b7135946..c9a3d1ecb 100644 --- a/samples/desktopapp/noxfile.py +++ b/samples/desktopapp/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 3b7135946..c9a3d1ecb 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/magics/noxfile.py b/samples/magics/noxfile.py index 3b7135946..c9a3d1ecb 100644 --- a/samples/magics/noxfile.py +++ b/samples/magics/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/notebooks/noxfile.py b/samples/notebooks/noxfile.py index 3b7135946..c9a3d1ecb 100644 --- a/samples/notebooks/noxfile.py +++ b/samples/notebooks/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 3b7135946..c9a3d1ecb 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] From fef8b886bc86d355c7745585fc53dc8a5a019ab1 Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Fri, 1 Nov 2024 12:53:52 -0400 Subject: [PATCH 367/536] chore: two fixit fixes (#2050) --- google/cloud/bigquery/client.py | 34 ++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 1c222f2dd..52c5084e3 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -328,6 +328,15 @@ def get_service_account_email( ) -> str: """Get the email address of the project's BigQuery service account + Example: + + .. code-block:: python + + from google.cloud import bigquery + client = bigquery.Client() + client.get_service_account_email() + # returns an email similar to: my_service_account@my-project.iam.gserviceaccount.com + Note: This is the service account that BigQuery uses to manage tables encrypted by a key in KMS. @@ -345,13 +354,6 @@ def get_service_account_email( str: service account email address - Example: - - >>> from google.cloud import bigquery - >>> client = bigquery.Client() - >>> client.get_service_account_email() - my_service_account@my-project.iam.gserviceaccount.com - """ if project is None: project = self.project @@ -629,9 +631,19 @@ def create_dataset( ) -> Dataset: """API call: create the dataset via a POST request. + See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/insert + Example: + + .. code-block:: python + + from google.cloud import bigquery + client = bigquery.Client() + dataset = bigquery.Dataset('my_project.my_dataset') + dataset = client.create_dataset(dataset) + Args: dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ @@ -658,14 +670,6 @@ def create_dataset( Raises: google.cloud.exceptions.Conflict: If the dataset already exists. - - Example: - - >>> from google.cloud import bigquery - >>> client = bigquery.Client() - >>> dataset = bigquery.Dataset('my_project.my_dataset') - >>> dataset = client.create_dataset(dataset) - """ dataset = self._dataset_from_arg(dataset) if isinstance(dataset, DatasetReference): From 53c289e032caf083505b33bef323878671b58cd9 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 7 Nov 2024 10:17:28 -0500 Subject: [PATCH 368/536] chore(main): release 3.27.0 (#2040) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Chalmer Lowe --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5de99a6ca..989b7f020 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.27.0](https://github.com/googleapis/python-bigquery/compare/v3.26.0...v3.27.0) (2024-11-01) + + +### Features + +* Updates to allow users to set max_stream_count ([#2039](https://github.com/googleapis/python-bigquery/issues/2039)) ([7372ad6](https://github.com/googleapis/python-bigquery/commit/7372ad659fd3316a602e90f224e9a3304d4c1419)) + ## [3.26.0](https://github.com/googleapis/python-bigquery/compare/v3.25.0...v3.26.0) (2024-09-25) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index ebc911253..8f4418777 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.26.0" +__version__ = "3.27.0" From a4d9534a900f13ae7355904cda05097d781f27e3 Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Thu, 7 Nov 2024 15:56:57 -0500 Subject: [PATCH 369/536] docs: render fields correctly for update calls (#2055) --- google/cloud/bigquery/client.py | 82 ++++++++++++++++----------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 52c5084e3..97f239f7a 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1184,6 +1184,19 @@ def update_dataset( must be provided. If a field is listed in ``fields`` and is ``None`` in ``dataset``, it will be deleted. + For example, to update the default expiration times, specify + both properties in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_dataset( + dataset, + [ + "default_partition_expiration_ms", + "default_table_expiration_ms", + ] + ) + If ``dataset.etag`` is not ``None``, the update will only succeed if the dataset on the server has the same ETag. Thus reading a dataset with ``get_dataset``, changing its fields, @@ -1198,19 +1211,6 @@ def update_dataset( The properties of ``dataset`` to change. These are strings corresponding to the properties of :class:`~google.cloud.bigquery.dataset.Dataset`. - - For example, to update the default expiration times, specify - both properties in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_dataset( - dataset, - [ - "default_partition_expiration_ms", - "default_table_expiration_ms", - ] - ) retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): @@ -1254,6 +1254,15 @@ def update_model( must be provided. If a field is listed in ``fields`` and is ``None`` in ``model``, the field value will be deleted. + For example, to update the descriptive properties of the model, + specify them in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_model( + model, ["description", "friendly_name"] + ) + If ``model.etag`` is not ``None``, the update will only succeed if the model on the server has the same ETag. Thus reading a model with ``get_model``, changing its fields, and then passing it to @@ -1266,15 +1275,6 @@ def update_model( The properties of ``model`` to change. These are strings corresponding to the properties of :class:`~google.cloud.bigquery.model.Model`. - - For example, to update the descriptive properties of the model, - specify them in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_model( - model, ["description", "friendly_name"] - ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -1318,6 +1318,15 @@ def update_routine( must be provided. If a field is listed in ``fields`` and is ``None`` in ``routine``, the field value will be deleted. + For example, to update the description property of the routine, + specify it in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_routine( + routine, ["description"] + ) + .. warning:: During beta, partial updates are not supported. You must provide all fields in the resource. @@ -1336,15 +1345,6 @@ def update_routine( fields (Sequence[str]): The fields of ``routine`` to change, spelled as the :class:`~google.cloud.bigquery.routine.Routine` properties. - - For example, to update the description property of the routine, - specify it in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_routine( - routine, ["description"] - ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -1392,6 +1392,16 @@ def update_table( must be provided. If a field is listed in ``fields`` and is ``None`` in ``table``, the field value will be deleted. + For example, to update the descriptive properties of the table, + specify them in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_table( + table, + ["description", "friendly_name"] + ) + If ``table.etag`` is not ``None``, the update will only succeed if the table on the server has the same ETag. Thus reading a table with ``get_table``, changing its fields, and then passing it to @@ -1403,16 +1413,6 @@ def update_table( fields (Sequence[str]): The fields of ``table`` to change, spelled as the :class:`~google.cloud.bigquery.table.Table` properties. - - For example, to update the descriptive properties of the table, - specify them in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_table( - table, - ["description", "friendly_name"] - ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): From 9050ddca47f703c2630a49b278f3f3779469f66f Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 11 Nov 2024 11:35:36 -0800 Subject: [PATCH 370/536] chore(python): remove obsolete release scripts and config files (#2057) Source-Link: https://github.com/googleapis/synthtool/commit/635751753776b1a7cabd4dcaa48013a96274372d Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:91d0075c6f2fd6a073a06168feee19fa2a8507692f2519a1dc7de3366d157e99 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .github/release-trigger.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 7672b49b6..b2770d4e0 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5cddfe2fb5019bbf78335bc55f15bc13e18354a56b3ff46e1834f8e540807f05 -# created: 2024-10-31T01:41:07.349286254Z + digest: sha256:91d0075c6f2fd6a073a06168feee19fa2a8507692f2519a1dc7de3366d157e99 +# created: 2024-11-11T16:13:09.302418532Z diff --git a/.github/release-trigger.yml b/.github/release-trigger.yml index 4bb79e58e..b975c190d 100644 --- a/.github/release-trigger.yml +++ b/.github/release-trigger.yml @@ -1,2 +1,2 @@ enabled: true -multiScmName: +multiScmName: python-bigquery From 0277f171a2d12c370120e3e82199667ad40e1a99 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Tue, 12 Nov 2024 09:48:26 -0500 Subject: [PATCH 371/536] build: Use python 3.10 for docs session (#2058) Co-authored-by: Lingqing Gan --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 2376309ff..750a6b459 100644 --- a/noxfile.py +++ b/noxfile.py @@ -462,7 +462,7 @@ def blacken(session): session.run("black", *BLACK_PATHS) -@nox.session(python="3.9") +@nox.session(python="3.10") @_calculate_duration def docs(session): """Build the docs.""" From b2f33df4dd8627cab1571cde9f7f98a345fa6957 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 12 Nov 2024 14:05:24 -0500 Subject: [PATCH 372/536] chore(python): update dependencies in .kokoro/docker/docs (#2060) Source-Link: https://github.com/googleapis/synthtool/commit/59171c8f83f3522ce186e4d110d27e772da4ba7a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:2ed982f884312e4883e01b5ab8af8b6935f0216a5a2d82928d273081fc3be562 Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/docker/docs/requirements.txt | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index b2770d4e0..6301519a9 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:91d0075c6f2fd6a073a06168feee19fa2a8507692f2519a1dc7de3366d157e99 -# created: 2024-11-11T16:13:09.302418532Z + digest: sha256:2ed982f884312e4883e01b5ab8af8b6935f0216a5a2d82928d273081fc3be562 +# created: 2024-11-12T12:09:45.821174897Z diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index 66eacc82f..8bb076459 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile --allow-unsafe --generate-hashes requirements.in @@ -8,9 +8,9 @@ argcomplete==3.5.1 \ --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 # via nox -colorlog==6.8.2 \ - --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ - --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 +colorlog==6.9.0 \ + --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ + --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 # via nox distlib==0.3.9 \ --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ @@ -24,9 +24,9 @@ nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 # via -r requirements.in -packaging==24.1 \ - --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ - --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 +packaging==24.2 \ + --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ + --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f # via nox platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ @@ -36,7 +36,7 @@ tomli==2.0.2 \ --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed # via nox -virtualenv==20.26.6 \ - --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ - --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 +virtualenv==20.27.1 \ + --hash=sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba \ + --hash=sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4 # via nox From 106161180ead01aca1ead909cf06ca559f68666d Mon Sep 17 00:00:00 2001 From: Rin Arakaki Date: Wed, 13 Nov 2024 22:20:57 +0900 Subject: [PATCH 373/536] feat: migrate to pyproject.toml (#2041) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Migrate to pyproject.toml * Update * Add copyright notice * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update pyproject.toml --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- pyproject.toml | 104 +++++++++++++++++++++++++++++++ setup.py | 128 +-------------------------------------- tests/unit/test_table.py | 2 +- 3 files changed, 107 insertions(+), 127 deletions(-) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..44a958323 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,104 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "google-cloud-bigquery" +authors = [{ name = "Google LLC", email = "googleapis-packages@google.com" }] +license = { text = "Apache 2.0" } +requires-python = ">=3.7" +description = "Google BigQuery API client library" +readme = "README.rst" +classifiers = [ + # Should be one of: + # "Development Status :: 3 - Alpha" + # "Development Status :: 4 - Beta" + # "Development Status :: 5 - Production/Stable" + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Operating System :: OS Independent", + "Topic :: Internet", +] +dependencies = [ + "google-api-core[grpc] >= 2.11.1, < 3.0.0dev", + "google-auth >= 2.14.1, < 3.0.0dev", + "google-cloud-core >= 2.4.1, < 3.0.0dev", + "google-resumable-media >= 2.0.0, < 3.0dev", + "packaging >= 20.0.0", + "python-dateutil >= 2.7.3, < 3.0dev", + "requests >= 2.21.0, < 3.0.0dev", +] +dynamic = ["version"] + +[project.urls] +Repository = "https://github.com/googleapis/python-bigquery" + +[project.optional-dependencies] +# bqstorage had a period where it was a required dependency, and has been +# moved back to optional due to bloat. See +# https://github.com/googleapis/python-bigquery/issues/1196 for more background. +bqstorage = [ + "google-cloud-bigquery-storage >= 2.6.0, < 3.0.0dev", + # Due to an issue in pip's dependency resolver, the `grpc` extra is not + # installed, even though `google-cloud-bigquery-storage` specifies it + # as `google-api-core[grpc]`. We thus need to explicitly specify it here. + # See: https://github.com/googleapis/python-bigquery/issues/83 The + # grpc.Channel.close() method isn't added until 1.32.0. + # https://github.com/grpc/grpc/pull/15254 + "grpcio >= 1.47.0, < 2.0dev", + "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", + "pyarrow >= 3.0.0", +] +pandas = [ + "pandas >= 1.1.0", + "pyarrow >= 3.0.0", + "db-dtypes >= 0.3.0, < 2.0.0dev", + "importlib_metadata >= 1.0.0; python_version < '3.8'", +] +ipywidgets = ["ipywidgets >= 7.7.0", "ipykernel >= 6.0.0"] +geopandas = ["geopandas >= 0.9.0, < 1.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] +ipython = ["bigquery-magics >= 0.1.0"] +tqdm = ["tqdm >= 4.7.4, < 5.0.0dev"] +opentelemetry = [ + "opentelemetry-api >= 1.1.0", + "opentelemetry-sdk >= 1.1.0", + "opentelemetry-instrumentation >= 0.20b0", +] +bigquery_v2 = [ + "proto-plus >= 1.22.3, < 2.0.0dev", + "protobuf >= 3.20.2, < 6.0.0dev, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types. +] +all = [ + "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,tqdm,opentelemetry,bigquery_v2]", +] + +[tool.setuptools.dynamic] +version = { attr = "google.cloud.bigquery.version.__version__" } + +[tool.setuptools.packages.find] +# Only include packages under the 'google' namespace. Do not include tests, +# benchmarks, etc. +include = ["google*"] diff --git a/setup.py b/setup.py index 617685543..2ad29ecbf 100644 --- a/setup.py +++ b/setup.py @@ -12,131 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import io -import os +import setuptools # type: ignore -import setuptools - -# Package metadata. - -name = "google-cloud-bigquery" -description = "Google BigQuery API client library" - -# Should be one of: -# 'Development Status :: 3 - Alpha' -# 'Development Status :: 4 - Beta' -# 'Development Status :: 5 - Production/Stable' -release_status = "Development Status :: 5 - Production/Stable" -dependencies = [ - "google-api-core[grpc] >= 2.11.1, <3.0.0dev", - "google-auth >= 2.14.1, <3.0.0dev", - "google-cloud-core >= 2.4.1, <3.0.0dev", - "google-resumable-media >= 2.0.0, < 3.0dev", - "packaging >= 20.0.0", - "python-dateutil >= 2.7.3, <3.0dev", - "requests >= 2.21.0, < 3.0.0dev", -] -pyarrow_dependency = "pyarrow >= 3.0.0" -extras = { - # bqstorage had a period where it was a required dependency, and has been - # moved back to optional due to bloat. See - # https://github.com/googleapis/python-bigquery/issues/1196 for more background. - "bqstorage": [ - "google-cloud-bigquery-storage >= 2.6.0, <3.0.0dev", - # Due to an issue in pip's dependency resolver, the `grpc` extra is not - # installed, even though `google-cloud-bigquery-storage` specifies it - # as `google-api-core[grpc]`. We thus need to explicitly specify it here. - # See: https://github.com/googleapis/python-bigquery/issues/83 The - # grpc.Channel.close() method isn't added until 1.32.0. - # https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.47.0, < 2.0dev", - "grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'", - pyarrow_dependency, - ], - "pandas": [ - "pandas>=1.1.0", - pyarrow_dependency, - "db-dtypes>=0.3.0,<2.0.0dev", - "importlib_metadata>=1.0.0; python_version<'3.8'", - ], - "ipywidgets": [ - "ipywidgets>=7.7.0", - "ipykernel>=6.0.0", - ], - "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <3.0.0dev"], - "ipython": [ - "bigquery-magics >= 0.1.0", - ], - "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], - "opentelemetry": [ - "opentelemetry-api >= 1.1.0", - "opentelemetry-sdk >= 1.1.0", - "opentelemetry-instrumentation >= 0.20b0", - ], - "bigquery_v2": [ - "proto-plus >= 1.22.3, <2.0.0dev", - "protobuf>=3.20.2,<6.0.0dev,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. - ], -} - -all_extras = [] - -for extra in extras: - all_extras.extend(extras[extra]) - -extras["all"] = all_extras - -# Setup boilerplate below this line. - -package_root = os.path.abspath(os.path.dirname(__file__)) - -readme_filename = os.path.join(package_root, "README.rst") -with io.open(readme_filename, encoding="utf-8") as readme_file: - readme = readme_file.read() - -version = {} -with open(os.path.join(package_root, "google/cloud/bigquery/version.py")) as fp: - exec(fp.read(), version) -version = version["__version__"] - -# Only include packages under the 'google' namespace. Do not include tests, -# benchmarks, etc. -packages = [ - package - for package in setuptools.find_namespace_packages() - if package.startswith("google") -] - -setuptools.setup( - name=name, - version=version, - description=description, - long_description=readme, - author="Google LLC", - author_email="googleapis-packages@google.com", - license="Apache 2.0", - url="https://github.com/googleapis/python-bigquery", - classifiers=[ - release_status, - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Operating System :: OS Independent", - "Topic :: Internet", - ], - platforms="Posix; MacOS X; Windows", - packages=packages, - install_requires=dependencies, - extras_require=extras, - python_requires=">=3.7", - include_package_data=True, - zip_safe=False, -) +setuptools.setup() diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index d6febcfb1..018a096df 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2011,7 +2011,7 @@ def _make_one( path=None, schema=None, table=None, - **kwargs + **kwargs, ): from google.cloud.bigquery.table import TableReference From 27370b102246fecf2287781714f0544f5bb8ab04 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 15 Nov 2024 19:17:17 +0100 Subject: [PATCH 374/536] chore(deps): update all dependencies (#2038) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 1089dc195..30b4a54a1 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -24,7 +24,7 @@ google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.65.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.66.1; python_version >= '3.8' +grpcio==1.66.2; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 From f2ab8cbfe00d442ad3b40683ecfec320e53b4688 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 15 Nov 2024 13:58:58 -0500 Subject: [PATCH 375/536] fix: Allow geopandas 1.x (#2065) Expand range to avoid diamond dependency issues See https://pypi.org/project/geopandas/1.0.1/ --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 44a958323..ecf21d922 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,7 @@ pandas = [ "importlib_metadata >= 1.0.0; python_version < '3.8'", ] ipywidgets = ["ipywidgets >= 7.7.0", "ipykernel >= 6.0.0"] -geopandas = ["geopandas >= 0.9.0, < 1.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] +geopandas = ["geopandas >= 0.9.0, < 2.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] ipython = ["bigquery-magics >= 0.1.0"] tqdm = ["tqdm >= 4.7.4, < 5.0.0dev"] opentelemetry = [ From 458648f52e71c89e8746f2173a63400dc2553b33 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 15 Nov 2024 21:29:06 +0100 Subject: [PATCH 376/536] chore(deps): update all dependencies (#2064) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * revert * Pin pyparsing for Python 3.7/3.8 * revert * Pin pyarrow for Python 3.8 --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 26 ++++++++++++++------------ samples/magics/requirements.txt | 6 +++--- samples/notebooks/requirements.txt | 6 +++--- samples/snippets/requirements.txt | 2 +- 5 files changed, 22 insertions(+), 20 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 383829d7d..165800741 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.26.0 +google-cloud-bigquery==3.27.0 google-auth-oauthlib==1.2.1 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 30b4a54a1..42f2b5a86 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -2,47 +2,49 @@ attrs==24.2.0 certifi==2024.8.30 cffi===1.15.1; python_version == '3.7' cffi==1.17.1; python_version >= '3.8' -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 click==8.1.7 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.3.0 +db-dtypes==1.3.1 Fiona===1.9.6; python_version == '3.7' Fiona==1.10.1; python_version >= '3.8' geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.20.0 -google-auth==2.35.0 -google-cloud-bigquery==3.26.0 -google-cloud-bigquery-storage==2.26.0 +google-api-core==2.23.0 +google-auth==2.36.0 +google-cloud-bigquery==3.27.0 +google-cloud-bigquery-storage==2.27.0 google-cloud-core==2.4.1 google-crc32c===1.5.0; python_version < '3.9' google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 -googleapis-common-protos==1.65.0 +googleapis-common-protos==1.66.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.66.2; python_version >= '3.8' +grpcio==1.67.1; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 packaging===24.0; python_version == '3.7' -packaging==24.1; python_version >= '3.8' +packaging==24.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.3; python_version >= '3.9' -proto-plus==1.24.0 +proto-plus==1.25.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==17.0.0; python_version >= '3.8' +pyarrow==17.0.0; python_version == '3.8' +pyarrow==18.0.0; python_version >= '3.9' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.1; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.1; python_version >= '3.8' pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' -pyparsing==3.1.4 +pyparsing===3.1.4; python_version < '3.9' +pyparsing==3.2.0; python_version >= '3.9' python-dateutil==2.9.0.post0 pytz==2024.2 PyYAML===6.0.1; python_version == '3.7' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 6386fb6d2..543d9a512 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,7 +1,7 @@ bigquery_magics==0.4.0 -db-dtypes==1.3.0 -google.cloud.bigquery==3.26.0 -google-cloud-bigquery-storage==2.26.0 +db-dtypes==1.3.1 +google.cloud.bigquery==3.27.0 +google-cloud-bigquery-storage==2.27.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 7463e1afc..ca8a0a13e 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,7 +1,7 @@ bigquery-magics==0.4.0 -db-dtypes==1.3.0 -google-cloud-bigquery==3.26.0 -google-cloud-bigquery-storage==2.26.0 +db-dtypes==1.3.1 +google-cloud-bigquery==3.27.0 +google-cloud-bigquery-storage==2.27.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 65ce0be9f..307ebac24 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.26.0 +google-cloud-bigquery==3.27.0 From fddf2c5608a2d7bfd4981c8d529178070516b5c0 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 15 Nov 2024 22:01:53 +0100 Subject: [PATCH 377/536] chore(deps): update all dependencies (#2068) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * pin pyarrow --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 42f2b5a86..d08bad258 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -34,8 +34,8 @@ pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.3; python_version >= '3.9' proto-plus==1.25.0 -pyarrow==12.0.1; python_version == '3.7' -pyarrow==17.0.0; python_version == '3.8' +pyarrow===12.0.1; python_version == '3.7' +pyarrow===17.0.0; python_version == '3.8' pyarrow==18.0.0; python_version >= '3.9' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.1; python_version >= '3.8' From fffe6ba6b271180b0c59c9fbf70feb7d6d322906 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 18 Nov 2024 17:28:26 +0100 Subject: [PATCH 378/536] chore(deps): update all dependencies (#2070) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d08bad258..438018f88 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -24,7 +24,7 @@ google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.66.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.67.1; python_version >= '3.8' +grpcio==1.68.0; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 From d4612979b812d2a835e47200f27a87a66bcb856a Mon Sep 17 00:00:00 2001 From: Kien Truong Date: Sat, 23 Nov 2024 04:35:54 +0700 Subject: [PATCH 379/536] feat: support setting max_stream_count when fetching query result (#2051) * feat: support setting max_stream_count when fetching query result Allow user to set max_stream_count when fetching result using BigQuery Storage API with RowIterator's incremental methods: * to_arrow_iterable * to_dataframe_iterable * docs: update docs about max_stream_count for ordered query * fix: add max_stream_count params to _EmptyRowIterator's methods * test: add tests for RowIterator's max_stream_count parameter * docs: add notes on valid max_stream_count range in docstring * use a different way to iterate result --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/table.py | 44 +++++++++++++++++++++ tests/unit/test_table.py | 70 ++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index faf827be4..dcaf377e3 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1812,6 +1812,7 @@ def to_arrow_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore + max_stream_count: Optional[int] = None, ) -> Iterator["pyarrow.RecordBatch"]: """[Beta] Create an iterable of class:`pyarrow.RecordBatch`, to process the table as a stream. @@ -1836,6 +1837,22 @@ def to_arrow_iterable( created by the server. If ``max_queue_size`` is :data:`None`, the queue size is infinite. + max_stream_count (Optional[int]): + The maximum number of parallel download streams when + using BigQuery Storage API. Ignored if + BigQuery Storage API is not used. + + This setting also has no effect if the query result + is deterministically ordered with ORDER BY, + in which case, the number of download stream is always 1. + + If set to 0 or None (the default), the number of download + streams is determined by BigQuery the server. However, this behaviour + can require a lot of memory to store temporary download result, + especially with very large queries. In that case, + setting this parameter value to a value > 0 can help + reduce system resource consumption. + Returns: pyarrow.RecordBatch: A generator of :class:`~pyarrow.RecordBatch`. @@ -1852,6 +1869,7 @@ def to_arrow_iterable( preserve_order=self._preserve_order, selected_fields=self._selected_fields, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) tabledata_list_download = functools.partial( _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema @@ -1978,6 +1996,7 @@ def to_dataframe_iterable( bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Optional[Dict[str, Any]] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore + max_stream_count: Optional[int] = None, ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2008,6 +2027,22 @@ def to_dataframe_iterable( .. versionadded:: 2.14.0 + max_stream_count (Optional[int]): + The maximum number of parallel download streams when + using BigQuery Storage API. Ignored if + BigQuery Storage API is not used. + + This setting also has no effect if the query result + is deterministically ordered with ORDER BY, + in which case, the number of download stream is always 1. + + If set to 0 or None (the default), the number of download + streams is determined by BigQuery the server. However, this behaviour + can require a lot of memory to store temporary download result, + especially with very large queries. In that case, + setting this parameter value to a value > 0 can help + reduce system resource consumption. + Returns: pandas.DataFrame: A generator of :class:`~pandas.DataFrame`. @@ -2034,6 +2069,7 @@ def to_dataframe_iterable( preserve_order=self._preserve_order, selected_fields=self._selected_fields, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) tabledata_list_download = functools.partial( _pandas_helpers.download_dataframe_row_iterator, @@ -2690,6 +2726,7 @@ def to_dataframe_iterable( bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Optional[Dict[str, Any]] = None, max_queue_size: Optional[int] = None, + max_stream_count: Optional[int] = None, ) -> Iterator["pandas.DataFrame"]: """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2705,6 +2742,9 @@ def to_dataframe_iterable( max_queue_size: Ignored. Added for compatibility with RowIterator. + max_stream_count: + Ignored. Added for compatibility with RowIterator. + Returns: An iterator yielding a single empty :class:`~pandas.DataFrame`. @@ -2719,6 +2759,7 @@ def to_arrow_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, max_queue_size: Optional[int] = None, + max_stream_count: Optional[int] = None, ) -> Iterator["pyarrow.RecordBatch"]: """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2731,6 +2772,9 @@ def to_arrow_iterable( max_queue_size: Ignored. Added for compatibility with RowIterator. + max_stream_count: + Ignored. Added for compatibility with RowIterator. + Returns: An iterator yielding a single empty :class:`~pyarrow.RecordBatch`. """ diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 018a096df..d81ad2dca 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -5822,3 +5822,73 @@ def test_table_reference_to_bqstorage_v1_stable(table_path): for klass in (mut.TableReference, mut.Table, mut.TableListItem): got = klass.from_string(table_path).to_bqstorage() assert got == expected + + +@pytest.mark.parametrize("preserve_order", [True, False]) +def test_to_arrow_iterable_w_bqstorage_max_stream_count(preserve_order): + pytest.importorskip("pandas") + pytest.importorskip("google.cloud.bigquery_storage") + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + bqstorage_client.create_read_session.return_value = session + + row_iterator = mut.RowIterator( + _mock_client(), + api_request=None, + path=None, + schema=[ + schema.SchemaField("colA", "INTEGER"), + ], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + row_iterator._preserve_order = preserve_order + + max_stream_count = 132 + result_iterable = row_iterator.to_arrow_iterable( + bqstorage_client=bqstorage_client, max_stream_count=max_stream_count + ) + list(result_iterable) + bqstorage_client.create_read_session.assert_called_once_with( + parent=mock.ANY, + read_session=mock.ANY, + max_stream_count=max_stream_count if not preserve_order else 1, + ) + + +@pytest.mark.parametrize("preserve_order", [True, False]) +def test_to_dataframe_iterable_w_bqstorage_max_stream_count(preserve_order): + pytest.importorskip("pandas") + pytest.importorskip("google.cloud.bigquery_storage") + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + bqstorage_client.create_read_session.return_value = session + + row_iterator = mut.RowIterator( + _mock_client(), + api_request=None, + path=None, + schema=[ + schema.SchemaField("colA", "INTEGER"), + ], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + row_iterator._preserve_order = preserve_order + + max_stream_count = 132 + result_iterable = row_iterator.to_dataframe_iterable( + bqstorage_client=bqstorage_client, max_stream_count=max_stream_count + ) + list(result_iterable) + bqstorage_client.create_read_session.assert_called_once_with( + parent=mock.ANY, + read_session=mock.ANY, + max_stream_count=max_stream_count if not preserve_order else 1, + ) From 9e19ecd9c3b24c6132203859f2b6f6a885e978a9 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 26 Nov 2024 19:16:56 +0100 Subject: [PATCH 380/536] chore(deps): update all dependencies (#2078) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 438018f88..edf5a24b6 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -36,7 +36,7 @@ pandas==2.2.3; python_version >= '3.9' proto-plus==1.25.0 pyarrow===12.0.1; python_version == '3.7' pyarrow===17.0.0; python_version == '3.8' -pyarrow==18.0.0; python_version >= '3.9' +pyarrow==18.1.0; python_version >= '3.9' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.1; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' From 3359ef37b90243bea2d9e68bb996fe5d736f304c Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Wed, 4 Dec 2024 22:24:15 +0900 Subject: [PATCH 381/536] feat: add property for `allowNonIncrementalDefinition` for materialized view (#2084) * feat: property for `allowNonIncrementalDefinition` materialized view Signed-off-by: Yu Ishikawa format Signed-off-by: Yu Ishikawa * Update tests/unit/test_table.py Co-authored-by: Chalmer Lowe * Update google/cloud/bigquery/table.py Co-authored-by: Chalmer Lowe --------- Signed-off-by: Yu Ishikawa Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/table.py | 23 +++++++++++++++++++++++ tests/unit/test_table.py | 10 ++++++++++ 2 files changed, 33 insertions(+) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index dcaf377e3..38542023b 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -390,6 +390,7 @@ class Table(_TableBase): "mview_last_refresh_time": ["materializedView", "lastRefreshTime"], "mview_query": "materializedView", "mview_refresh_interval": "materializedView", + "mview_allow_non_incremental_definition": "materializedView", "num_bytes": "numBytes", "num_rows": "numRows", "partition_expiration": "timePartitioning", @@ -928,6 +929,28 @@ def mview_refresh_interval(self, value): refresh_interval_ms, ) + @property + def mview_allow_non_incremental_definition(self): + """Optional[bool]: This option declares the intention to construct a + materialized view that isn't refreshed incrementally. + The default value is :data:`False`. + """ + api_field = self._PROPERTY_TO_API_FIELD[ + "mview_allow_non_incremental_definition" + ] + return _helpers._get_sub_prop( + self._properties, [api_field, "allowNonIncrementalDefinition"] + ) + + @mview_allow_non_incremental_definition.setter + def mview_allow_non_incremental_definition(self, value): + api_field = self._PROPERTY_TO_API_FIELD[ + "mview_allow_non_incremental_definition" + ] + _helpers._set_sub_prop( + self._properties, [api_field, "allowNonIncrementalDefinition"], value + ) + @property def streaming_buffer(self): """google.cloud.bigquery.StreamingBuffer: Information about a table's diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index d81ad2dca..ff0593470 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1050,6 +1050,16 @@ def test_mview_refresh_interval(self): table.mview_refresh_interval = None self.assertIsNone(table.mview_refresh_interval) + def test_mview_allow_non_incremental_definition(self): + table = self._make_one() + self.assertIsNone(table.mview_allow_non_incremental_definition) + table.mview_allow_non_incremental_definition = True + self.assertTrue(table.mview_allow_non_incremental_definition) + table.mview_allow_non_incremental_definition = False + self.assertFalse(table.mview_allow_non_incremental_definition) + table.mview_allow_non_incremental_definition = None + self.assertIsNone(table.mview_allow_non_incremental_definition) + def test_from_string(self): cls = self._get_target_class() got = cls.from_string("string-project.string_dataset.string_table") From 729322c2288a30464f2f135ba18b9c4aa7d2f0da Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Sat, 7 Dec 2024 05:56:34 +0900 Subject: [PATCH 382/536] feat: add property for maxStaleness in table definitions (#2087) * feat: add property for maxStaleness in table definitions Signed-off-by: Yu Ishikawa * Update google/cloud/bigquery/table.py --------- Signed-off-by: Yu Ishikawa Co-authored-by: Lingqing Gan --- google/cloud/bigquery/table.py | 35 +++++++++++++++++++++++++++ tests/unit/test_table.py | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 38542023b..80ab330ba 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -407,6 +407,7 @@ class Table(_TableBase): "view_query": "view", "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", + "max_staleness": "maxStaleness", } def __init__(self, table_ref, schema=None) -> None: @@ -1115,6 +1116,40 @@ def __repr__(self): def __str__(self): return f"{self.project}.{self.dataset_id}.{self.table_id}" + @property + def max_staleness(self): + """Union[str, None]: The maximum staleness of data that could be returned when the table is queried. + + Staleness encoded as a string encoding of sql IntervalValue type. + This property is optional and defaults to None. + + According to the BigQuery API documentation, maxStaleness specifies the maximum time + interval for which stale data can be returned when querying the table. + It helps control data freshness in scenarios like metadata-cached external tables. + + Returns: + Optional[str]: A string representing the maximum staleness interval + (e.g., '1h', '30m', '15s' for hours, minutes, seconds respectively). + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["max_staleness"]) + + @max_staleness.setter + def max_staleness(self, value): + """Set the maximum staleness for the table. + + Args: + value (Optional[str]): A string representing the maximum staleness interval. + Must be a valid time interval string. + Examples include '1h' (1 hour), '30m' (30 minutes), '15s' (15 seconds). + + Raises: + ValueError: If the value is not None and not a string. + """ + if value is not None and not isinstance(value, str): + raise ValueError("max_staleness must be a string or None") + + self._properties[self._PROPERTY_TO_API_FIELD["max_staleness"]] = value + class TableListItem(_TableBase): """A read-only table resource from a list operation. diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index ff0593470..3824da226 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1475,6 +1475,49 @@ def test___str__(self): table1 = self._make_one(TableReference(dataset, "table1")) self.assertEqual(str(table1), "project1.dataset1.table1") + def test_max_staleness_getter(self): + """Test getting max_staleness property.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Initially None + self.assertIsNone(table.max_staleness) + # Set max_staleness using setter + table.max_staleness = "1h" + self.assertEqual(table.max_staleness, "1h") + + def test_max_staleness_setter(self): + """Test setting max_staleness property.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Set valid max_staleness + table.max_staleness = "30m" + self.assertEqual(table.max_staleness, "30m") + # Set to None + table.max_staleness = None + self.assertIsNone(table.max_staleness) + + def test_max_staleness_setter_invalid_type(self): + """Test setting max_staleness with an invalid type raises ValueError.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Try setting invalid type + with self.assertRaises(ValueError): + table.max_staleness = 123 # Not a string + + def test_max_staleness_to_api_repr(self): + """Test max_staleness is correctly represented in API representation.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Set max_staleness + table.max_staleness = "1h" + # Convert to API representation + resource = table.to_api_repr() + self.assertEqual(resource.get("maxStaleness"), "1h") + class Test_row_from_mapping(unittest.TestCase, _SchemaBase): PROJECT = "prahj-ekt" From 40529de923e25c41c6728c121b9c82a042967ada Mon Sep 17 00:00:00 2001 From: Rin Arakaki Date: Wed, 11 Dec 2024 03:15:11 +0900 Subject: [PATCH 383/536] feat: add type hints to Client (#2044) * add type hints * Update client.py Moves import from being used solely during specific checks to being more universally available. * Update google/cloud/bigquery/client.py * Update client.py testing some minor changes to deal with mypy quirks * Update google/cloud/bigquery/client.py --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/client.py | 36 +++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 97f239f7a..03ded93b1 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -44,6 +44,8 @@ import uuid import warnings +import requests + from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload # type: ignore from google.resumable_media.requests import ResumableUpload @@ -65,6 +67,7 @@ DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore +from google.auth.credentials import Credentials from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _job_helpers from google.cloud.bigquery import _pandas_helpers @@ -126,6 +129,7 @@ _versions_helpers.PANDAS_VERSIONS.try_import() ) # mypy check fails because pandas import is outside module, there are type: ignore comments related to this + ResumableTimeoutType = Union[ None, float, Tuple[float, float] ] # for resumable media methods @@ -133,8 +137,6 @@ if typing.TYPE_CHECKING: # pragma: NO COVER # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] - import requests # required by api-core - _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 @@ -231,15 +233,23 @@ class Client(ClientWithProject): def __init__( self, - project=None, - credentials=None, - _http=None, - location=None, - default_query_job_config=None, - default_load_job_config=None, - client_info=None, - client_options=None, + project: Optional[str] = None, + credentials: Optional[Credentials] = None, + _http: Optional[requests.Session] = None, + location: Optional[str] = None, + default_query_job_config: Optional[QueryJobConfig] = None, + default_load_job_config: Optional[LoadJobConfig] = None, + client_info: Optional[google.api_core.client_info.ClientInfo] = None, + client_options: Optional[ + Union[google.api_core.client_options.ClientOptions, Dict[str, Any]] + ] = None, ) -> None: + if client_options is None: + client_options = {} + if isinstance(client_options, dict): + client_options = google.api_core.client_options.from_dict(client_options) + # assert isinstance(client_options, google.api_core.client_options.ClientOptions) + super(Client, self).__init__( project=project, credentials=credentials, @@ -247,14 +257,10 @@ def __init__( _http=_http, ) - kw_args = {"client_info": client_info} + kw_args: Dict[str, Any] = {"client_info": client_info} bq_host = _get_bigquery_host() kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None client_universe = None - if client_options is None: - client_options = {} - if isinstance(client_options, dict): - client_options = google.api_core.client_options.from_dict(client_options) if client_options.api_endpoint: api_endpoint = client_options.api_endpoint kw_args["api_endpoint"] = api_endpoint From d4d39acb8574f0d06d4e490b859e5fe6b57d0d9e Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 18 Dec 2024 02:40:17 +0800 Subject: [PATCH 384/536] chore(python): update dependencies in .kokoro/docker/docs (#2088) Source-Link: https://github.com/googleapis/synthtool/commit/e808c98e1ab7eec3df2a95a05331619f7001daef Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8e3e7e18255c22d1489258d0374c901c01f9c4fd77a12088670cd73d580aa737 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 +-- .kokoro/docker/docs/requirements.txt | 52 ++++++++++++++++++++++------ 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 6301519a9..26306af66 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2ed982f884312e4883e01b5ab8af8b6935f0216a5a2d82928d273081fc3be562 -# created: 2024-11-12T12:09:45.821174897Z + digest: sha256:8e3e7e18255c22d1489258d0374c901c01f9c4fd77a12088670cd73d580aa737 +# created: 2024-12-17T00:59:58.625514486Z diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index 8bb076459..f99a5c4aa 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -2,11 +2,11 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --generate-hashes requirements.in +# pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in # -argcomplete==3.5.1 \ - --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ - --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 +argcomplete==3.5.2 \ + --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ + --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb # via nox colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ @@ -23,7 +23,7 @@ filelock==3.16.1 \ nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r requirements.in + # via -r synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in packaging==24.2 \ --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f @@ -32,11 +32,41 @@ platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv -tomli==2.0.2 \ - --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ - --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed +tomli==2.2.1 \ + --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ + --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ + --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \ + --hash=sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b \ + --hash=sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8 \ + --hash=sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6 \ + --hash=sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77 \ + --hash=sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff \ + --hash=sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea \ + --hash=sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192 \ + --hash=sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249 \ + --hash=sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee \ + --hash=sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4 \ + --hash=sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98 \ + --hash=sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8 \ + --hash=sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4 \ + --hash=sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281 \ + --hash=sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744 \ + --hash=sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69 \ + --hash=sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13 \ + --hash=sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140 \ + --hash=sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e \ + --hash=sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e \ + --hash=sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc \ + --hash=sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff \ + --hash=sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec \ + --hash=sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2 \ + --hash=sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222 \ + --hash=sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106 \ + --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ + --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ + --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.27.1 \ - --hash=sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba \ - --hash=sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4 +virtualenv==20.28.0 \ + --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ + --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa # via nox From 887e126bd6128a7ca1d5a7f00abb50ce044d4c6f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Dec 2024 08:58:58 -0800 Subject: [PATCH 385/536] chore(deps): bump jinja2 from 3.1.4 to 3.1.5 in /.kokoro (#2094) Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.4 to 3.1.5. - [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.1.4...3.1.5) --- updated-dependencies: - dependency-name: jinja2 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .kokoro/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 006d8ef93..16db448c1 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -254,9 +254,9 @@ jeepney==0.8.0 \ # via # keyring # secretstorage -jinja2==3.1.4 \ - --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ - --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d +jinja2==3.1.5 \ + --hash=sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb \ + --hash=sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb # via gcp-releasetool keyring==25.4.1 \ --hash=sha256:5426f817cf7f6f007ba5ec722b1bcad95a75b27d780343772ad76b17cb47b0bf \ From aaf1eb85ada95ab866be0199812ea7f5c7f50766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 27 Dec 2024 16:55:01 -0600 Subject: [PATCH 386/536] feat: preserve unknown fields from the REST API representation in `SchemaField` (#2097) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: preserve unknown fields from the REST API representaton in `SchemaField` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove unnecessary variable * remove unused private method * fix pytype --------- Co-authored-by: Owl Bot --- google/cloud/bigquery/schema.py | 82 +++++++++--------------------- tests/unit/job/test_load_config.py | 29 +++++++++-- tests/unit/test_schema.py | 37 +++++++++++--- tests/unit/test_table.py | 32 ++++++++++-- 4 files changed, 105 insertions(+), 75 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index f5b03cbef..b062396cf 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -16,8 +16,9 @@ import collections import enum -from typing import Any, Dict, Iterable, Optional, Union, cast +from typing import Any, cast, Dict, Iterable, Optional, Union +from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -203,15 +204,8 @@ def __init__( self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): self._properties["rangeElementType"] = range_element_type.to_api_repr() - - self._fields = tuple(fields) - - @staticmethod - def __get_int(api_repr, name): - v = api_repr.get(name, _DEFAULT_VALUE) - if v is not _DEFAULT_VALUE: - v = int(v) - return v + if fields: # Don't set the property if it's not set. + self._properties["fields"] = [field.to_api_repr() for field in fields] @classmethod def from_api_repr(cls, api_repr: dict) -> "SchemaField": @@ -225,43 +219,19 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": Returns: google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object. """ - field_type = api_repr["type"].upper() - - # Handle optional properties with default values - mode = api_repr.get("mode", "NULLABLE") - description = api_repr.get("description", _DEFAULT_VALUE) - fields = api_repr.get("fields", ()) - policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE) + placeholder = cls("this_will_be_replaced", "PLACEHOLDER") - default_value_expression = api_repr.get("defaultValueExpression", None) + # Note: we don't make a copy of api_repr because this can cause + # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD + # fields. See https://github.com/googleapis/python-bigquery/issues/6 + placeholder._properties = api_repr - if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: - policy_tags = PolicyTagList.from_api_repr(policy_tags) - - if api_repr.get("rangeElementType"): - range_element_type = cast(dict, api_repr.get("rangeElementType")) - element_type = range_element_type.get("type") - else: - element_type = None - - return cls( - field_type=field_type, - fields=[cls.from_api_repr(f) for f in fields], - mode=mode.upper(), - default_value_expression=default_value_expression, - description=description, - name=api_repr["name"], - policy_tags=policy_tags, - precision=cls.__get_int(api_repr, "precision"), - scale=cls.__get_int(api_repr, "scale"), - max_length=cls.__get_int(api_repr, "maxLength"), - range_element_type=element_type, - ) + return placeholder @property def name(self): """str: The name of the field.""" - return self._properties["name"] + return self._properties.get("name", "") @property def field_type(self): @@ -270,7 +240,10 @@ def field_type(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type """ - return self._properties["type"] + type_ = self._properties.get("type") + if type_ is None: # Shouldn't happen, but some unit tests do this. + return None + return cast(str, type_).upper() @property def mode(self): @@ -279,7 +252,7 @@ def mode(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode """ - return self._properties.get("mode") + return cast(str, self._properties.get("mode", "NULLABLE")).upper() @property def is_nullable(self): @@ -299,17 +272,17 @@ def description(self): @property def precision(self): """Optional[int]: Precision (number of digits) for the NUMERIC field.""" - return self._properties.get("precision") + return _helpers._int_or_none(self._properties.get("precision")) @property def scale(self): """Optional[int]: Scale (digits after decimal) for the NUMERIC field.""" - return self._properties.get("scale") + return _helpers._int_or_none(self._properties.get("scale")) @property def max_length(self): """Optional[int]: Maximum length for the STRING or BYTES field.""" - return self._properties.get("maxLength") + return _helpers._int_or_none(self._properties.get("maxLength")) @property def range_element_type(self): @@ -329,7 +302,7 @@ def fields(self): Must be empty unset if ``field_type`` is not 'RECORD'. """ - return self._fields + return tuple(_to_schema_fields(self._properties.get("fields", []))) @property def policy_tags(self): @@ -345,15 +318,10 @@ def to_api_repr(self) -> dict: Returns: Dict: A dictionary representing the SchemaField in a serialized form. """ - answer = self._properties.copy() - - # If this is a RECORD type, then sub-fields are also included, - # add this to the serialized representation. - if self.field_type.upper() in _STRUCT_TYPES: - answer["fields"] = [f.to_api_repr() for f in self.fields] - - # Done; return the serialized dictionary. - return answer + # Note: we don't make a copy of _properties because this can cause + # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD + # fields. See https://github.com/googleapis/python-bigquery/issues/6 + return self._properties def _key(self): """A tuple key that uniquely describes this field. @@ -389,7 +357,7 @@ def _key(self): self.mode.upper(), # pytype: disable=attribute-error self.default_value_expression, self.description, - self._fields, + self.fields, policy_tags, ) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index becf3e959..3a681c476 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import warnings import pytest @@ -571,16 +572,34 @@ def test_schema_setter_valid_mappings_list(self): config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} ) - def test_schema_setter_invalid_mappings_list(self): + def test_schema_setter_allows_unknown_properties(self): config = self._get_target_class()() schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, ] - with self.assertRaises(Exception): - config.schema = schema + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + config.schema = schema + + # _properties should include all fields, including unknown ones. + assert config._properties["load"]["schema"]["fields"] == expected_schema def test_schema_setter_unsetting_schema(self): from google.cloud.bigquery.schema import SchemaField diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index b17cd0281..4b0b28158 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from google.cloud import bigquery -from google.cloud.bigquery.standard_sql import StandardSqlStructType -from google.cloud.bigquery.schema import PolicyTagList +import copy import unittest from unittest import mock import pytest +from google.cloud import bigquery +from google.cloud.bigquery.standard_sql import StandardSqlStructType +from google.cloud.bigquery.schema import PolicyTagList + class TestSchemaField(unittest.TestCase): @staticmethod @@ -821,13 +823,32 @@ def test_schema_fields_sequence(self): result = self._call_fut(schema) self.assertEqual(result, schema) - def test_invalid_mapping_representation(self): + def test_unknown_properties(self): schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "address", "typeooo": "STRING", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, ] - with self.assertRaises(Exception): - self._call_fut(schema) + + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + result = self._call_fut(schema) + + for api_repr, field in zip(expected_schema, result): + assert field.to_api_repr() == api_repr def test_valid_mapping_representation(self): from google.cloud.bigquery.schema import SchemaField diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 3824da226..e9d461e9d 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import datetime import logging import re @@ -711,14 +712,35 @@ def test_schema_setter_valid_fields(self): table.schema = [full_name, age] self.assertEqual(table.schema, [full_name, age]) - def test_schema_setter_invalid_mapping_representation(self): + def test_schema_setter_allows_unknown_properties(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"} - invalid_field = {"name": "full_name", "typeooo": "STRING", "mode": "REQUIRED"} - with self.assertRaises(Exception): - table.schema = [full_name, invalid_field] + schema = [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, + ] + + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + table.schema = schema + + # _properties should include all fields, including unknown ones. + assert table._properties["schema"]["fields"] == expected_schema def test_schema_setter_valid_mapping_representation(self): from google.cloud.bigquery.schema import SchemaField From 6cbd5c0a49a5f6e289abc747559b3963933fac90 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 2 Jan 2025 23:46:50 +0100 Subject: [PATCH 387/536] chore(deps): update all dependencies (#2096) * chore(deps): update all dependencies * pin attrs===24.2.0 for python 3.7 * pin urllib3===2.2.3 for python 3.8 * pin matplotlib===3.9.2 for python 3.9 --------- Co-authored-by: Lingqing Gan --- .kokoro/docker/docs/requirements.txt | 6 +++--- samples/desktopapp/requirements-test.txt | 4 ++-- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 24 +++++++++++++----------- samples/magics/requirements-test.txt | 4 ++-- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements-test.txt | 4 ++-- samples/notebooks/requirements.txt | 5 +++-- samples/snippets/requirements-test.txt | 4 ++-- 9 files changed, 29 insertions(+), 26 deletions(-) diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index f99a5c4aa..fb6ffa272 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -4,9 +4,9 @@ # # pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in # -argcomplete==3.5.2 \ - --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ - --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb +argcomplete==3.5.3 \ + --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ + --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 # via nox colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 1640e1a95..827b02dcf 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 1ccebd9cd..ef38acb4f 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index edf5a24b6..ab73dbe87 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,21 +1,22 @@ -attrs==24.2.0 -certifi==2024.8.30 +attrs===24.2.0; python_version == '3.7' +attrs==24.3.0; python_version >= '3.8' +certifi==2024.12.14 cffi===1.15.1; python_version == '3.7' cffi==1.17.1; python_version >= '3.8' -charset-normalizer==3.4.0 -click==8.1.7 +charset-normalizer==3.4.1 +click==8.1.8 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.3.1 Fiona===1.9.6; python_version == '3.7' Fiona==1.10.1; python_version >= '3.8' -geojson==3.1.0 +geojson==3.2.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.23.0 -google-auth==2.36.0 +google-api-core==2.24.0 +google-auth==2.37.0 google-cloud-bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 google-cloud-core==2.4.1 @@ -24,7 +25,7 @@ google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.66.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.68.0; python_version >= '3.8' +grpcio==1.68.1; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 @@ -44,7 +45,7 @@ pyasn1-modules==0.4.1; python_version >= '3.8' pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' pyparsing===3.1.4; python_version < '3.9' -pyparsing==3.2.0; python_version >= '3.9' +pyparsing==3.2.1; python_version >= '3.9' python-dateutil==2.9.0.post0 pytz==2024.2 PyYAML===6.0.1; python_version == '3.7' @@ -53,9 +54,10 @@ requests==2.31.0; python_version == '3.7' requests==2.32.3; python_version >= '3.8' rsa==4.9 Shapely==2.0.6 -six==1.16.0 +six==1.17.0 typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' -urllib3==2.2.3; python_version >= '3.8' +urllib3===2.2.3; python_version == '3.8' +urllib3==2.3.0; python_version >= '3.9' diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 1640e1a95..827b02dcf 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 543d9a512..87efa3dec 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -bigquery_magics==0.4.0 +bigquery_magics==0.5.0 db-dtypes==1.3.1 google.cloud.bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 1640e1a95..827b02dcf 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index ca8a0a13e..77103a338 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,4 +1,4 @@ -bigquery-magics==0.4.0 +bigquery-magics==0.5.0 db-dtypes==1.3.1 google-cloud-bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 @@ -7,7 +7,8 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.9.2; python_version >= '3.9' +matplotlib===3.9.2; python_version == '3.9' +matplotlib==3.10.0; python_version >= '3.10' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.3; python_version >= '3.9' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index bb0b2a6bf..077e465cf 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 From cc49760de1bab7a2e45fe9e485daacc4eebae1ef Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 3 Jan 2025 23:01:48 +0100 Subject: [PATCH 388/536] chore(deps): update dependency virtualenv to v20.28.1 (#2101) --- .kokoro/docker/docs/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index fb6ffa272..48ace5de9 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -66,7 +66,7 @@ tomli==2.2.1 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.28.0 \ - --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ - --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa +virtualenv==20.28.1 \ + --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ + --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 # via nox From 473c3c30ef5201154c295c41ae9d8a25435a9b3f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 7 Jan 2025 00:17:03 +0100 Subject: [PATCH 389/536] chore(deps): update dependency grpcio to v1.69.0 (#2102) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ab73dbe87..71579867f 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -25,7 +25,7 @@ google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.66.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.68.1; python_version >= '3.8' +grpcio==1.69.0; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 From b35d741fe564ac106b2bf9d033b08d808b114363 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 10:00:45 -0500 Subject: [PATCH 390/536] chore(python): exclude .github/workflows/unittest.yml in renovate config (#2103) Source-Link: https://github.com/googleapis/synthtool/commit/106d292bd234e5d9977231dcfbc4831e34eba13a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 6 +++--- .kokoro/docker/docs/requirements.txt | 12 ++++++------ renovate.json | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 26306af66..10cf433a8 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:8e3e7e18255c22d1489258d0374c901c01f9c4fd77a12088670cd73d580aa737 -# created: 2024-12-17T00:59:58.625514486Z + digest: sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a +# created: 2025-01-09T12:01:16.422459506Z diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index 48ace5de9..f99a5c4aa 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -4,9 +4,9 @@ # # pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in # -argcomplete==3.5.3 \ - --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ - --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 +argcomplete==3.5.2 \ + --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ + --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb # via nox colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ @@ -66,7 +66,7 @@ tomli==2.2.1 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.28.1 \ - --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ - --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 +virtualenv==20.28.0 \ + --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ + --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa # via nox diff --git a/renovate.json b/renovate.json index 39b2a0ec9..c7875c469 100644 --- a/renovate.json +++ b/renovate.json @@ -5,7 +5,7 @@ ":preserveSemverRanges", ":disableDependencyDashboard" ], - "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py"], + "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml"], "pip_requirements": { "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] } From a2bebb95c5ef32ac7c7cbe19c3e7a9412cbee60d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 9 Jan 2025 13:42:37 -0500 Subject: [PATCH 391/536] feat: adds new input validation function similar to isinstance. (#2107) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: adds new function similar to isinstance. * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- google/cloud/bigquery/_helpers.py | 32 ++++++++++++++++++++++++++++++- tests/unit/test__helpers.py | 32 +++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 1eda80712..ea47af28d 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -22,7 +22,7 @@ import re import os import warnings -from typing import Optional, Union +from typing import Optional, Union, Any, Tuple, Type from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -1004,3 +1004,33 @@ def _verify_job_config_type(job_config, expected_type, param_name="job_config"): job_config=job_config, ) ) + + +def _isinstance_or_raise( + value: Any, + dtype: Union[Type, Tuple[Type, ...]], + none_allowed: Optional[bool] = False, +) -> Any: + """Determine whether a value type matches a given datatype or None. + Args: + value (Any): Value to be checked. + dtype (type): Expected data type or tuple of data types. + none_allowed Optional(bool): whether value is allowed to be None. Default + is False. + Returns: + Any: Returns the input value if the type check is successful. + Raises: + TypeError: If the input value's type does not match the expected data type(s). + """ + if none_allowed and value is None: + return value + + if isinstance(value, dtype): + return value + + or_none = "" + if none_allowed: + or_none = " (or None)" + + msg = f"Pass {value} as a '{dtype}'{or_none}. Got {type(value)}." + raise TypeError(msg) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 0a307498f..adba6327c 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -24,6 +24,7 @@ from unittest import mock import google.api_core +from google.cloud.bigquery._helpers import _isinstance_or_raise @pytest.mark.skipif( @@ -1661,3 +1662,34 @@ def test_w_env_var(self): host = self._call_fut() self.assertEqual(host, HOST) + + +class Test__isinstance_or_raise: + @pytest.mark.parametrize( + "value,dtype,none_allowed,expected", + [ + (None, str, True, None), + ("hello world.uri", str, True, "hello world.uri"), + ("hello world.uri", str, False, "hello world.uri"), + (None, (str, float), True, None), + ("hello world.uri", (str, float), True, "hello world.uri"), + ("hello world.uri", (str, float), False, "hello world.uri"), + ], + ) + def test__valid_isinstance_or_raise(self, value, dtype, none_allowed, expected): + result = _isinstance_or_raise(value, dtype, none_allowed=none_allowed) + assert result == expected + + @pytest.mark.parametrize( + "value,dtype,none_allowed,expected", + [ + (None, str, False, pytest.raises(TypeError)), + ({"key": "value"}, str, True, pytest.raises(TypeError)), + ({"key": "value"}, str, False, pytest.raises(TypeError)), + ({"key": "value"}, (str, float), True, pytest.raises(TypeError)), + ({"key": "value"}, (str, float), False, pytest.raises(TypeError)), + ], + ) + def test__invalid_isinstance_or_raise(self, value, dtype, none_allowed, expected): + with expected: + _isinstance_or_raise(value, dtype, none_allowed=none_allowed) From 62960f255d05b15940a8d2cdc595592175fada11 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 10 Jan 2025 13:22:06 -0500 Subject: [PATCH 392/536] feat: adds the SerDeInfo class and tests (#2108) * feat: adds SerDeInfo class and tests * cleans up type hints and some minor tweaks --- google/cloud/bigquery/schema.py | 88 +++++++++++++++++++++++++++++++ tests/unit/test_schema.py | 92 +++++++++++++++++++++++++++++++-- 2 files changed, 176 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index b062396cf..f93877d45 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -14,8 +14,10 @@ """Schemas for BigQuery tables / queries.""" +from __future__ import annotations import collections import enum +import typing from typing import Any, cast, Dict, Iterable, Optional, Union from google.cloud.bigquery import _helpers @@ -556,3 +558,89 @@ def to_api_repr(self) -> dict: """ answer = {"names": list(self.names)} return answer + + +class SerDeInfo: + """Serializer and deserializer information. + + Args: + serialization_library (str): Required. Specifies a fully-qualified class + name of the serialization library that is responsible for the + translation of data between table representation and the underlying + low-level input and output format structures. The maximum length is + 256 characters. + name (Optional[str]): Name of the SerDe. The maximum length is 256 + characters. + parameters: (Optional[dict[str, str]]): Key-value pairs that define the initialization + parameters for the serialization library. Maximum size 10 Kib. + """ + + def __init__( + self, + serialization_library: str, + name: Optional[str] = None, + parameters: Optional[dict[str, str]] = None, + ): + self._properties: Dict[str, Any] = {} + self.serialization_library = serialization_library + self.name = name + self.parameters = parameters + + @property + def serialization_library(self) -> str: + """Required. Specifies a fully-qualified class name of the serialization + library that is responsible for the translation of data between table + representation and the underlying low-level input and output format + structures. The maximum length is 256 characters.""" + + return typing.cast(str, self._properties.get("serializationLibrary")) + + @serialization_library.setter + def serialization_library(self, value: str): + value = _helpers._isinstance_or_raise(value, str, none_allowed=False) + self._properties["serializationLibrary"] = value + + @property + def name(self) -> Optional[str]: + """Optional. Name of the SerDe. The maximum length is 256 characters.""" + + return self._properties.get("name") + + @name.setter + def name(self, value: Optional[str] = None): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["name"] = value + + @property + def parameters(self) -> Optional[dict[str, str]]: + """Optional. Key-value pairs that define the initialization parameters + for the serialization library. Maximum size 10 Kib.""" + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Optional[dict[str, str]] = None): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> SerDeInfo: + """Factory: constructs an instance of the class (cls) + given its API representation. + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls("PLACEHOLDER") + config._properties = api_repr + return config diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 4b0b28158..380067dc8 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -20,6 +20,7 @@ from google.cloud import bigquery from google.cloud.bigquery.standard_sql import StandardSqlStructType +from google.cloud.bigquery import schema from google.cloud.bigquery.schema import PolicyTagList @@ -130,8 +131,6 @@ def test_constructor_range_str(self): self.assertEqual(field.range_element_type.element_type, "DATETIME") def test_to_api_repr(self): - from google.cloud.bigquery.schema import PolicyTagList - policy = PolicyTagList(names=("foo", "bar")) self.assertEqual( policy.to_api_repr(), @@ -886,8 +885,6 @@ def test_valid_mapping_representation(self): class TestPolicyTags(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery.schema import PolicyTagList - return PolicyTagList def _make_one(self, *args, **kw): @@ -1129,3 +1126,90 @@ def test_to_api_repr_parameterized(field, api): from google.cloud.bigquery.schema import SchemaField assert SchemaField(**field).to_api_repr() == api + + +class TestSerDeInfo: + """Tests for the SerDeInfo class.""" + + @staticmethod + def _get_target_class(): + return schema.SerDeInfo + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + ("testpath.to.LazySimpleSerDe", None, None), + ("testpath.to.LazySimpleSerDe", "serde_name", None), + ("testpath.to.LazySimpleSerDe", None, {"key": "value"}), + ("testpath.to.LazySimpleSerDe", "serde_name", {"key": "value"}), + ], + ) + def test_ctor_valid_input(self, serialization_library, name, parameters): + serde_info = self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + assert serde_info.serialization_library == serialization_library + assert serde_info.name == name + assert serde_info.parameters == parameters + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + (123, None, None), + ("testpath.to.LazySimpleSerDe", 123, None), + ("testpath.to.LazySimpleSerDe", None, ["test", "list"]), + ("testpath.to.LazySimpleSerDe", None, 123), + ], + ) + def test_ctor_invalid_input(self, serialization_library, name, parameters): + with pytest.raises(TypeError) as e: + self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + serde_info = self._make_one( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_name", + parameters={"key": "value"}, + ) + expected_repr = { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_name", + "parameters": {"key": "value"}, + } + assert serde_info.to_api_repr() == expected_repr + + def test_from_api_repr(self): + """GIVEN an api representation of a SerDeInfo object (i.e. resource) + WHEN converted into a SerDeInfo object using from_api_repr() + THEN it will have the representation in dict format as a SerDeInfo + object made directly (via _make_one()) and represented in dict format. + """ + api_repr = { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_name", + "parameters": {"key": "value"}, + } + + expected = self._make_one( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_name", + parameters={"key": "value"}, + ) + + klass = self._get_target_class() + result = klass.from_api_repr(api_repr) + + # We convert both to dict format because these classes do not have a + # __eq__() method to facilitate direct equality comparisons. + assert result.to_api_repr() == expected.to_api_repr() From 6be0272ff25dac97a38ae4ee5aa02016dc82a0d8 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 14 Jan 2025 15:48:57 -0500 Subject: [PATCH 393/536] feat: adds StorageDescriptor and tests (#2109) * feat: adds StorageDescriptor and tests * updates attr names, corrects type hinting --- google/cloud/bigquery/schema.py | 118 +++++++++++++++++++++++++++++ tests/unit/test_schema.py | 128 ++++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index f93877d45..8d62b2b5b 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -644,3 +644,121 @@ def from_api_repr(cls, api_repr: dict) -> SerDeInfo: config = cls("PLACEHOLDER") config._properties = api_repr return config + + +class StorageDescriptor: + """Contains information about how a table's data is stored and accessed by open + source query engines. + + Args: + input_format (Optional[str]): Specifies the fully qualified class name of + the InputFormat (e.g. + "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum + length is 128 characters. + location_uri (Optional[str]): The physical location of the table (e.g. + 'gs://spark-dataproc-data/pangea-data/case_sensitive/' or + 'gs://spark-dataproc-data/pangea-data/'). The maximum length is + 2056 bytes. + output_format (Optional[str]): Specifies the fully qualified class name + of the OutputFormat (e.g. + "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). The maximum + length is 128 characters. + serde_info (Union[SerDeInfo, dict, None]): Serializer and deserializer information. + """ + + def __init__( + self, + input_format: Optional[str] = None, + location_uri: Optional[str] = None, + output_format: Optional[str] = None, + serde_info: Union[SerDeInfo, dict, None] = None, + ): + self._properties: Dict[str, Any] = {} + self.input_format = input_format + self.location_uri = location_uri + self.output_format = output_format + # Using typing.cast() because mypy cannot wrap it's head around the fact that: + # the setter can accept Union[SerDeInfo, dict, None] + # but the getter will only ever return Optional[SerDeInfo]. + self.serde_info = typing.cast(Optional[SerDeInfo], serde_info) + + @property + def input_format(self) -> Optional[str]: + """Optional. Specifies the fully qualified class name of the InputFormat + (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum + length is 128 characters.""" + + return self._properties.get("inputFormat") + + @input_format.setter + def input_format(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["inputFormat"] = value + + @property + def location_uri(self) -> Optional[str]: + """Optional. The physical location of the table (e.g. 'gs://spark- + dataproc-data/pangea-data/case_sensitive/' or 'gs://spark-dataproc- + data/pangea-data/'). The maximum length is 2056 bytes.""" + + return self._properties.get("locationUri") + + @location_uri.setter + def location_uri(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["locationUri"] = value + + @property + def output_format(self) -> Optional[str]: + """Optional. Specifies the fully qualified class name of the + OutputFormat (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). + The maximum length is 128 characters.""" + + return self._properties.get("outputFormat") + + @output_format.setter + def output_format(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["outputFormat"] = value + + @property + def serde_info(self) -> Optional[SerDeInfo]: + """Optional. Serializer and deserializer information.""" + + prop = _helpers._get_sub_prop(self._properties, ["serDeInfo"]) + if prop is not None: + return typing.cast(SerDeInfo, SerDeInfo.from_api_repr(prop)) + return None + + @serde_info.setter + def serde_info(self, value: Union[SerDeInfo, dict, None]): + value = _helpers._isinstance_or_raise( + value, (SerDeInfo, dict), none_allowed=True + ) + + if isinstance(value, SerDeInfo): + self._properties["serDeInfo"] = value.to_api_repr() + else: + self._properties["serDeInfo"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return self._properties + + @classmethod + def from_api_repr(cls, resource: dict) -> StorageDescriptor: + """Factory: constructs an instance of the class (cls) + given its API representation. + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls() + config._properties = resource + return config diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 380067dc8..7e84dd63f 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -1213,3 +1213,131 @@ def test_from_api_repr(self): # We convert both to dict format because these classes do not have a # __eq__() method to facilitate direct equality comparisons. assert result.to_api_repr() == expected.to_api_repr() + + +class TestStorageDescriptor: + """Tests for the StorageDescriptor class.""" + + @staticmethod + def _get_target_class(): + return schema.StorageDescriptor + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + serdeinfo_resource = { + "serialization_library": "testpath.to.LazySimpleSerDe", + "name": "serde_lib_name", + "parameters": {"key": "value"}, + } + + SERDEINFO = schema.SerDeInfo("PLACEHOLDER").from_api_repr(serdeinfo_resource) + + STORAGEDESCRIPTOR = { + "inputFormat": "testpath.to.OrcInputFormat", + "locationUri": "gs://test/path/", + "outputFormat": "testpath.to.OrcOutputFormat", + "serDeInfo": SERDEINFO.to_api_repr(), + } + + @pytest.mark.parametrize( + "input_format,location_uri,output_format,serde_info", + [ + (None, None, None, None), + ("testpath.to.OrcInputFormat", None, None, None), + (None, "gs://test/path/", None, None), + (None, None, "testpath.to.OrcOutputFormat", None), + (None, None, None, SERDEINFO), + ( + "testpath.to.OrcInputFormat", + "gs://test/path/", + "testpath.to.OrcOutputFormat", + SERDEINFO, # uses SERDEINFO class format + ), + ( + "testpath.to.OrcInputFormat", + "gs://test/path/", + "testpath.to.OrcOutputFormat", + serdeinfo_resource, # uses api resource format (dict) + ), + ], + ) + def test_ctor_valid_input( + self, input_format, location_uri, output_format, serde_info + ): + storage_descriptor = self._make_one( + input_format=input_format, + location_uri=location_uri, + output_format=output_format, + serde_info=serde_info, + ) + assert storage_descriptor.input_format == input_format + assert storage_descriptor.location_uri == location_uri + assert storage_descriptor.output_format == output_format + if isinstance(serde_info, schema.SerDeInfo): + assert ( + storage_descriptor.serde_info.to_api_repr() == serde_info.to_api_repr() + ) + elif isinstance(serde_info, dict): + assert storage_descriptor.serde_info.to_api_repr() == serde_info + else: + assert storage_descriptor.serde_info is None + + @pytest.mark.parametrize( + "input_format,location_uri,output_format,serde_info", + [ + (123, None, None, None), + (None, 123, None, None), + (None, None, 123, None), + (None, None, None, 123), + ], + ) + def test_ctor_invalid_input( + self, input_format, location_uri, output_format, serde_info + ): + with pytest.raises(TypeError) as e: + self._make_one( + input_format=input_format, + location_uri=location_uri, + output_format=output_format, + serde_info=serde_info, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + storage_descriptor = self._make_one( + input_format="input_format", + location_uri="location_uri", + output_format="output_format", + serde_info=self.SERDEINFO, + ) + expected_repr = { + "inputFormat": "input_format", + "locationUri": "location_uri", + "outputFormat": "output_format", + "serDeInfo": self.SERDEINFO.to_api_repr(), + } + assert storage_descriptor.to_api_repr() == expected_repr + + def test_from_api_repr(self): + """GIVEN an api representation of a StorageDescriptor (i.e. STORAGEDESCRIPTOR) + WHEN converted into a StorageDescriptor using from_api_repr() and + displayed as a dict + THEN it will have the same representation a StorageDescriptor created + directly (via the _make_one() func) and displayed as a dict. + """ + + # generate via STORAGEDESCRIPTOR + resource = self.STORAGEDESCRIPTOR + result = self._get_target_class().from_api_repr(resource) + # result = klass.from_api_repr(resource) + + expected = self._make_one( + input_format="testpath.to.OrcInputFormat", + location_uri="gs://test/path/", + output_format="testpath.to.OrcOutputFormat", + serde_info=self.SERDEINFO, + ) + assert result.to_api_repr() == expected.to_api_repr() From 3e130166f43dcc06704fe90edf9068dfd44842a6 Mon Sep 17 00:00:00 2001 From: Keunsoo Park <43742836+keunsoopark@users.noreply.github.com> Date: Tue, 14 Jan 2025 22:17:13 +0100 Subject: [PATCH 394/536] feat: resource tags in dataset (#2090) * feat: resource tags in dataset * fix: fix unittets * Delete dataset/pyvenv.cfg * Update google/cloud/bigquery/dataset.py Co-authored-by: Lingqing Gan * Update google/cloud/bigquery/dataset.py Co-authored-by: Lingqing Gan * added system tests & fix unittest for none * add missing assert * remove venv * include resourcemanager in noxfile.py * fix fixture for tag keys * register tags before using in tests * handle alreadyexist error * fix: tag keys & values creation & deletion * fix comment * make tag keys unique * remove unused import --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/dataset.py | 23 ++++++++ noxfile.py | 4 ++ tests/system/test_client.py | 90 ++++++++++++++++++++++++++++++- tests/unit/test_client.py | 6 +++ tests/unit/test_create_dataset.py | 5 ++ tests/unit/test_dataset.py | 22 ++++++++ 6 files changed, 148 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index c49a52faf..4d06d729d 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -530,6 +530,7 @@ class Dataset(object): "storage_billing_model": "storageBillingModel", "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", + "resource_tags": "resourceTags", } def __init__(self, dataset_ref) -> None: @@ -801,6 +802,28 @@ def labels(self, value): raise ValueError("Pass a dict") self._properties["labels"] = value + @property + def resource_tags(self): + """Dict[str, str]: Resource tags of the dataset. + + Optional. The tags attached to this dataset. Tag keys are globally + unique. Tag key is expected to be in the namespaced format, for + example "123456789012/environment" where 123456789012 is + the ID of the parent organization or project resource for this tag + key. Tag value is expected to be the short name, for example + "Production". + + Raises: + ValueError: for invalid value types. + """ + return self._properties.setdefault("resourceTags", {}) + + @resource_tags.setter + def resource_tags(self, value): + if not isinstance(value, dict) and value is not None: + raise ValueError("Pass a dict") + self._properties["resourceTags"] = value + @property def default_encryption_configuration(self): """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom diff --git a/noxfile.py b/noxfile.py index 750a6b459..e08956b11 100644 --- a/noxfile.py +++ b/noxfile.py @@ -219,6 +219,9 @@ def system(session): # Data Catalog needed for the column ACL test with a real Policy Tag. session.install("google-cloud-datacatalog", "-c", constraints_path) + # Resource Manager needed for test with a real Resource Tag. + session.install("google-cloud-resource-manager", "-c", constraints_path) + if session.python in ["3.11", "3.12"]: extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: @@ -366,6 +369,7 @@ def prerelease_deps(session): session.install( "freezegun", "google-cloud-datacatalog", + "google-cloud-resource-manager", "google-cloud-storage", "google-cloud-testutils", "psutil", diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 95c679a14..c0dd83b12 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -25,6 +25,8 @@ import time import unittest import uuid +import random +import string from typing import Optional from google.api_core.exceptions import PreconditionFailed @@ -45,6 +47,8 @@ from google.cloud import storage from google.cloud.datacatalog_v1 import types as datacatalog_types from google.cloud.datacatalog_v1 import PolicyTagManagerClient +from google.cloud.resourcemanager_v3 import types as resourcemanager_types +from google.cloud.resourcemanager_v3 import TagKeysClient, TagValuesClient import psutil import pytest from test_utils.retry import RetryErrors @@ -156,9 +160,12 @@ def setUpModule(): class TestBigQuery(unittest.TestCase): def setUp(self): self.to_delete = [] + self.to_delete_tag_keys_values = [] def tearDown(self): policy_tag_client = PolicyTagManagerClient() + tag_keys_client = TagKeysClient() + tag_values_client = TagValuesClient() def _still_in_use(bad_request): return any( @@ -181,6 +188,18 @@ def _still_in_use(bad_request): else: doomed.delete() + # The TagKey cannot be deleted if it has any child TagValues. + for key_values in self.to_delete_tag_keys_values: + tag_key = key_values.pop() + + # Delete tag values first + [ + tag_values_client.delete_tag_value(name=tag_value.name).result() + for tag_value in key_values + ] + + tag_keys_client.delete_tag_key(name=tag_key.name).result() + def test_get_service_account_email(self): client = Config.CLIENT @@ -278,24 +297,74 @@ def test_create_dataset_with_default_rounding_mode(self): self.assertTrue(_dataset_exists(dataset)) self.assertEqual(dataset.default_rounding_mode, "ROUND_HALF_EVEN") + def _create_resource_tag_key_and_values(self, key, values): + tag_key_client = TagKeysClient() + tag_value_client = TagValuesClient() + + tag_key_parent = f"projects/{Config.CLIENT.project}" + new_tag_key = resourcemanager_types.TagKey( + short_name=key, parent=tag_key_parent + ) + tag_key = tag_key_client.create_tag_key(tag_key=new_tag_key).result() + self.to_delete_tag_keys_values.insert(0, [tag_key]) + + for value in values: + new_tag_value = resourcemanager_types.TagValue( + short_name=value, parent=tag_key.name + ) + tag_value = tag_value_client.create_tag_value( + tag_value=new_tag_value + ).result() + self.to_delete_tag_keys_values[0].insert(0, tag_value) + def test_update_dataset(self): dataset = self.temp_dataset(_make_dataset_id("update_dataset")) self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) self.assertEqual(dataset.labels, {}) + self.assertEqual(dataset.resource_tags, {}) self.assertIs(dataset.is_case_insensitive, False) + # This creates unique tag keys for each of test runnings for different Python versions + tag_postfix = "".join(random.choices(string.ascii_letters + string.digits, k=4)) + tag_1 = f"env_{tag_postfix}" + tag_2 = f"component_{tag_postfix}" + tag_3 = f"project_{tag_postfix}" + + # Tags need to be created before they can be used in a dataset. + self._create_resource_tag_key_and_values(tag_1, ["prod", "dev"]) + self._create_resource_tag_key_and_values(tag_2, ["batch"]) + self._create_resource_tag_key_and_values(tag_3, ["atlas"]) + dataset.friendly_name = "Friendly" dataset.description = "Description" dataset.labels = {"priority": "high", "color": "blue"} + dataset.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "prod", + f"{Config.CLIENT.project}/{tag_2}": "batch", + } dataset.is_case_insensitive = True ds2 = Config.CLIENT.update_dataset( - dataset, ("friendly_name", "description", "labels", "is_case_insensitive") + dataset, + ( + "friendly_name", + "description", + "labels", + "resource_tags", + "is_case_insensitive", + ), ) self.assertEqual(ds2.friendly_name, "Friendly") self.assertEqual(ds2.description, "Description") self.assertEqual(ds2.labels, {"priority": "high", "color": "blue"}) + self.assertEqual( + ds2.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "prod", + f"{Config.CLIENT.project}/{tag_2}": "batch", + }, + ) self.assertIs(ds2.is_case_insensitive, True) ds2.labels = { @@ -303,8 +372,25 @@ def test_update_dataset(self): "shape": "circle", # add "priority": None, # delete } - ds3 = Config.CLIENT.update_dataset(ds2, ["labels"]) + ds2.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "dev", # change + f"{Config.CLIENT.project}/{tag_3}": "atlas", # add + f"{Config.CLIENT.project}/{tag_2}": None, # delete + } + ds3 = Config.CLIENT.update_dataset(ds2, ["labels", "resource_tags"]) self.assertEqual(ds3.labels, {"color": "green", "shape": "circle"}) + self.assertEqual( + ds3.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "dev", + f"{Config.CLIENT.project}/{tag_3}": "atlas", + }, + ) + + # Remove all tags + ds3.resource_tags = None + ds4 = Config.CLIENT.update_dataset(ds3, ["resource_tags"]) + self.assertEqual(ds4.resource_tags, {}) # If we try to update using d2 again, it will fail because the # previous update changed the ETag. diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index cd336b73f..14089b031 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2028,6 +2028,7 @@ def test_update_dataset(self): LABELS = {"priority": "high"} ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}] EXP = 17 + RESOURCE_TAGS = {"123456789012/key": "value"} RESOURCE = { "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, "etag": "etag", @@ -2037,6 +2038,7 @@ def test_update_dataset(self): "defaultTableExpirationMs": EXP, "labels": LABELS, "access": ACCESS, + "resourceTags": RESOURCE_TAGS, } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -2048,12 +2050,14 @@ def test_update_dataset(self): ds.default_table_expiration_ms = EXP ds.labels = LABELS ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] + ds.resource_tags = RESOURCE_TAGS fields = [ "description", "friendly_name", "location", "labels", "access_entries", + "resource_tags", ] with mock.patch( @@ -2077,6 +2081,7 @@ def test_update_dataset(self): "location": LOCATION, "labels": LABELS, "access": ACCESS, + "resourceTags": RESOURCE_TAGS, }, path="/" + PATH, timeout=7.5, @@ -2086,6 +2091,7 @@ def test_update_dataset(self): self.assertEqual(ds2.location, ds.location) self.assertEqual(ds2.labels, ds.labels) self.assertEqual(ds2.access_entries, ds.access_entries) + self.assertEqual(ds2.resource_tags, ds.resource_tags) # ETag becomes If-Match header. ds._properties["etag"] = "etag" diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index a2491a812..bd7c6a8f8 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -65,6 +65,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "tableId": "northern-hemisphere", } DEFAULT_ROUNDING_MODE = "ROUND_HALF_EVEN" + RESOURCE_TAGS = {"123456789012/foo": "bar"} RESOURCE = { "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, "etag": "etag", @@ -76,6 +77,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "labels": LABELS, "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], "defaultRoundingMode": DEFAULT_ROUNDING_MODE, + "resourceTags": RESOURCE_TAGS, } conn = client._connection = make_connection(RESOURCE) entries = [ @@ -91,6 +93,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): before.default_table_expiration_ms = 3600 before.location = LOCATION before.labels = LABELS + before.resource_tags = RESOURCE_TAGS before.default_rounding_mode = DEFAULT_ROUNDING_MODE after = client.create_dataset(before) assert after.dataset_id == DS_ID @@ -103,6 +106,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): assert after.default_table_expiration_ms == 3600 assert after.labels == LABELS assert after.default_rounding_mode == DEFAULT_ROUNDING_MODE + assert after.resource_tags == RESOURCE_TAGS conn.api_request.assert_called_once_with( method="POST", @@ -119,6 +123,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): {"view": VIEW, "role": None}, ], "labels": LABELS, + "resourceTags": RESOURCE_TAGS, }, timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index c0164bc73..46bcd6611 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -894,6 +894,28 @@ def test_location_setter(self): dataset.location = "LOCATION" self.assertEqual(dataset.location, "LOCATION") + def test_resource_tags_update_in_place(self): + dataset = self._make_one(self.DS_REF) + tags = dataset.resource_tags + tags["123456789012/foo"] = "bar" # update in place + self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"}) + + def test_resource_tags_setter(self): + dataset = self._make_one(self.DS_REF) + dataset.resource_tags = {"123456789012/foo": "bar"} + self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"}) + + def test_resource_tags_setter_bad_value(self): + dataset = self._make_one(self.DS_REF) + with self.assertRaises(ValueError): + dataset.resource_tags = "invalid" + with self.assertRaises(ValueError): + dataset.resource_tags = 123 + + def test_resource_tags_getter_missing_value(self): + dataset = self._make_one(self.DS_REF) + self.assertEqual(dataset.resource_tags, {}) + def test_labels_update_in_place(self): dataset = self._make_one(self.DS_REF) del dataset._properties["labels"] # don't start w/ existing dict From 55ca63c23fcb56573e2de67e4f7899939628c4a1 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 14 Jan 2025 23:14:02 -0500 Subject: [PATCH 395/536] feat: Adds ForeignTypeInfo class and tests (#2110) * Adds ForeignTypeInfo class and tests * Tweak to docstring * minor adjustment in test to enhance code coverage * Updates spacing in docstrings * More updates to spacing in docstrings. --- google/cloud/bigquery/schema.py | 64 ++++++++++++++++++++++++++++- tests/unit/test_schema.py | 71 ++++++++++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 8d62b2b5b..b278b686a 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -560,6 +560,63 @@ def to_api_repr(self) -> dict: return answer +class ForeignTypeInfo: + """Metadata about the foreign data type definition such as the system in which the + type is defined. + + Args: + type_system (str): Required. Specifies the system which defines the + foreign data type. + + TypeSystem enum currently includes: + * "TYPE_SYSTEM_UNSPECIFIED" + * "HIVE" + """ + + def __init__(self, type_system: Optional[str] = None): + self._properties: Dict[str, Any] = {} + self.type_system = type_system + + @property + def type_system(self) -> Optional[str]: + """Required. Specifies the system which defines the foreign data + type.""" + + return self._properties.get("typeSystem") + + @type_system.setter + def type_system(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["typeSystem"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignTypeInfo": + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'api_repr'. + """ + + config = cls() + config._properties = api_repr + return config + + class SerDeInfo: """Serializer and deserializer information. @@ -625,6 +682,7 @@ def parameters(self, value: Optional[dict[str, str]] = None): def to_api_repr(self) -> dict: """Build an API representation of this object. + Returns: Dict[str, Any]: A dictionary in the format used by the BigQuery API. @@ -635,11 +693,13 @@ def to_api_repr(self) -> dict: def from_api_repr(cls, api_repr: dict) -> SerDeInfo: """Factory: constructs an instance of the class (cls) given its API representation. + Args: - resource (Dict[str, Any]): + api_repr (Dict[str, Any]): API representation of the object to be instantiated. + Returns: - An instance of the class initialized with data from 'resource'. + An instance of the class initialized with data from 'api_repr'. """ config = cls("PLACEHOLDER") config._properties = api_repr diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 7e84dd63f..efbc5d26f 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -1128,6 +1128,73 @@ def test_to_api_repr_parameterized(field, api): assert SchemaField(**field).to_api_repr() == api +class TestForeignTypeInfo: + """Tests for ForeignTypeInfo objects.""" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.schema import ForeignTypeInfo + + return ForeignTypeInfo + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + @pytest.mark.parametrize( + "type_system,expected", + [ + (None, None), + ("TYPE_SYSTEM_UNSPECIFIED", "TYPE_SYSTEM_UNSPECIFIED"), + ("HIVE", "HIVE"), + ], + ) + def test_ctor_valid_input(self, type_system, expected): + result = self._make_one(type_system=type_system) + + assert result.type_system == expected + + def test_ctor_invalid_input(self): + with pytest.raises(TypeError) as e: + self._make_one(type_system=123) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + @pytest.mark.parametrize( + "type_system,expected", + [ + ("TYPE_SYSTEM_UNSPECIFIED", {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}), + ("HIVE", {"typeSystem": "HIVE"}), + (None, {"typeSystem": None}), + ], + ) + def test_to_api_repr(self, type_system, expected): + result = self._make_one(type_system=type_system) + + assert result.to_api_repr() == expected + + def test_from_api_repr(self): + """GIVEN an api representation of a ForeignTypeInfo object (i.e. api_repr) + WHEN converted into a ForeignTypeInfo object using from_api_repr() + THEN it will have the same representation in dict format as a ForeignTypeInfo + object made directly (via _make_one()) and represented in dict format. + """ + api_repr = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + + expected = self._make_one( + type_system="TYPE_SYSTEM_UNSPECIFIED", + ) + + klass = self._get_target_class() + result = klass.from_api_repr(api_repr) + + # We convert both to dict format because these classes do not have a + # __eq__() method to facilitate direct equality comparisons. + assert result.to_api_repr() == expected.to_api_repr() + + class TestSerDeInfo: """Tests for the SerDeInfo class.""" @@ -1190,9 +1257,9 @@ def test_to_api_repr(self): assert serde_info.to_api_repr() == expected_repr def test_from_api_repr(self): - """GIVEN an api representation of a SerDeInfo object (i.e. resource) + """GIVEN an api representation of a SerDeInfo object (i.e. api_repr) WHEN converted into a SerDeInfo object using from_api_repr() - THEN it will have the representation in dict format as a SerDeInfo + THEN it will have the same representation in dict format as a SerDeInfo object made directly (via _make_one()) and represented in dict format. """ api_repr = { From b929a900d49e2c15897134209ed9de5fc7f238cd Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 15 Jan 2025 12:44:27 -0500 Subject: [PATCH 396/536] feat: adds ExternalCatalogDatasetOptions and tests (#2111) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: adds ExternalCatalogDatasetOptions and tests * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Sweña (Swast) * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Tim Sweña (Swast) Co-authored-by: Owl Bot --- google/cloud/bigquery/dataset.py | 25 +++++++ google/cloud/bigquery/external_config.py | 76 +++++++++++++++++++- tests/unit/test_dataset.py | 84 ++++++++++++++++++++++ tests/unit/test_external_config.py | 89 ++++++++++++++++++++++++ 4 files changed, 273 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 4d06d729d..15a11fb40 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -27,6 +27,7 @@ from google.cloud.bigquery.routine import Routine, RoutineReference from google.cloud.bigquery.table import Table, TableReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery import external_config from typing import Optional, List, Dict, Any, Union @@ -531,6 +532,7 @@ class Dataset(object): "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", "resource_tags": "resourceTags", + "external_catalog_dataset_options": "externalCatalogDatasetOptions", } def __init__(self, dataset_ref) -> None: @@ -898,6 +900,29 @@ def storage_billing_model(self, value): ) self._properties["storageBillingModel"] = value + @property + def external_catalog_dataset_options(self): + """Options defining open source compatible datasets living in the + BigQuery catalog. Contains metadata of open source database, schema + or namespace represented by the current dataset.""" + + prop = _helpers._get_sub_prop( + self._properties, ["externalCatalogDatasetOptions"] + ) + + if prop is not None: + prop = external_config.ExternalCatalogDatasetOptions.from_api_repr(prop) + return prop + + @external_catalog_dataset_options.setter + def external_catalog_dataset_options(self, value): + value = _helpers._isinstance_or_raise( + value, external_config.ExternalCatalogDatasetOptions, none_allowed=True + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"] + ] = (value.to_api_repr() if value is not None else None) + @classmethod def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index a891bc232..7f2b58f2b 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -18,7 +18,7 @@ Job.configuration.query.tableDefinitions. """ -from __future__ import absolute_import +from __future__ import absolute_import, annotations import base64 import copy @@ -28,6 +28,7 @@ from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery import _helpers from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery.schema import SchemaField @@ -1003,3 +1004,76 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig": config = cls(resource["sourceFormat"]) config._properties = copy.deepcopy(resource) return config + + +class ExternalCatalogDatasetOptions: + """Options defining open source compatible datasets living in the BigQuery catalog. + Contains metadata of open source database, schema or namespace represented + by the current dataset. + + Args: + default_storage_location_uri (Optional[str]): The storage location URI for all + tables in the dataset. Equivalent to hive metastore's database + locationUri. Maximum length of 1024 characters. (str) + parameters (Optional[dict[str, Any]]): A map of key value pairs defining the parameters + and properties of the open source schema. Maximum size of 2Mib. + """ + + def __init__( + self, + default_storage_location_uri: Optional[str] = None, + parameters: Optional[Dict[str, Any]] = None, + ): + self._properties: Dict[str, Any] = {} + self.default_storage_location_uri = default_storage_location_uri + self.parameters = parameters + + @property + def default_storage_location_uri(self) -> Optional[str]: + """Optional. The storage location URI for all tables in the dataset. + Equivalent to hive metastore's database locationUri. Maximum length of + 1024 characters.""" + + return self._properties.get("defaultStorageLocationUri") + + @default_storage_location_uri.setter + def default_storage_location_uri(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["defaultStorageLocationUri"] = value + + @property + def parameters(self) -> Optional[Dict[str, Any]]: + """Optional. A map of key value pairs defining the parameters and + properties of the open source schema. Maximum size of 2Mib.""" + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Optional[Dict[str, Any]]): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls() + config._properties = api_repr + return config diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 46bcd6611..8ab8dffec 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -650,6 +650,16 @@ class TestDataset(unittest.TestCase): DS_ID = "dataset-id" DS_REF = DatasetReference(PROJECT, DS_ID) KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" + DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path" + PARAMETERS = {"key": "value"} + API_REPR = { + "datasetReference": {"projectId": "project", "datasetId": "dataset-id"}, + "labels": {}, + "externalCatalogDatasetOptions": { + "defaultStorageLocationUri": DEFAULT_STORAGE_LOCATION_URI, + "parameters": PARAMETERS, + }, + } @staticmethod def _get_target_class(): @@ -1067,6 +1077,80 @@ def test___repr__(self): expected = "Dataset(DatasetReference('project1', 'dataset1'))" self.assertEqual(repr(dataset), expected) + def test_external_catalog_dataset_options_setter(self): + # GIVEN the parameters DEFAULT_STORAGE_LOCATION_URI and PARAMETERS + # WHEN an ExternalCatalogDatasetOptions obj is created + # and added to a dataset. + # THEN the api representation of the dataset will match API_REPR + + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + dataset = self._make_one(self.DS_REF) + + ecdo_obj = ExternalCatalogDatasetOptions( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + dataset.external_catalog_dataset_options = ecdo_obj + + result = dataset.to_api_repr() + expected = self.API_REPR + assert result == expected + + def test_external_catalog_dataset_options_getter_prop_exists(self): + # GIVEN default dataset PLUS an ExternalCatalogDatasetOptions + # THEN confirm that the api_repr of the ExternalCatalogDatasetsOptions + # matches the api_repr of the external_catalog_dataset_options attribute. + + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + dataset = self._make_one(self.DS_REF) + ecdo_obj = ExternalCatalogDatasetOptions( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + dataset.external_catalog_dataset_options = ecdo_obj + result = dataset.external_catalog_dataset_options.to_api_repr() + expected = ecdo_obj.to_api_repr() + assert result == expected + + def test_external_catalog_dataset_options_getter_prop_is_none(self): + # GIVEN only a default dataset + # THEN confirm that external_catalog_dataset_options is None + + dataset = self._make_one(self.DS_REF) + expected = None + result = dataset.external_catalog_dataset_options + assert result == expected + + def test_external_catalog_dataset_options_from_api_repr(self): + # GIVEN default dataset including an ExternalCatalogDatasetOptions + # THEN confirm that the api_repr of the ExternalCatalogDatasetsOptions + # on a dataset object created via from_api_repr matches the api_repr + # of the "externalCatalogDatasetOptions" key. + + api_repr = self.API_REPR + klass = self._get_target_class() + dataset = klass.from_api_repr(api_repr) + + result = dataset.external_catalog_dataset_options.to_api_repr() + expected = api_repr["externalCatalogDatasetOptions"] + assert result == expected + + def test_external_catalog_dataset_options_to_api_repr(self): + # GIVEN a dataset api_repr including an ExternalCatalogDatasetOptions key + # THEN confirm that the api_repr of that key from a dataset object created + # via the to_api_repr() method matches the value of the key + # used to create the dataset object + + api_repr = self.API_REPR + klass = self._get_target_class() + dataset = klass.from_api_repr(api_repr) + + result = dataset.to_api_repr()["externalCatalogDatasetOptions"] + expected = api_repr["externalCatalogDatasetOptions"] + assert result == expected + class TestDatasetListItem(unittest.TestCase): @staticmethod diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 9fd16e699..0c27d8e56 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -19,6 +19,8 @@ from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +import pytest + class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] @@ -890,3 +892,90 @@ def _copy_and_update(d, u): d = copy.deepcopy(d) d.update(u) return d + + +class TestExternalCatalogDatasetOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + return ExternalCatalogDatasetOptions + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path" + PARAMETERS = {"key": "value"} + + @pytest.mark.parametrize( + "default_storage_location_uri,parameters", + [ + (DEFAULT_STORAGE_LOCATION_URI, PARAMETERS), # set all params + (DEFAULT_STORAGE_LOCATION_URI, None), # set only one argument at a time + (None, PARAMETERS), + (None, None), # use default parameters + ], + ) + def test_ctor_initialization( + self, + default_storage_location_uri, + parameters, + ): + """Test ExternalCatalogDatasetOptions constructor with explicit values.""" + + instance = self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + + assert instance.default_storage_location_uri == default_storage_location_uri + assert instance.parameters == parameters + + @pytest.mark.parametrize( + "default_storage_location_uri,parameters", + [ + (123, None), # does not accept integers + (None, 123), + ], + ) + def test_ctor_invalid_input(self, default_storage_location_uri, parameters): + """Test ExternalCatalogDatasetOptions constructor with invalid input.""" + + with pytest.raises(TypeError) as e: + self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + """Test ExternalCatalogDatasetOptions.to_api_repr method.""" + + instance = self._make_one( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + resource = instance.to_api_repr() + assert ( + resource["defaultStorageLocationUri"] == self.DEFAULT_STORAGE_LOCATION_URI + ) + assert resource["parameters"] == self.PARAMETERS + + def test_from_api_repr(self): + """GIVEN an api representation of an ExternalCatalogDatasetOptions object (i.e. api_repr) + WHEN converted into an ExternalCatalogDatasetOptions object using from_api_repr() + THEN it will have the representation in dict format as an ExternalCatalogDatasetOptions + object made directly (via _make_one()) and represented in dict format. + """ + + instance = self._make_one() + api_repr = { + "defaultStorageLocationUri": self.DEFAULT_STORAGE_LOCATION_URI, + "parameters": self.PARAMETERS, + } + result = instance.from_api_repr(api_repr) + + assert isinstance(result, external_config.ExternalCatalogDatasetOptions) + assert result._properties == api_repr From 9c504186f03ffb8b86836c683912b310db2435e9 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 15 Jan 2025 10:22:48 -0800 Subject: [PATCH 397/536] chore(main): release 3.28.0 (#2056) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 28 ++++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 989b7f020..6a7ff5641 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,34 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.28.0](https://github.com/googleapis/python-bigquery/compare/v3.27.0...v3.28.0) (2025-01-15) + + +### Features + +* Add property for `allowNonIncrementalDefinition` for materialized view ([#2084](https://github.com/googleapis/python-bigquery/issues/2084)) ([3359ef3](https://github.com/googleapis/python-bigquery/commit/3359ef37b90243bea2d9e68bb996fe5d736f304c)) +* Add property for maxStaleness in table definitions ([#2087](https://github.com/googleapis/python-bigquery/issues/2087)) ([729322c](https://github.com/googleapis/python-bigquery/commit/729322c2288a30464f2f135ba18b9c4aa7d2f0da)) +* Add type hints to Client ([#2044](https://github.com/googleapis/python-bigquery/issues/2044)) ([40529de](https://github.com/googleapis/python-bigquery/commit/40529de923e25c41c6728c121b9c82a042967ada)) +* Adds ExternalCatalogDatasetOptions and tests ([#2111](https://github.com/googleapis/python-bigquery/issues/2111)) ([b929a90](https://github.com/googleapis/python-bigquery/commit/b929a900d49e2c15897134209ed9de5fc7f238cd)) +* Adds ForeignTypeInfo class and tests ([#2110](https://github.com/googleapis/python-bigquery/issues/2110)) ([55ca63c](https://github.com/googleapis/python-bigquery/commit/55ca63c23fcb56573e2de67e4f7899939628c4a1)) +* Adds new input validation function similar to isinstance. ([#2107](https://github.com/googleapis/python-bigquery/issues/2107)) ([a2bebb9](https://github.com/googleapis/python-bigquery/commit/a2bebb95c5ef32ac7c7cbe19c3e7a9412cbee60d)) +* Adds StorageDescriptor and tests ([#2109](https://github.com/googleapis/python-bigquery/issues/2109)) ([6be0272](https://github.com/googleapis/python-bigquery/commit/6be0272ff25dac97a38ae4ee5aa02016dc82a0d8)) +* Adds the SerDeInfo class and tests ([#2108](https://github.com/googleapis/python-bigquery/issues/2108)) ([62960f2](https://github.com/googleapis/python-bigquery/commit/62960f255d05b15940a8d2cdc595592175fada11)) +* Migrate to pyproject.toml ([#2041](https://github.com/googleapis/python-bigquery/issues/2041)) ([1061611](https://github.com/googleapis/python-bigquery/commit/106161180ead01aca1ead909cf06ca559f68666d)) +* Preserve unknown fields from the REST API representation in `SchemaField` ([#2097](https://github.com/googleapis/python-bigquery/issues/2097)) ([aaf1eb8](https://github.com/googleapis/python-bigquery/commit/aaf1eb85ada95ab866be0199812ea7f5c7f50766)) +* Resource tags in dataset ([#2090](https://github.com/googleapis/python-bigquery/issues/2090)) ([3e13016](https://github.com/googleapis/python-bigquery/commit/3e130166f43dcc06704fe90edf9068dfd44842a6)) +* Support setting max_stream_count when fetching query result ([#2051](https://github.com/googleapis/python-bigquery/issues/2051)) ([d461297](https://github.com/googleapis/python-bigquery/commit/d4612979b812d2a835e47200f27a87a66bcb856a)) + + +### Bug Fixes + +* Allow geopandas 1.x ([#2065](https://github.com/googleapis/python-bigquery/issues/2065)) ([f2ab8cb](https://github.com/googleapis/python-bigquery/commit/f2ab8cbfe00d442ad3b40683ecfec320e53b4688)) + + +### Documentation + +* Render fields correctly for update calls ([#2055](https://github.com/googleapis/python-bigquery/issues/2055)) ([a4d9534](https://github.com/googleapis/python-bigquery/commit/a4d9534a900f13ae7355904cda05097d781f27e3)) + ## [3.27.0](https://github.com/googleapis/python-bigquery/compare/v3.26.0...v3.27.0) (2024-11-01) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 8f4418777..7da2c534f 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.27.0" +__version__ = "3.28.0" From 7de6822e1c556a68cb8d50e90664c094697cca1d Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Fri, 17 Jan 2025 10:24:06 -0800 Subject: [PATCH 398/536] fix: add default value in SchemaField.from_api_repr() (#2115) --- google/cloud/bigquery/schema.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index b278b686a..42dfbfca8 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -228,6 +228,12 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": # fields. See https://github.com/googleapis/python-bigquery/issues/6 placeholder._properties = api_repr + # Add the field `mode` with default value if it does not exist. Fixes + # an incompatibility issue with pandas-gbq: + # https://github.com/googleapis/python-bigquery-pandas/issues/854 + if "mode" not in placeholder._properties: + placeholder._properties["mode"] = "NULLABLE" + return placeholder @property From cdc1a6e1623b8305c6a6a1a481b3365e866a073d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 21 Jan 2025 06:04:34 -0500 Subject: [PATCH 399/536] feat: add ExternalCatalogTableOptions class and tests (#2116) * Updates most of external_catalog_table_options * Adds ExternalCatalogTableOptions and tests --- google/cloud/bigquery/external_config.py | 107 ++++++++++++++++++ google/cloud/bigquery/magics/magics.py | 2 +- google/cloud/bigquery/table.py | 35 ++++++ tests/unit/test_external_config.py | 137 +++++++++++++++++++++++ tests/unit/test_table.py | 87 ++++++++++++++ 5 files changed, 367 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 7f2b58f2b..73c4acabf 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -30,6 +30,7 @@ from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery import _helpers from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions +from google.cloud.bigquery import schema from google.cloud.bigquery.schema import SchemaField @@ -1077,3 +1078,109 @@ def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions: config = cls() config._properties = api_repr return config + + +class ExternalCatalogTableOptions: + """Metadata about open source compatible table. The fields contained in these + options correspond to hive metastore's table level properties. + + Args: + connection_id (Optional[str]): The connection specifying the credentials to be + used to read external storage, such as Azure Blob, Cloud Storage, or + S3. The connection is needed to read the open source table from + BigQuery Engine. The connection_id can have the form `..` or + `projects//locations//connections/`. + parameters (Union[Dict[str, Any], None]): A map of key value pairs defining the parameters + and properties of the open source table. Corresponds with hive meta + store table parameters. Maximum size of 4Mib. + storage_descriptor (Optional[StorageDescriptor]): A storage descriptor containing information + about the physical storage of this table. + """ + + def __init__( + self, + connection_id: Optional[str] = None, + parameters: Union[Dict[str, Any], None] = None, + storage_descriptor: Optional[schema.StorageDescriptor] = None, + ): + self._properties: Dict[str, Any] = {} + self.connection_id = connection_id + self.parameters = parameters + self.storage_descriptor = storage_descriptor + + @property + def connection_id(self) -> Optional[str]: + """Optional. The connection specifying the credentials to be + used to read external storage, such as Azure Blob, Cloud Storage, or + S3. The connection is needed to read the open source table from + BigQuery Engine. The connection_id can have the form `..` or + `projects//locations//connections/`. + """ + + return self._properties.get("connectionId") + + @connection_id.setter + def connection_id(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["connectionId"] = value + + @property + def parameters(self) -> Union[Dict[str, Any], None]: + """Optional. A map of key value pairs defining the parameters and + properties of the open source table. Corresponds with hive meta + store table parameters. Maximum size of 4Mib. + """ + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Union[Dict[str, Any], None]): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + @property + def storage_descriptor(self) -> Any: + """Optional. A storage descriptor containing information about the + physical storage of this table.""" + + prop = _helpers._get_sub_prop(self._properties, ["storageDescriptor"]) + + if prop is not None: + return schema.StorageDescriptor.from_api_repr(prop) + return None + + @storage_descriptor.setter + def storage_descriptor(self, value: Union[schema.StorageDescriptor, dict, None]): + value = _helpers._isinstance_or_raise( + value, (schema.StorageDescriptor, dict), none_allowed=True + ) + if isinstance(value, schema.StorageDescriptor): + self._properties["storageDescriptor"] = value.to_api_repr() + else: + self._properties["storageDescriptor"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> ExternalCatalogTableOptions: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'api_repr'. + """ + config = cls() + config._properties = api_repr + return config diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index b153d959a..a5be95185 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -56,7 +56,7 @@ bigquery_magics = None -IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) +IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) # type: ignore class Context(object): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 80ab330ba..fa8d81962 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -69,6 +69,7 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields +from google.cloud.bigquery import external_config if typing.TYPE_CHECKING: # pragma: NO COVER # Unconditionally import optional dependencies again to tell pytype that @@ -408,6 +409,7 @@ class Table(_TableBase): "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", "max_staleness": "maxStaleness", + "external_catalog_table_options": "externalCatalogTableOptions", } def __init__(self, table_ref, schema=None) -> None: @@ -1023,6 +1025,39 @@ def table_constraints(self) -> Optional["TableConstraints"]: table_constraints = TableConstraints.from_api_repr(table_constraints) return table_constraints + @property + def external_catalog_table_options( + self, + ) -> Optional[external_config.ExternalCatalogTableOptions]: + """Options defining open source compatible datasets living in the + BigQuery catalog. Contains metadata of open source database, schema + or namespace represented by the current dataset.""" + + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ) + if prop is not None: + return external_config.ExternalCatalogTableOptions.from_api_repr(prop) + return None + + @external_catalog_table_options.setter + def external_catalog_table_options( + self, value: Union[external_config.ExternalCatalogTableOptions, dict, None] + ): + value = _helpers._isinstance_or_raise( + value, + (external_config.ExternalCatalogTableOptions, dict), + none_allowed=True, + ) + if isinstance(value, external_config.ExternalCatalogTableOptions): + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ] = value.to_api_repr() + else: + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ] = value + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 0c27d8e56..7f84a9f5b 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -14,6 +14,7 @@ import base64 import copy +from typing import Any, Dict, Optional import unittest from google.cloud.bigquery import external_config @@ -979,3 +980,139 @@ def test_from_api_repr(self): assert isinstance(result, external_config.ExternalCatalogDatasetOptions) assert result._properties == api_repr + + +class TestExternalCatalogTableOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.external_config import ExternalCatalogTableOptions + + return ExternalCatalogTableOptions + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + storage_descriptor_repr = { + "inputFormat": "testpath.to.OrcInputFormat", + "locationUri": "gs://test/path/", + "outputFormat": "testpath.to.OrcOutputFormat", + "serDeInfo": { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_lib_name", + "parameters": {"key": "value"}, + }, + } + + CONNECTIONID = "connection123" + PARAMETERS = {"key": "value"} + STORAGEDESCRIPTOR = schema.StorageDescriptor.from_api_repr(storage_descriptor_repr) + EXTERNALCATALOGTABLEOPTIONS = { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": STORAGEDESCRIPTOR.to_api_repr(), + } + + @pytest.mark.parametrize( + "connection_id,parameters,storage_descriptor", + [ + ( + CONNECTIONID, + PARAMETERS, + STORAGEDESCRIPTOR, + ), # set all parameters at once + (CONNECTIONID, None, None), # set only one parameter at a time + (None, PARAMETERS, None), + (None, None, STORAGEDESCRIPTOR), # set storage descriptor using obj + (None, None, storage_descriptor_repr), # set storage descriptor using dict + (None, None, None), # use default parameters + ], + ) + def test_ctor_initialization( + self, + connection_id, + parameters, + storage_descriptor, + ): + instance = self._make_one( + connection_id=connection_id, + parameters=parameters, + storage_descriptor=storage_descriptor, + ) + + assert instance.connection_id == connection_id + assert instance.parameters == parameters + + if isinstance(storage_descriptor, schema.StorageDescriptor): + assert ( + instance.storage_descriptor.to_api_repr() + == storage_descriptor.to_api_repr() + ) + elif isinstance(storage_descriptor, dict): + assert instance.storage_descriptor.to_api_repr() == storage_descriptor + else: + assert instance.storage_descriptor is None + + @pytest.mark.parametrize( + "connection_id,parameters,storage_descriptor", + [ + pytest.param( + 123, + PARAMETERS, + STORAGEDESCRIPTOR, + id="connection_id-invalid-type", + ), + pytest.param( + CONNECTIONID, + 123, + STORAGEDESCRIPTOR, + id="parameters-invalid-type", + ), + pytest.param( + CONNECTIONID, + PARAMETERS, + 123, + id="storage_descriptor-invalid-type", + ), + ], + ) + def test_ctor_invalid_input( + self, + connection_id: str, + parameters: Dict[str, Any], + storage_descriptor: Optional[schema.StorageDescriptor], + ): + with pytest.raises(TypeError) as e: + external_config.ExternalCatalogTableOptions( + connection_id=connection_id, + parameters=parameters, + storage_descriptor=storage_descriptor, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + instance = self._make_one( + connection_id=self.CONNECTIONID, + parameters=self.PARAMETERS, + storage_descriptor=self.STORAGEDESCRIPTOR, + ) + + result = instance.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + + assert result == expected + + def test_from_api_repr(self): + result = self._make_one( + connection_id=self.CONNECTIONID, + parameters=self.PARAMETERS, + storage_descriptor=self.STORAGEDESCRIPTOR, + ) + + instance = self._make_one() + api_repr = self.EXTERNALCATALOGTABLEOPTIONS + result = instance.from_api_repr(api_repr) + + assert isinstance(result, external_config.ExternalCatalogTableOptions) + assert result._properties == api_repr diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index e9d461e9d..de8b331f5 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -30,6 +30,7 @@ from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions +from google.cloud.bigquery import external_config from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference @@ -5879,6 +5880,92 @@ def test_from_api_repr_only_foreign_keys_resource(self): self.assertIsNotNone(instance.foreign_keys) +class TestExternalCatalogTableOptions: + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + DATASET = DatasetReference(PROJECT, DATASET_ID) + TABLEREF = DATASET.table(TABLE_ID) + + @staticmethod + def _get_target_class(self): + from google.cloud.bigquery.table import Table + + return Table + + def _make_one(self, *args, **kw): + return self._get_target_class(self)(*args, **kw) + + EXTERNALCATALOGTABLEOPTIONS = { + "connection_id": "connection123", + "parameters": {"key": "value"}, + "storage_descriptor": { + "input_format": "testpath.to.OrcInputFormat", + "location_uri": "gs://test/path/", + "output_format": "testpath.to.OrcOutputFormat", + "serde_info": { + "serialization_library": "testpath.to.LazySimpleSerDe", + "name": "serde_lib_name", + "parameters": {"key": "value"}, + }, + }, + } + + def test_external_catalog_table_options_default_initialization(self): + table = self._make_one(self.TABLEREF) + + assert table.external_catalog_table_options is None + + def test_external_catalog_table_options_valid_inputs(self): + table = self._make_one(self.TABLEREF) + + # supplied in api_repr format + table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS + result = table.external_catalog_table_options.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + # supplied in obj format + ecto = external_config.ExternalCatalogTableOptions.from_api_repr( + self.EXTERNALCATALOGTABLEOPTIONS + ) + assert isinstance(ecto, external_config.ExternalCatalogTableOptions) + + table.external_catalog_table_options = ecto + result = table.external_catalog_table_options.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + def test_external_catalog_table_options_invalid_input(self): + table = self._make_one(self.TABLEREF) + + # invalid on the whole + with pytest.raises(TypeError) as e: + table.external_catalog_table_options = 123 + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_external_catalog_table_options_to_api_repr(self): + table = self._make_one(self.TABLEREF) + + table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS + result = table.external_catalog_table_options.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + def test_external_catalog_table_options_from_api_repr(self): + table = self._make_one(self.TABLEREF) + + table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS + ecto = external_config.ExternalCatalogTableOptions.from_api_repr( + self.EXTERNALCATALOGTABLEOPTIONS + ) + result = ecto.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + @pytest.mark.parametrize( "table_path", ( From b44fda08cbe52acf2a5137d2056f006100aab938 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 21 Jan 2025 10:08:46 -0800 Subject: [PATCH 400/536] chore(main): release 3.29.0 (#2117) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 12 ++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a7ff5641..45c39e19c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.29.0](https://github.com/googleapis/python-bigquery/compare/v3.28.0...v3.29.0) (2025-01-21) + + +### Features + +* Add ExternalCatalogTableOptions class and tests ([#2116](https://github.com/googleapis/python-bigquery/issues/2116)) ([cdc1a6e](https://github.com/googleapis/python-bigquery/commit/cdc1a6e1623b8305c6a6a1a481b3365e866a073d)) + + +### Bug Fixes + +* Add default value in SchemaField.from_api_repr() ([#2115](https://github.com/googleapis/python-bigquery/issues/2115)) ([7de6822](https://github.com/googleapis/python-bigquery/commit/7de6822e1c556a68cb8d50e90664c094697cca1d)) + ## [3.28.0](https://github.com/googleapis/python-bigquery/compare/v3.27.0...v3.28.0) (2025-01-15) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 7da2c534f..3d852b8a3 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.28.0" +__version__ = "3.29.0" From d4070ca21b5797e900a9e87b966837ee1c278217 Mon Sep 17 00:00:00 2001 From: "Hiroki.H (mahiro)" <56078795+hrkh@users.noreply.github.com> Date: Wed, 22 Jan 2025 04:31:33 +0900 Subject: [PATCH 401/536] feat: support resource_tags for table (#2093) * feat: support resource_tags for table * fix: system test for resource tags * fix: typo * fix: unit test * Update tests/unit/test_client.py * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * fix: append random string suffix to resource tags to prevent test conflicts * Update google/cloud/bigquery/table.py --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/table.py | 17 +++++++++++++ tests/system/test_client.py | 44 ++++++++++++++++++++++++++++++++-- tests/unit/test_client.py | 6 ++++- tests/unit/test_table.py | 27 +++++++++++++++++++++ 4 files changed, 91 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index fa8d81962..934a28cfc 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -409,6 +409,7 @@ class Table(_TableBase): "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", "max_staleness": "maxStaleness", + "resource_tags": "resourceTags", "external_catalog_table_options": "externalCatalogTableOptions", } @@ -1025,6 +1026,22 @@ def table_constraints(self) -> Optional["TableConstraints"]: table_constraints = TableConstraints.from_api_repr(table_constraints) return table_constraints + @property + def resource_tags(self): + """Dict[str, str]: Resource tags for the table. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.resource_tags + """ + return self._properties.setdefault( + self._PROPERTY_TO_API_FIELD["resource_tags"], {} + ) + + @resource_tags.setter + def resource_tags(self, value): + if not isinstance(value, dict) and value is not None: + raise ValueError("resource_tags must be a dict or None") + self._properties[self._PROPERTY_TO_API_FIELD["resource_tags"]] = value + @property def external_catalog_table_options( self, diff --git a/tests/system/test_client.py b/tests/system/test_client.py index c0dd83b12..30e9f94a3 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -732,6 +732,16 @@ def test_list_tables(self): def test_update_table(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) + # This creates unique tag keys for each of test runnings for different Python versions + tag_postfix = "".join(random.choices(string.ascii_letters + string.digits, k=4)) + tag_1 = f"owner_{tag_postfix}" + tag_2 = f"classification_{tag_postfix}" + tag_3 = f"env_{tag_postfix}" + + self._create_resource_tag_key_and_values(tag_1, ["Alice", "Bob"]) + self._create_resource_tag_key_and_values(tag_2, ["public"]) + self._create_resource_tag_key_and_values(tag_3, ["dev"]) + TABLE_NAME = "test_table" table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) @@ -744,14 +754,25 @@ def test_update_table(self): table.friendly_name = "Friendly" table.description = "Description" table.labels = {"priority": "high", "color": "blue"} + table.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "Alice", + f"{Config.CLIENT.project}/{tag_3}": "dev", + } table2 = Config.CLIENT.update_table( - table, ["friendly_name", "description", "labels"] + table, ["friendly_name", "description", "labels", "resource_tags"] ) self.assertEqual(table2.friendly_name, "Friendly") self.assertEqual(table2.description, "Description") self.assertEqual(table2.labels, {"priority": "high", "color": "blue"}) + self.assertEqual( + table2.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "Alice", + f"{Config.CLIENT.project}/{tag_3}": "dev", + }, + ) table2.description = None table2.labels = { @@ -759,9 +780,28 @@ def test_update_table(self): "shape": "circle", # add "priority": None, # delete } - table3 = Config.CLIENT.update_table(table2, ["description", "labels"]) + table2.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "Bob", # change + f"{Config.CLIENT.project}/{tag_2}": "public", # add + f"{Config.CLIENT.project}/{tag_3}": None, # delete + } + table3 = Config.CLIENT.update_table( + table2, ["description", "labels", "resource_tags"] + ) self.assertIsNone(table3.description) self.assertEqual(table3.labels, {"color": "green", "shape": "circle"}) + self.assertEqual( + table3.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "Bob", + f"{Config.CLIENT.project}/{tag_2}": "public", + }, + ) + + # Delete resource tag bindings. + table3.resource_tags = None + table4 = Config.CLIENT.update_table(table3, ["resource_tags"]) + self.assertEqual(table4.resource_tags, {}) # If we try to update using table2 again, it will fail because the # previous update changed the ETag. diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 14089b031..462a70bbe 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2320,6 +2320,7 @@ def test_update_table(self): "description": description, "friendlyName": title, "labels": {"x": "y"}, + "resourceTags": {"123456789012/key": "value"}, } ) schema = [ @@ -2343,7 +2344,8 @@ def test_update_table(self): table.description = description table.friendly_name = title table.labels = {"x": "y"} - fields = ["schema", "description", "friendly_name", "labels"] + table.resource_tags = {"123456789012/key": "value"} + fields = ["schema", "description", "friendly_name", "labels", "resource_tags"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -2375,6 +2377,7 @@ def test_update_table(self): "description": description, "friendlyName": title, "labels": {"x": "y"}, + "resourceTags": {"123456789012/key": "value"}, } conn.api_request.assert_called_once_with( method="PATCH", data=sent, path="/" + path, timeout=7.5 @@ -2383,6 +2386,7 @@ def test_update_table(self): self.assertEqual(updated_table.friendly_name, table.friendly_name) self.assertEqual(updated_table.schema, table.schema) self.assertEqual(updated_table.labels, table.labels) + self.assertEqual(updated_table.resource_tags, table.resource_tags) # ETag becomes If-Match header. table._properties["etag"] = "etag" diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index de8b331f5..5154f01d8 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1481,6 +1481,33 @@ def test_encryption_configuration_setter(self): table.encryption_configuration = None self.assertIsNone(table.encryption_configuration) + def test_resource_tags_getter_empty(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + self.assertEqual(table.resource_tags, {}) + + def test_resource_tags_update_in_place(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table.resource_tags["123456789012/key"] = "value" + self.assertEqual(table.resource_tags, {"123456789012/key": "value"}) + + def test_resource_tags_setter(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table.resource_tags = {"123456789012/key": "value"} + self.assertEqual(table.resource_tags, {"123456789012/key": "value"}) + + def test_resource_tags_setter_bad_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + with self.assertRaises(ValueError): + table.resource_tags = 12345 + def test___repr__(self): from google.cloud.bigquery.table import TableReference From 3d62c165c510daf8a3f000e8c6e4acf7b58cf67c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 27 Jan 2025 12:13:24 -0800 Subject: [PATCH 402/536] chore(python): fix docs publish build (#2113) Source-Link: https://github.com/googleapis/synthtool/commit/bd9ede2fea1b640b7e90d5a1d110e6b300a2b43f Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:04c35dc5f49f0f503a306397d6d043685f8d2bb822ab515818c4208d7fb2db3a Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- .github/.OwlBot.lock.yaml | 4 +- .kokoro/docker/docs/requirements.in | 1 + .kokoro/docker/docs/requirements.txt | 243 ++++++++++++++++++++++++++- .kokoro/publish-docs.sh | 4 - 4 files changed, 237 insertions(+), 15 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 10cf433a8..4c0027ff1 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a -# created: 2025-01-09T12:01:16.422459506Z + digest: sha256:04c35dc5f49f0f503a306397d6d043685f8d2bb822ab515818c4208d7fb2db3a +# created: 2025-01-16T15:24:11.364245182Z diff --git a/.kokoro/docker/docs/requirements.in b/.kokoro/docker/docs/requirements.in index 816817c67..586bd0703 100644 --- a/.kokoro/docker/docs/requirements.in +++ b/.kokoro/docker/docs/requirements.in @@ -1 +1,2 @@ nox +gcp-docuploader diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index f99a5c4aa..a9360a25b 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -2,16 +2,124 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in +# pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.5.2 \ - --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ - --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb +argcomplete==3.5.3 \ + --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ + --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 # via nox +cachetools==5.5.0 \ + --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ + --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a + # via google-auth +certifi==2024.12.14 \ + --hash=sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56 \ + --hash=sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db + # via requests +charset-normalizer==3.4.1 \ + --hash=sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537 \ + --hash=sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa \ + --hash=sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a \ + --hash=sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294 \ + --hash=sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b \ + --hash=sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd \ + --hash=sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601 \ + --hash=sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd \ + --hash=sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4 \ + --hash=sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d \ + --hash=sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2 \ + --hash=sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313 \ + --hash=sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd \ + --hash=sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa \ + --hash=sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8 \ + --hash=sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1 \ + --hash=sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2 \ + --hash=sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496 \ + --hash=sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d \ + --hash=sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b \ + --hash=sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e \ + --hash=sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a \ + --hash=sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4 \ + --hash=sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca \ + --hash=sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78 \ + --hash=sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408 \ + --hash=sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5 \ + --hash=sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3 \ + --hash=sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f \ + --hash=sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a \ + --hash=sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765 \ + --hash=sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6 \ + --hash=sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146 \ + --hash=sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6 \ + --hash=sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9 \ + --hash=sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd \ + --hash=sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c \ + --hash=sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f \ + --hash=sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545 \ + --hash=sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176 \ + --hash=sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770 \ + --hash=sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824 \ + --hash=sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f \ + --hash=sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf \ + --hash=sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487 \ + --hash=sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d \ + --hash=sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd \ + --hash=sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b \ + --hash=sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534 \ + --hash=sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f \ + --hash=sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b \ + --hash=sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9 \ + --hash=sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd \ + --hash=sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125 \ + --hash=sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9 \ + --hash=sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de \ + --hash=sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11 \ + --hash=sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d \ + --hash=sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35 \ + --hash=sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f \ + --hash=sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda \ + --hash=sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7 \ + --hash=sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a \ + --hash=sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971 \ + --hash=sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8 \ + --hash=sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41 \ + --hash=sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d \ + --hash=sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f \ + --hash=sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757 \ + --hash=sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a \ + --hash=sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886 \ + --hash=sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77 \ + --hash=sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76 \ + --hash=sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247 \ + --hash=sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85 \ + --hash=sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb \ + --hash=sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7 \ + --hash=sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e \ + --hash=sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6 \ + --hash=sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037 \ + --hash=sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1 \ + --hash=sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e \ + --hash=sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807 \ + --hash=sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407 \ + --hash=sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c \ + --hash=sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12 \ + --hash=sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3 \ + --hash=sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089 \ + --hash=sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd \ + --hash=sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e \ + --hash=sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00 \ + --hash=sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616 + # via requests +click==8.1.8 \ + --hash=sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2 \ + --hash=sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a + # via gcp-docuploader colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 - # via nox + # via + # gcp-docuploader + # nox distlib==0.3.9 \ --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 @@ -20,10 +128,78 @@ filelock==3.16.1 \ --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 # via virtualenv +gcp-docuploader==0.6.5 \ + --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ + --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea + # via -r requirements.in +google-api-core==2.24.0 \ + --hash=sha256:10d82ac0fca69c82a25b3efdeefccf6f28e02ebb97925a8cce8edbfe379929d9 \ + --hash=sha256:e255640547a597a4da010876d333208ddac417d60add22b6851a0c66a831fcaf + # via + # google-cloud-core + # google-cloud-storage +google-auth==2.37.0 \ + --hash=sha256:0054623abf1f9c83492c63d3f47e77f0a544caa3d40b2d98e099a611c2dd5d00 \ + --hash=sha256:42664f18290a6be591be5329a96fe30184be1a1badb7292a7f686a9659de9ca0 + # via + # google-api-core + # google-cloud-core + # google-cloud-storage +google-cloud-core==2.4.1 \ + --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ + --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 + # via google-cloud-storage +google-cloud-storage==2.19.0 \ + --hash=sha256:aeb971b5c29cf8ab98445082cbfe7b161a1f48ed275822f59ed3f1524ea54fba \ + --hash=sha256:cd05e9e7191ba6cb68934d8eb76054d9be4562aa89dbc4236feee4d7d51342b2 + # via gcp-docuploader +google-crc32c==1.6.0 \ + --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ + --hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ + --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ + --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ + --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ + --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ + --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ + --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ + --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ + --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ + --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ + --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ + --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ + --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ + --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ + --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ + --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ + --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ + --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ + --hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ + --hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ + --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ + --hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ + --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ + --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ + --hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ + --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.7.2 \ + --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ + --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 + # via google-cloud-storage +googleapis-common-protos==1.66.0 \ + --hash=sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c \ + --hash=sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed + # via google-api-core +idna==3.10 \ + --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ + --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 + # via requests nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in + # via -r requirements.in packaging==24.2 \ --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f @@ -32,6 +208,51 @@ platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv +proto-plus==1.25.0 \ + --hash=sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961 \ + --hash=sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91 + # via google-api-core +protobuf==5.29.3 \ + --hash=sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f \ + --hash=sha256:0eb32bfa5219fc8d4111803e9a690658aa2e6366384fd0851064b963b6d1f2a7 \ + --hash=sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888 \ + --hash=sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620 \ + --hash=sha256:6ce8cc3389a20693bfde6c6562e03474c40851b44975c9b2bf6df7d8c4f864da \ + --hash=sha256:84a57163a0ccef3f96e4b6a20516cedcf5bb3a95a657131c5c3ac62200d23252 \ + --hash=sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a \ + --hash=sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e \ + --hash=sha256:b89c115d877892a512f79a8114564fb435943b59067615894c3b13cd3e1fa107 \ + --hash=sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f \ + --hash=sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84 + # via + # gcp-docuploader + # google-api-core + # googleapis-common-protos + # proto-plus +pyasn1==0.6.1 \ + --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ + --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.1 \ + --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ + --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c + # via google-auth +requests==2.32.3 \ + --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ + --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 + # via + # google-api-core + # google-cloud-storage +rsa==4.9 \ + --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ + --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 + # via google-auth +six==1.17.0 \ + --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ + --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 + # via gcp-docuploader tomli==2.2.1 \ --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ @@ -66,7 +287,11 @@ tomli==2.2.1 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.28.0 \ - --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ - --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa +urllib3==2.3.0 \ + --hash=sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df \ + --hash=sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d + # via requests +virtualenv==20.28.1 \ + --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ + --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 # via nox diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 233205d58..4ed4aaf13 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -20,10 +20,6 @@ export PYTHONUNBUFFERED=1 export PATH="${HOME}/.local/bin:${PATH}" -# Install nox -python3.10 -m pip install --require-hashes -r .kokoro/requirements.txt -python3.10 -m nox --version - # build docs nox -s docs From 3a4894827f6e73a4a88cb22933c2004697dabcc7 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 31 Jan 2025 13:59:10 -0500 Subject: [PATCH 403/536] feat: add roundingmode enum, wiring, and tests (#2121) * feat: adds roundingmode and entity types * Adds rounding_mode to schema file and tests * tweaks RoundingMode docstring and roundingmode logic * Updates tests to apply better coverage for rounding_mode * Modifies docstring * Removes client-side validation, simplifies some code * Updates foreign_type_definition processing --- google/cloud/bigquery/enums.py | 45 ++++++++++++++++++++++++- google/cloud/bigquery/schema.py | 59 +++++++++++++++++++++++++++++++-- tests/unit/test_schema.py | 56 ++++++++++++++++++++++++++++++- 3 files changed, 156 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index d8cbe9969..5519bc989 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -246,6 +246,11 @@ class KeyResultStatementKind: class StandardSqlTypeNames(str, enum.Enum): + """Enum of allowed SQL type names in schema.SchemaField. + + Datatype used in GoogleSQL. + """ + def _generate_next_value_(name, start, count, last_values): return name @@ -267,6 +272,9 @@ def _generate_next_value_(name, start, count, last_values): ARRAY = enum.auto() STRUCT = enum.auto() RANGE = enum.auto() + # NOTE: FOREIGN acts as a wrapper for data types + # not natively understood by BigQuery unless translated + FOREIGN = enum.auto() class EntityTypes(str, enum.Enum): @@ -285,7 +293,10 @@ class EntityTypes(str, enum.Enum): # See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types # and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types class SqlTypeNames(str, enum.Enum): - """Enum of allowed SQL type names in schema.SchemaField.""" + """Enum of allowed SQL type names in schema.SchemaField. + + Datatype used in Legacy SQL. + """ STRING = "STRING" BYTES = "BYTES" @@ -306,6 +317,9 @@ class SqlTypeNames(str, enum.Enum): DATETIME = "DATETIME" INTERVAL = "INTERVAL" # NOTE: not available in legacy types RANGE = "RANGE" # NOTE: not available in legacy types + # NOTE: FOREIGN acts as a wrapper for data types + # not natively understood by BigQuery unless translated + FOREIGN = "FOREIGN" class WriteDisposition(object): @@ -344,3 +358,32 @@ class DeterminismLevel: NOT_DETERMINISTIC = "NOT_DETERMINISTIC" """The UDF is not deterministic.""" + + +class RoundingMode(str, enum.Enum): + """Rounding mode options that can be used when storing NUMERIC or BIGNUMERIC + values. + + ROUNDING_MODE_UNSPECIFIED: will default to using ROUND_HALF_AWAY_FROM_ZERO. + + ROUND_HALF_AWAY_FROM_ZERO: rounds half values away from zero when applying + precision and scale upon writing of NUMERIC and BIGNUMERIC values. + For Scale: 0 + * 1.1, 1.2, 1.3, 1.4 => 1 + * 1.5, 1.6, 1.7, 1.8, 1.9 => 2 + + ROUND_HALF_EVEN: rounds half values to the nearest even value when applying + precision and scale upon writing of NUMERIC and BIGNUMERIC values. + For Scale: 0 + * 1.1, 1.2, 1.3, 1.4 => 1 + * 1.5 => 2 + * 1.6, 1.7, 1.8, 1.9 => 2 + * 2.5 => 2 + """ + + def _generate_next_value_(name, start, count, last_values): + return name + + ROUNDING_MODE_UNSPECIFIED = enum.auto() + ROUND_HALF_AWAY_FROM_ZERO = enum.auto() + ROUND_HALF_EVEN = enum.auto() diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 42dfbfca8..0f011a275 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -22,14 +22,15 @@ from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql +from google.cloud.bigquery import enums from google.cloud.bigquery.enums import StandardSqlTypeNames _STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: -# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types -# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types +# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types +# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types LEGACY_TO_STANDARD_TYPES = { "STRING": StandardSqlTypeNames.STRING, "BYTES": StandardSqlTypeNames.BYTES, @@ -48,6 +49,7 @@ "DATE": StandardSqlTypeNames.DATE, "TIME": StandardSqlTypeNames.TIME, "DATETIME": StandardSqlTypeNames.DATETIME, + "FOREIGN": StandardSqlTypeNames.FOREIGN, # no direct conversion from ARRAY, the latter is represented by mode="REPEATED" } """String names of the legacy SQL types to integer codes of Standard SQL standard_sql.""" @@ -166,6 +168,35 @@ class SchemaField(object): the type is RANGE, this field is required. Possible values for the field element type of a RANGE include `DATE`, `DATETIME` and `TIMESTAMP`. + + rounding_mode: Union[enums.RoundingMode, str, None] + Specifies the rounding mode to be used when storing values of + NUMERIC and BIGNUMERIC type. + + Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO. + ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero + when applying precision and scale upon writing of NUMERIC and BIGNUMERIC + values. + + For Scale: 0 + 1.1, 1.2, 1.3, 1.4 => 1 + 1.5, 1.6, 1.7, 1.8, 1.9 => 2 + + ROUND_HALF_EVEN rounds half values to the nearest even value + when applying precision and scale upon writing of NUMERIC and BIGNUMERIC + values. + + For Scale: 0 + 1.1, 1.2, 1.3, 1.4 => 1 + 1.5 => 2 + 1.6, 1.7, 1.8, 1.9 => 2 + 2.5 => 2 + + foreign_type_definition: Optional[str] + Definition of the foreign data type. + + Only valid for top-level schema fields (not nested fields). + If the type is FOREIGN, this field is required. """ def __init__( @@ -181,11 +212,14 @@ def __init__( scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, range_element_type: Union[FieldElementType, str, None] = None, + rounding_mode: Union[enums.RoundingMode, str, None] = None, + foreign_type_definition: Optional[str] = None, ): self._properties: Dict[str, Any] = { "name": name, "type": field_type, } + self._properties["name"] = name if mode is not None: self._properties["mode"] = mode.upper() if description is not _DEFAULT_VALUE: @@ -206,6 +240,11 @@ def __init__( self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): self._properties["rangeElementType"] = range_element_type.to_api_repr() + if rounding_mode is not None: + self._properties["roundingMode"] = rounding_mode + if foreign_type_definition is not None: + self._properties["foreignTypeDefinition"] = foreign_type_definition + if fields: # Don't set the property if it's not set. self._properties["fields"] = [field.to_api_repr() for field in fields] @@ -304,6 +343,22 @@ def range_element_type(self): ret = self._properties.get("rangeElementType") return FieldElementType.from_api_repr(ret) + @property + def rounding_mode(self): + """Enum that specifies the rounding mode to be used when storing values of + NUMERIC and BIGNUMERIC type. + """ + return self._properties.get("roundingMode") + + @property + def foreign_type_definition(self): + """Definition of the foreign data type. + + Only valid for top-level schema fields (not nested fields). + If the type is FOREIGN, this field is required. + """ + return self._properties.get("foreignTypeDefinition") + @property def fields(self): """Optional[tuple]: Subfields contained in this field. diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index efbc5d26f..467f1e1de 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -19,6 +19,7 @@ import pytest from google.cloud import bigquery +from google.cloud.bigquery import enums from google.cloud.bigquery.standard_sql import StandardSqlStructType from google.cloud.bigquery import schema from google.cloud.bigquery.schema import PolicyTagList @@ -49,6 +50,8 @@ def test_constructor_defaults(self): self.assertEqual(field.fields, ()) self.assertIsNone(field.policy_tags) self.assertIsNone(field.default_value_expression) + self.assertEqual(field.rounding_mode, None) + self.assertEqual(field.foreign_type_definition, None) def test_constructor_explicit(self): FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field" @@ -64,6 +67,8 @@ def test_constructor_explicit(self): ) ), default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION, + rounding_mode=enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED, + foreign_type_definition="INTEGER", ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") @@ -80,6 +85,8 @@ def test_constructor_explicit(self): ) ), ) + self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") + self.assertEqual(field.foreign_type_definition, "INTEGER") def test_constructor_explicit_none(self): field = self._make_one("test", "STRING", description=None, policy_tags=None) @@ -137,8 +144,16 @@ def test_to_api_repr(self): {"names": ["foo", "bar"]}, ) + ROUNDINGMODE = enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED + field = self._make_one( - "foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy + "foo", + "INTEGER", + "NULLABLE", + description="hello world", + policy_tags=policy, + rounding_mode=ROUNDINGMODE, + foreign_type_definition=None, ) self.assertEqual( field.to_api_repr(), @@ -148,6 +163,7 @@ def test_to_api_repr(self): "type": "INTEGER", "description": "hello world", "policyTags": {"names": ["foo", "bar"]}, + "roundingMode": "ROUNDING_MODE_UNSPECIFIED", }, ) @@ -181,6 +197,7 @@ def test_from_api_repr(self): "description": "test_description", "name": "foo", "type": "record", + "roundingMode": "ROUNDING_MODE_UNSPECIFIED", } ) self.assertEqual(field.name, "foo") @@ -192,6 +209,7 @@ def test_from_api_repr(self): self.assertEqual(field.fields[0].field_type, "INTEGER") self.assertEqual(field.fields[0].mode, "NULLABLE") self.assertEqual(field.range_element_type, None) + self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") def test_from_api_repr_policy(self): field = self._get_target_class().from_api_repr( @@ -283,6 +301,28 @@ def test_fields_property(self): schema_field = self._make_one("boat", "RECORD", fields=fields) self.assertEqual(schema_field.fields, fields) + def test_roundingmode_property_str(self): + ROUNDINGMODE = "ROUND_HALF_AWAY_FROM_ZERO" + schema_field = self._make_one("test", "STRING", rounding_mode=ROUNDINGMODE) + self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE) + + del schema_field + schema_field = self._make_one("test", "STRING") + schema_field._properties["roundingMode"] = ROUNDINGMODE + self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE) + + def test_foreign_type_definition_property_str(self): + FOREIGN_TYPE_DEFINITION = "INTEGER" + schema_field = self._make_one( + "test", "STRING", foreign_type_definition=FOREIGN_TYPE_DEFINITION + ) + self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) + + del schema_field + schema_field = self._make_one("test", "STRING") + schema_field._properties["foreignTypeDefinition"] = FOREIGN_TYPE_DEFINITION + self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) + def test_to_standard_sql_simple_type(self): examples = ( # a few legacy types @@ -457,6 +497,20 @@ def test_to_standard_sql_unknown_type(self): bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, ) + def test_to_standard_sql_foreign_type_valid(self): + legacy_type = "FOREIGN" + standard_type = bigquery.StandardSqlTypeNames.FOREIGN + foreign_type_definition = "INTEGER" + + field = self._make_one( + "some_field", + field_type=legacy_type, + foreign_type_definition=foreign_type_definition, + ) + standard_field = field.to_standard_sql() + self.assertEqual(standard_field.name, "some_field") + self.assertEqual(standard_field.type.type_kind, standard_type) + def test___eq___wrong_type(self): field = self._make_one("test", "STRING") other = object() From 54c8d07f06a8ae460c9e0fb1614e1fbc21efb5df Mon Sep 17 00:00:00 2001 From: Kien Truong Date: Tue, 4 Feb 2025 03:37:26 +0700 Subject: [PATCH 404/536] fix: avoid blocking in download thread when using BQ Storage API (#2034) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This prevents a deadlock between the main thead and download threads when the threadpool is shutdown prematurely. Co-authored-by: Chalmer Lowe Co-authored-by: Tim Sweña (Swast) --- google/cloud/bigquery/_pandas_helpers.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index bf7d10c0f..050672531 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -796,10 +796,15 @@ def _download_table_bqstorage_stream( rowstream = reader.rows(session) for page in rowstream.pages: - if download_state.done: - return item = page_to_item(page) - worker_queue.put(item) + while True: + if download_state.done: + return + try: + worker_queue.put(item, timeout=_PROGRESS_INTERVAL) + break + except queue.Full: # pragma: NO COVER + continue def _nowait(futures): From 5e7d5eda5a8e9d32b38684a2e44f3e8e84e12876 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 6 Feb 2025 12:43:32 -0600 Subject: [PATCH 405/536] test: add unit test covering the case where worker streams are stopped early (#2127) * test: add unit test covering the case where worker streams are stopped early * use older pyarrow.record_batch constructor * remove flakey log-based tests from snippets * add a gc.collect() call to make sure threads are supposed to be cleaned up --- google/cloud/bigquery/_pandas_helpers.py | 69 ++++++++++---- samples/tests/test_download_public_data.py | 15 +-- .../test_download_public_data_sandbox.py | 17 +--- tests/unit/test__pandas_helpers.py | 93 +++++++++++++++++++ 4 files changed, 146 insertions(+), 48 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 050672531..4f70f6c29 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -20,6 +20,7 @@ from itertools import islice import logging import queue +import threading import warnings from typing import Any, Union, Optional, Callable, Generator, List @@ -119,6 +120,21 @@ def __init__(self): # be an atomic operation in the Python language definition (enforced by # the global interpreter lock). self.done = False + # To assist with testing and understanding the behavior of the + # download, use this object as shared state to track how many worker + # threads have started and have gracefully shutdown. + self._started_workers_lock = threading.Lock() + self.started_workers = 0 + self._finished_workers_lock = threading.Lock() + self.finished_workers = 0 + + def start(self): + with self._started_workers_lock: + self.started_workers += 1 + + def finish(self): + with self._finished_workers_lock: + self.finished_workers += 1 BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { @@ -786,25 +802,35 @@ def _bqstorage_page_to_dataframe(column_names, dtypes, page): def _download_table_bqstorage_stream( download_state, bqstorage_client, session, stream, worker_queue, page_to_item ): - reader = bqstorage_client.read_rows(stream.name) + download_state.start() + try: + reader = bqstorage_client.read_rows(stream.name) - # Avoid deprecation warnings for passing in unnecessary read session. - # https://github.com/googleapis/python-bigquery-storage/issues/229 - if _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: - rowstream = reader.rows() - else: - rowstream = reader.rows(session) - - for page in rowstream.pages: - item = page_to_item(page) - while True: - if download_state.done: - return - try: - worker_queue.put(item, timeout=_PROGRESS_INTERVAL) - break - except queue.Full: # pragma: NO COVER - continue + # Avoid deprecation warnings for passing in unnecessary read session. + # https://github.com/googleapis/python-bigquery-storage/issues/229 + if _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: + rowstream = reader.rows() + else: + rowstream = reader.rows(session) + + for page in rowstream.pages: + item = page_to_item(page) + + # Make sure we set a timeout on put() so that we give the worker + # thread opportunities to shutdown gracefully, for example if the + # parent thread shuts down or the parent generator object which + # collects rows from all workers goes out of scope. See: + # https://github.com/googleapis/python-bigquery/issues/2032 + while True: + if download_state.done: + return + try: + worker_queue.put(item, timeout=_PROGRESS_INTERVAL) + break + except queue.Full: + continue + finally: + download_state.finish() def _nowait(futures): @@ -830,6 +856,7 @@ def _download_table_bqstorage( page_to_item: Optional[Callable] = None, max_queue_size: Any = _MAX_QUEUE_SIZE_DEFAULT, max_stream_count: Optional[int] = None, + download_state: Optional[_DownloadState] = None, ) -> Generator[Any, None, None]: """Downloads a BigQuery table using the BigQuery Storage API. @@ -857,6 +884,9 @@ def _download_table_bqstorage( is True, the requested streams are limited to 1 regardless of the `max_stream_count` value. If 0 or None, then the number of requested streams will be unbounded. Defaults to None. + download_state (Optional[_DownloadState]): + A threadsafe state object which can be used to observe the + behavior of the worker threads created by this method. Yields: pandas.DataFrame: Pandas DataFrames, one for each chunk of data @@ -915,7 +945,8 @@ def _download_table_bqstorage( # Use _DownloadState to notify worker threads when to quit. # See: https://stackoverflow.com/a/29237343/101923 - download_state = _DownloadState() + if download_state is None: + download_state = _DownloadState() # Create a queue to collect frames as they are created in each thread. # diff --git a/samples/tests/test_download_public_data.py b/samples/tests/test_download_public_data.py index 02c2c6f9c..4f6c02452 100644 --- a/samples/tests/test_download_public_data.py +++ b/samples/tests/test_download_public_data.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging - import pytest from .. import download_public_data @@ -21,20 +19,9 @@ pytest.importorskip("google.cloud.bigquery_storage_v1") -def test_download_public_data( - caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str] -) -> None: - # Enable debug-level logging to verify the BigQuery Storage API is used. - caplog.set_level(logging.DEBUG) - +def test_download_public_data(capsys: pytest.CaptureFixture[str]) -> None: download_public_data.download_public_data() out, _ = capsys.readouterr() assert "year" in out assert "gender" in out assert "name" in out - - assert any( - "Started reading table 'bigquery-public-data.usa_names.usa_1910_current' with BQ Storage API session" - in message - for message in caplog.messages - ) diff --git a/samples/tests/test_download_public_data_sandbox.py b/samples/tests/test_download_public_data_sandbox.py index e86f604ad..d3dd31a38 100644 --- a/samples/tests/test_download_public_data_sandbox.py +++ b/samples/tests/test_download_public_data_sandbox.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging - import pytest from .. import download_public_data_sandbox @@ -21,20 +19,9 @@ pytest.importorskip("google.cloud.bigquery_storage_v1") -def test_download_public_data_sandbox( - caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str] -) -> None: - # Enable debug-level logging to verify the BigQuery Storage API is used. - caplog.set_level(logging.DEBUG) - +def test_download_public_data_sandbox(capsys: pytest.CaptureFixture[str]) -> None: download_public_data_sandbox.download_public_data_sandbox() - out, err = capsys.readouterr() + out, _ = capsys.readouterr() assert "year" in out assert "gender" in out assert "name" in out - - assert any( - # An anonymous table is used because this sample reads from query results. - ("Started reading table" in message and "BQ Storage API session" in message) - for message in caplog.messages - ) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 3a5fddacc..edfaadf69 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -16,6 +16,7 @@ import datetime import decimal import functools +import gc import operator import queue from typing import Union @@ -1846,6 +1847,98 @@ def fake_download_stream( assert queue_used.maxsize == expected_maxsize +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test__download_table_bqstorage_shuts_down_workers( + monkeypatch, + module_under_test, +): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2032 + + Make sure that when the top-level iterator goes out of scope (is deleted), + the child threads are also stopped. + """ + from google.cloud.bigquery import dataset + from google.cloud.bigquery import table + import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.types + + monkeypatch.setattr( + _versions_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None + ) + monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0") + + # Create a fake stream with a decent number of rows. + arrow_schema = pyarrow.schema( + [ + ("int_col", pyarrow.int64()), + ("str_col", pyarrow.string()), + ] + ) + arrow_rows = pyarrow.record_batch( + [ + pyarrow.array([0, 1, 2], type=pyarrow.int64()), + pyarrow.array(["a", "b", "c"], type=pyarrow.string()), + ], + schema=arrow_schema, + ) + session = google.cloud.bigquery_storage_v1.types.ReadSession() + session.data_format = "ARROW" + session.arrow_schema = {"serialized_schema": arrow_schema.serialize().to_pybytes()} + session.streams = [ + google.cloud.bigquery_storage_v1.types.ReadStream(name=name) + for name in ("stream/s0", "stream/s1", "stream/s2") + ] + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + reader.__iter__.return_value = [ + google.cloud.bigquery_storage_v1.types.ReadRowsResponse( + arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, + arrow_record_batch={ + "serialized_record_batch": arrow_rows.serialize().to_pybytes() + }, + ) + for _ in range(100) + ] + reader.rows.return_value = google.cloud.bigquery_storage_v1.reader.ReadRowsIterable( + reader, read_session=session + ) + bqstorage_client.read_rows.return_value = reader + bqstorage_client.create_read_session.return_value = session + table_ref = table.TableReference( + dataset.DatasetReference("project-x", "dataset-y"), + "table-z", + ) + download_state = module_under_test._DownloadState() + assert download_state.started_workers == 0 + assert download_state.finished_workers == 0 + + result_gen = module_under_test._download_table_bqstorage( + "some-project", + table_ref, + bqstorage_client, + max_queue_size=1, + page_to_item=module_under_test._bqstorage_page_to_arrow, + download_state=download_state, + ) + + result_gen_iter = iter(result_gen) + next(result_gen_iter) + assert download_state.started_workers == 3 + assert download_state.finished_workers == 0 + + # Stop iteration early and simulate the variables going out of scope + # to be doubly sure that the worker threads are supposed to be cleaned up. + del result_gen, result_gen_iter + gc.collect() + + assert download_state.started_workers == 3 + assert download_state.finished_workers == 3 + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( From b5bcfb303d27015b747a3b0747ecd7f7ed0ed557 Mon Sep 17 00:00:00 2001 From: Alicia Williams Date: Fri, 14 Feb 2025 12:50:18 -0800 Subject: [PATCH 406/536] docs: update magics.rst (#2125) * Update magics.rst use bigquery-magics package for the %%bigquery magic * Update magics.rst add back space * update reference and link for bigquery magics --------- Co-authored-by: Lingqing Gan --- docs/magics.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/magics.rst b/docs/magics.rst index aa14c6bfa..549d67f76 100644 --- a/docs/magics.rst +++ b/docs/magics.rst @@ -6,7 +6,7 @@ in a Jupyter notebook cell. .. code:: - %load_ext google.cloud.bigquery + %load_ext bigquery_magics This makes the ``%%bigquery`` magic available. @@ -27,8 +27,9 @@ Running a parameterized query: :start-after: [START bigquery_jupyter_query_params_scalars] :end-before: [END bigquery_jupyter_query_params_scalars] -API Reference -------------- +BigQuery Magics Reference +------------------------- -.. automodule:: google.cloud.bigquery.magics.magics - :members: +- `BigQuery Magics Documentation`_ + +.. _BigQuery Magics Documentation: https://googleapis.dev/python/bigquery-magics/latest From b03a2afabde7f42be45f62fabd3dc0e6a9a493e1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 14:11:49 -0800 Subject: [PATCH 407/536] chore(deps): bump cryptography from 43.0.1 to 44.0.1 in /.kokoro (#2130) Bumps [cryptography](https://github.com/pyca/cryptography) from 43.0.1 to 44.0.1. - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/43.0.1...44.0.1) --- updated-dependencies: - dependency-name: cryptography dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- .kokoro/requirements.txt | 60 +++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 16db448c1..6ad95a04a 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -112,34 +112,38 @@ colorlog==6.8.2 \ # via # gcp-docuploader # nox -cryptography==43.0.1 \ - --hash=sha256:014f58110f53237ace6a408b5beb6c427b64e084eb451ef25a28308270086494 \ - --hash=sha256:1bbcce1a551e262dfbafb6e6252f1ae36a248e615ca44ba302df077a846a8806 \ - --hash=sha256:203e92a75716d8cfb491dc47c79e17d0d9207ccffcbcb35f598fbe463ae3444d \ - --hash=sha256:27e613d7077ac613e399270253259d9d53872aaf657471473ebfc9a52935c062 \ - --hash=sha256:2bd51274dcd59f09dd952afb696bf9c61a7a49dfc764c04dd33ef7a6b502a1e2 \ - --hash=sha256:38926c50cff6f533f8a2dae3d7f19541432610d114a70808f0926d5aaa7121e4 \ - --hash=sha256:511f4273808ab590912a93ddb4e3914dfd8a388fed883361b02dea3791f292e1 \ - --hash=sha256:58d4e9129985185a06d849aa6df265bdd5a74ca6e1b736a77959b498e0505b85 \ - --hash=sha256:5b43d1ea6b378b54a1dc99dd8a2b5be47658fe9a7ce0a58ff0b55f4b43ef2b84 \ - --hash=sha256:61ec41068b7b74268fa86e3e9e12b9f0c21fcf65434571dbb13d954bceb08042 \ - --hash=sha256:666ae11966643886c2987b3b721899d250855718d6d9ce41b521252a17985f4d \ - --hash=sha256:68aaecc4178e90719e95298515979814bda0cbada1256a4485414860bd7ab962 \ - --hash=sha256:7c05650fe8023c5ed0d46793d4b7d7e6cd9c04e68eabe5b0aeea836e37bdcec2 \ - --hash=sha256:80eda8b3e173f0f247f711eef62be51b599b5d425c429b5d4ca6a05e9e856baa \ - --hash=sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d \ - --hash=sha256:88cce104c36870d70c49c7c8fd22885875d950d9ee6ab54df2745f83ba0dc365 \ - --hash=sha256:9d3cdb25fa98afdd3d0892d132b8d7139e2c087da1712041f6b762e4f807cc96 \ - --hash=sha256:a575913fb06e05e6b4b814d7f7468c2c660e8bb16d8d5a1faf9b33ccc569dd47 \ - --hash=sha256:ac119bb76b9faa00f48128b7f5679e1d8d437365c5d26f1c2c3f0da4ce1b553d \ - --hash=sha256:c1332724be35d23a854994ff0b66530119500b6053d0bd3363265f7e5e77288d \ - --hash=sha256:d03a475165f3134f773d1388aeb19c2d25ba88b6a9733c5c590b9ff7bbfa2e0c \ - --hash=sha256:d75601ad10b059ec832e78823b348bfa1a59f6b8d545db3a24fd44362a1564cb \ - --hash=sha256:de41fd81a41e53267cb020bb3a7212861da53a7d39f863585d13ea11049cf277 \ - --hash=sha256:e710bf40870f4db63c3d7d929aa9e09e4e7ee219e703f949ec4073b4294f6172 \ - --hash=sha256:ea25acb556320250756e53f9e20a4177515f012c9eaea17eb7587a8c4d8ae034 \ - --hash=sha256:f98bf604c82c416bc829e490c700ca1553eafdf2912a91e23a79d97d9801372a \ - --hash=sha256:fba1007b3ef89946dbbb515aeeb41e30203b004f0b4b00e5e16078b518563289 +cryptography==44.0.1 \ + --hash=sha256:00918d859aa4e57db8299607086f793fa7813ae2ff5a4637e318a25ef82730f7 \ + --hash=sha256:1e8d181e90a777b63f3f0caa836844a1182f1f265687fac2115fcf245f5fbec3 \ + --hash=sha256:1f9a92144fa0c877117e9748c74501bea842f93d21ee00b0cf922846d9d0b183 \ + --hash=sha256:21377472ca4ada2906bc313168c9dc7b1d7ca417b63c1c3011d0c74b7de9ae69 \ + --hash=sha256:24979e9f2040c953a94bf3c6782e67795a4c260734e5264dceea65c8f4bae64a \ + --hash=sha256:2a46a89ad3e6176223b632056f321bc7de36b9f9b93b2cc1cccf935a3849dc62 \ + --hash=sha256:322eb03ecc62784536bc173f1483e76747aafeb69c8728df48537eb431cd1911 \ + --hash=sha256:436df4f203482f41aad60ed1813811ac4ab102765ecae7a2bbb1dbb66dcff5a7 \ + --hash=sha256:4f422e8c6a28cf8b7f883eb790695d6d45b0c385a2583073f3cec434cc705e1a \ + --hash=sha256:53f23339864b617a3dfc2b0ac8d5c432625c80014c25caac9082314e9de56f41 \ + --hash=sha256:5fed5cd6102bb4eb843e3315d2bf25fede494509bddadb81e03a859c1bc17b83 \ + --hash=sha256:610a83540765a8d8ce0f351ce42e26e53e1f774a6efb71eb1b41eb01d01c3d12 \ + --hash=sha256:6c8acf6f3d1f47acb2248ec3ea261171a671f3d9428e34ad0357148d492c7864 \ + --hash=sha256:6f76fdd6fd048576a04c5210d53aa04ca34d2ed63336d4abd306d0cbe298fddf \ + --hash=sha256:72198e2b5925155497a5a3e8c216c7fb3e64c16ccee11f0e7da272fa93b35c4c \ + --hash=sha256:887143b9ff6bad2b7570da75a7fe8bbf5f65276365ac259a5d2d5147a73775f2 \ + --hash=sha256:888fcc3fce0c888785a4876ca55f9f43787f4c5c1cc1e2e0da71ad481ff82c5b \ + --hash=sha256:8e6a85a93d0642bd774460a86513c5d9d80b5c002ca9693e63f6e540f1815ed0 \ + --hash=sha256:94f99f2b943b354a5b6307d7e8d19f5c423a794462bde2bf310c770ba052b1c4 \ + --hash=sha256:9b336599e2cb77b1008cb2ac264b290803ec5e8e89d618a5e978ff5eb6f715d9 \ + --hash=sha256:a2d8a7045e1ab9b9f803f0d9531ead85f90c5f2859e653b61497228b18452008 \ + --hash=sha256:b8272f257cf1cbd3f2e120f14c68bff2b6bdfcc157fafdee84a1b795efd72862 \ + --hash=sha256:bf688f615c29bfe9dfc44312ca470989279f0e94bb9f631f85e3459af8efc009 \ + --hash=sha256:d9c5b9f698a83c8bd71e0f4d3f9f839ef244798e5ffe96febfa9714717db7af7 \ + --hash=sha256:dd7c7e2d71d908dc0f8d2027e1604102140d84b155e658c20e8ad1304317691f \ + --hash=sha256:df978682c1504fc93b3209de21aeabf2375cb1571d4e61907b3e7a2540e83026 \ + --hash=sha256:e403f7f766ded778ecdb790da786b418a9f2394f36e8cc8b796cc056ab05f44f \ + --hash=sha256:eb3889330f2a4a148abead555399ec9a32b13b7c8ba969b72d8e500eb7ef84cd \ + --hash=sha256:f4daefc971c2d1f82f03097dc6f216744a6cd2ac0f04c68fb935ea2ba2a0d420 \ + --hash=sha256:f51f5705ab27898afda1aaa430f34ad90dc117421057782022edf0600bec5f14 \ + --hash=sha256:fd0ee90072861e276b0ff08bd627abec29e32a53b2be44e41dbcdf87cbee2b00 # via # -r requirements.in # gcp-releasetool From 7603bd71d60592ef2a551d9eea09987b218edc73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 21 Feb 2025 11:44:59 -0600 Subject: [PATCH 408/536] deps: use pandas-gbq to determine schema in `load_table_from_dataframe` (#2095) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: use pandas-gbq to determine schema in `load_table_from_dataframe` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix some unit tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * bump minimum pandas-gbq to 0.26.1 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * drop pandas-gbq from python 3.7 extras * relax warning message text assertion * use consistent time zone presense/absense in time datetime system test * Update google/cloud/bigquery/_pandas_helpers.py * Update google/cloud/bigquery/_pandas_helpers.py Co-authored-by: Chalmer Lowe * remove pandas-gbq from at least 1 unit test and system test session --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/_pandas_helpers.py | 35 +++++++++++- google/cloud/bigquery/_pyarrow_helpers.py | 7 ++- noxfile.py | 15 ++++++ pyproject.toml | 3 ++ testing/constraints-3.8.txt | 9 ++++ tests/system/test_pandas.py | 2 +- tests/unit/test__pandas_helpers.py | 65 +++++++++++++++++++---- tests/unit/test_client.py | 33 +++++++++--- 8 files changed, 147 insertions(+), 22 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 4f70f6c29..0017d92ce 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Shared helper functions for connecting BigQuery and pandas.""" +"""Shared helper functions for connecting BigQuery and pandas. + +NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, +instead. See: go/pandas-gbq-and-bigframes-redundancy and +https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pandas_to_bigquery.py +""" import concurrent.futures from datetime import datetime @@ -40,6 +45,16 @@ else: import numpy + +try: + import pandas_gbq.schema.pandas_to_bigquery # type: ignore + + pandas_gbq_import_exception = None +except ImportError as exc: + pandas_gbq = None + pandas_gbq_import_exception = exc + + try: import db_dtypes # type: ignore @@ -445,6 +460,10 @@ def _first_array_valid(series): def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. + DEPRECATED: Use + pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields(), + instead. See: go/pandas-gbq-and-bigframes-redundancy. + Args: dataframe (pandas.DataFrame): DataFrame for which the client determines the BigQuery schema. @@ -460,6 +479,20 @@ def dataframe_to_bq_schema(dataframe, bq_schema): The automatically determined schema. Returns None if the type of any column cannot be determined. """ + if pandas_gbq is None: + warnings.warn( + "Loading pandas DataFrame into BigQuery will require pandas-gbq " + "package version 0.26.1 or greater in the future. " + f"Tried to import pandas-gbq and got: {pandas_gbq_import_exception}", + category=FutureWarning, + ) + else: + return pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields( + dataframe, + override_bigquery_fields=bq_schema, + index=True, + ) + if bq_schema: bq_schema = schema._to_schema_fields(bq_schema) bq_schema_index = {field.name: field for field in bq_schema} diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py index 3c745a611..1b42cd5c7 100644 --- a/google/cloud/bigquery/_pyarrow_helpers.py +++ b/google/cloud/bigquery/_pyarrow_helpers.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Shared helper functions for connecting BigQuery and pyarrow.""" +"""Shared helper functions for connecting BigQuery and pyarrow. + +NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, +instead. See: go/pandas-gbq-and-bigframes-redundancy and +https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py +""" from typing import Any diff --git a/noxfile.py b/noxfile.py index e08956b11..87bd9a70c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -110,6 +110,14 @@ def default(session, install_extras=True): else: install_target = "." session.install("-e", install_target, "-c", constraints_path) + + # Test with some broken "extras" in case the user didn't install the extra + # directly. For example, pandas-gbq is recommended for pandas features, but + # we want to test that we fallback to the previous behavior. For context, + # see internal document go/pandas-gbq-and-bigframes-redundancy. + if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: + session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y") + session.run("python", "-m", "pip", "freeze") # Run py.test against the unit tests. @@ -228,6 +236,13 @@ def system(session): extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) + # Test with some broken "extras" in case the user didn't install the extra + # directly. For example, pandas-gbq is recommended for pandas features, but + # we want to test that we fallback to the previous behavior. For context, + # see internal document go/pandas-gbq-and-bigframes-redundancy. + if session.python == SYSTEM_TEST_PYTHON_VERSIONS[0]: + session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y") + # print versions of all dependencies session.run("python", "-m", "pip", "freeze") diff --git a/pyproject.toml b/pyproject.toml index ecf21d922..c4e5c2f0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,9 @@ bqstorage = [ ] pandas = [ "pandas >= 1.1.0", + "pandas-gbq >= 0.26.1; python_version >= '3.8'", + "grpcio >= 1.47.0, < 2.0dev", + "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", "pyarrow >= 3.0.0", "db-dtypes >= 0.3.0, < 2.0.0dev", "importlib_metadata >= 1.0.0; python_version < '3.8'", diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt index e5e73c5c7..9883fb8cc 100644 --- a/testing/constraints-3.8.txt +++ b/testing/constraints-3.8.txt @@ -1,2 +1,11 @@ grpcio==1.47.0 pandas==1.2.0 + +# This constraints file is used to check that lower bounds +# are correct in setup.py +# +# Pin the version to the lower bound. +# +# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", +# Then this file should have foo==1.14.0 +pandas-gbq==0.26.1 diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 85c7b79e6..a9e76d416 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -1259,7 +1259,7 @@ def test_upload_time_and_datetime_56(bigquery_client, dataset_id): df = pandas.DataFrame( dict( dt=[ - datetime.datetime(2020, 1, 8, 8, 0, 0), + datetime.datetime(2020, 1, 8, 8, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime( 2020, 1, diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index edfaadf69..fdd232a5c 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -35,6 +35,11 @@ except ImportError: pandas = None +try: + import pandas_gbq.schema.pandas_to_bigquery +except ImportError: + pandas_gbq = None + try: import geopandas except ImportError: @@ -1281,7 +1286,21 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_named_index(module_under_test): +@pytest.mark.skipif(pandas_gbq is None, reason="Requires `pandas-gbq`") +def test_dataframe_to_bq_schema_returns_schema_with_pandas_gbq( + module_under_test, monkeypatch +): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) + got = module_under_test.dataframe_to_bq_schema(dataframe, []) + # Don't assert beyond this, since pandas-gbq is now source of truth. + assert got is not None + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_named_index(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1292,7 +1311,8 @@ def test_dataframe_to_bq_schema_w_named_index(module_under_test): index = pandas.Index(["a", "b"], name="str_index") dataframe = pandas.DataFrame(df_data, index=index) - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) expected_schema = ( schema.SchemaField("str_index", "STRING", "NULLABLE"), @@ -1304,7 +1324,9 @@ def test_dataframe_to_bq_schema_w_named_index(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_multiindex(module_under_test): +def test_dataframe_to_bq_schema_w_multiindex(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1321,7 +1343,8 @@ def test_dataframe_to_bq_schema_w_multiindex(module_under_test): ) dataframe = pandas.DataFrame(df_data, index=index) - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) expected_schema = ( schema.SchemaField("str_index", "STRING", "NULLABLE"), @@ -1335,7 +1358,9 @@ def test_dataframe_to_bq_schema_w_multiindex(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): +def test_dataframe_to_bq_schema_w_bq_schema(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1350,7 +1375,10 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"}, ] - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema( + dataframe, dict_schema + ) expected_schema = ( schema.SchemaField("str_column", "STRING", "NULLABLE"), @@ -1361,7 +1389,11 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): +def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow( + module_under_test, monkeypatch +): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)}, @@ -1389,7 +1421,11 @@ def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): +def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow( + module_under_test, monkeypatch +): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"id": 10, "status": "FOO", "created_at": datetime.date(2019, 5, 10)}, @@ -1419,7 +1455,9 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): +def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"struct_field": {"one": 2}, "status": "FOO"}, @@ -1443,9 +1481,11 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): @pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") -def test_dataframe_to_bq_schema_geography(module_under_test): +def test_dataframe_to_bq_schema_geography(module_under_test, monkeypatch): from shapely import wkt + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df = geopandas.GeoDataFrame( pandas.DataFrame( dict( @@ -1456,7 +1496,10 @@ def test_dataframe_to_bq_schema_geography(module_under_test): ), geometry="geo1", ) - bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + + with pytest.warns(FutureWarning, match="pandas-gbq"): + bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + assert bq_schema == ( schema.SchemaField("name", "STRING"), schema.SchemaField("geo1", "GEOGRAPHY"), diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 462a70bbe..a5af37b6b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -8391,8 +8391,12 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): autospec=True, side_effect=google.api_core.exceptions.NotFound("Table not found"), ) + pandas_gbq_patch = mock.patch( + "google.cloud.bigquery._pandas_helpers.pandas_gbq", + new=None, + ) - with load_patch as load_table_from_file, get_table_patch: + with load_patch as load_table_from_file, get_table_patch, pandas_gbq_patch: with warnings.catch_warnings(record=True) as warned: client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION @@ -8448,7 +8452,6 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", autospec=True, @@ -8460,6 +8463,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): ] ), ) + with load_patch as load_table_from_file, get_table_patch: client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION @@ -8580,10 +8584,10 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se client = self._make_client() dataframe = pandas.DataFrame({"x": [1, 2, None, 4]}, dtype="Int64") + load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", autospec=True, @@ -8612,8 +8616,11 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - assert tuple(sent_config.schema) == ( - SchemaField("x", "INT64", "NULLABLE", None), + assert ( + # Accept either the GoogleSQL or legacy SQL type name from pandas-gbq. + tuple(sent_config.schema) == (SchemaField("x", "INT64", "NULLABLE", None),) + or tuple(sent_config.schema) + == (SchemaField("x", "INTEGER", "NULLABLE", None),) ) def test_load_table_from_dataframe_struct_fields(self): @@ -8759,7 +8766,7 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): data=records, columns=["float_column", "array_column"] ) - expected_schema = [ + expected_schema_googlesql = [ SchemaField("float_column", "FLOAT"), SchemaField( "array_column", @@ -8767,6 +8774,14 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): mode="REPEATED", ), ] + expected_schema_legacy_sql = [ + SchemaField("float_column", "FLOAT"), + SchemaField( + "array_column", + "INTEGER", + mode="REPEATED", + ), + ] load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True @@ -8802,7 +8817,10 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - assert sent_config.schema == expected_schema + assert ( + sent_config.schema == expected_schema_googlesql + or sent_config.schema == expected_schema_legacy_sql + ) def test_load_table_from_dataframe_w_partial_schema(self): pandas = pytest.importorskip("pandas") @@ -8922,7 +8940,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): load_table_from_file.assert_not_called() message = str(exc_context.value) - assert "bq_schema contains fields not present in dataframe" in message assert "unknown_col" in message def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): From 2c1968115bef8e1dc84e0125615f551b9b011a4b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 21 Feb 2025 12:45:15 -0500 Subject: [PATCH 409/536] Feat: Adds foreign_type_info attribute to table class and adds unit tests. (#2126) * adds foreign_type_info attribute to table * feat: Adds foreign_type_info attribute and tests * updates docstrings for foreign_type_info * Updates property handling, especially as regards set/get_sub_prop * Removes extraneous comments and debug expressions * Refactors build_resource_from_properties w get/set_sub_prop * updates to foreign_type_info, tests and wiring * Adds logic to detect non-Sequence schema.fields value * updates assorted tests and logic --- google/cloud/bigquery/_helpers.py | 4 +- google/cloud/bigquery/schema.py | 57 ++++++---- google/cloud/bigquery/table.py | 75 +++++++++++- tests/unit/job/test_load.py | 2 +- tests/unit/test_client.py | 8 +- tests/unit/test_schema.py | 183 +++++++++++++++++++----------- tests/unit/test_table.py | 173 +++++++++++++++++++++++++++- 7 files changed, 398 insertions(+), 104 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index ea47af28d..d40217c4d 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -978,11 +978,11 @@ def _build_resource_from_properties(obj, filter_fields): """ partial = {} for filter_field in filter_fields: - api_field = obj._PROPERTY_TO_API_FIELD.get(filter_field) + api_field = _get_sub_prop(obj._PROPERTY_TO_API_FIELD, filter_field) if api_field is None and filter_field not in obj._properties: raise ValueError("No property %s" % filter_field) elif api_field is not None: - partial[api_field] = obj._properties.get(api_field) + _set_sub_prop(partial, api_field, _get_sub_prop(obj._properties, api_field)) else: # allows properties that are not defined in the library # and properties that have the same name as API resource key diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 0f011a275..03cde830e 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -15,10 +15,9 @@ """Schemas for BigQuery tables / queries.""" from __future__ import annotations -import collections import enum import typing -from typing import Any, cast, Dict, Iterable, Optional, Union +from typing import Any, cast, Dict, Iterable, Optional, Union, Sequence from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql @@ -489,6 +488,8 @@ def _parse_schema_resource(info): Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]: A list of parsed fields, or ``None`` if no "fields" key found. """ + if isinstance(info, list): + return [SchemaField.from_api_repr(f) for f in info] return [SchemaField.from_api_repr(f) for f in info.get("fields", ())] @@ -501,40 +502,46 @@ def _build_schema_resource(fields): Returns: Sequence[Dict]: Mappings describing the schema of the supplied fields. """ - return [field.to_api_repr() for field in fields] + if isinstance(fields, Sequence): + # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields + return [field.to_api_repr() for field in fields] + + else: + raise TypeError("Schema must be a Sequence (e.g. a list) or None.") def _to_schema_fields(schema): - """Coerce `schema` to a list of schema field instances. + """Coerces schema to a list of SchemaField instances while + preserving the original structure as much as possible. Args: - schema(Sequence[Union[ \ - :class:`~google.cloud.bigquery.schema.SchemaField`, \ - Mapping[str, Any] \ - ]]): - Table schema to convert. If some items are passed as mappings, - their content must be compatible with - :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. + schema (Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ] + ] + ):: + Table schema to convert. Can be a list of SchemaField + objects or mappings. Returns: - Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`] + A list of SchemaField objects. Raises: - Exception: If ``schema`` is not a sequence, or if any item in the - sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` - instance or a compatible mapping representation of the field. + TypeError: If schema is not a Sequence. """ - for field in schema: - if not isinstance(field, (SchemaField, collections.abc.Mapping)): - raise ValueError( - "Schema items must either be fields or compatible " - "mapping representations." - ) - return [ - field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field) - for field in schema - ] + if isinstance(schema, Sequence): + # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields + return [ + field + if isinstance(field, SchemaField) + else SchemaField.from_api_repr(field) + for field in schema + ] + + else: + raise TypeError("Schema must be a Sequence (e.g. a list) or None.") class PolicyTagList(object): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 934a28cfc..c70a0ebea 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -21,7 +21,8 @@ import functools import operator import typing -from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union, Sequence + import warnings try: @@ -66,6 +67,7 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.enums import DefaultPandasDTypes from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery import schema as _schema from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -398,7 +400,7 @@ class Table(_TableBase): "partitioning_type": "timePartitioning", "range_partitioning": "rangePartitioning", "time_partitioning": "timePartitioning", - "schema": "schema", + "schema": ["schema", "fields"], "snapshot_definition": "snapshotDefinition", "clone_definition": "cloneDefinition", "streaming_buffer": "streamingBuffer", @@ -411,6 +413,7 @@ class Table(_TableBase): "max_staleness": "maxStaleness", "resource_tags": "resourceTags", "external_catalog_table_options": "externalCatalogTableOptions", + "foreign_type_info": ["schema", "foreignTypeInfo"], } def __init__(self, table_ref, schema=None) -> None: @@ -451,8 +454,20 @@ def schema(self): If ``schema`` is not a sequence, or if any item in the sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` instance or a compatible mapping representation of the field. + + .. Note:: + If you are referencing a schema for an external catalog table such + as a Hive table, it will also be necessary to populate the foreign_type_info + attribute. This is not necessary if defining the schema for a BigQuery table. + + For details, see: + https://cloud.google.com/bigquery/docs/external-tables + https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets + """ - prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"]) + prop = _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["schema"] + ) if not prop: return [] else: @@ -463,10 +478,21 @@ def schema(self, value): api_field = self._PROPERTY_TO_API_FIELD["schema"] if value is None: - self._properties[api_field] = None - else: + _helpers._set_sub_prop( + self._properties, + api_field, + None, + ) + elif isinstance(value, Sequence): value = _to_schema_fields(value) - self._properties[api_field] = {"fields": _build_schema_resource(value)} + value = _build_schema_resource(value) + _helpers._set_sub_prop( + self._properties, + api_field, + value, + ) + else: + raise TypeError("Schema must be a Sequence (e.g. a list) or None.") @property def labels(self): @@ -1075,6 +1101,43 @@ def external_catalog_table_options( self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] ] = value + @property + def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]: + """Optional. Specifies metadata of the foreign data type definition in + field schema (TableFieldSchema.foreign_type_definition). + + Returns: + Optional[schema.ForeignTypeInfo]: + Foreign type information, or :data:`None` if not set. + + .. Note:: + foreign_type_info is only required if you are referencing an + external catalog such as a Hive table. + For details, see: + https://cloud.google.com/bigquery/docs/external-tables + https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets + """ + + prop = _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"] + ) + if prop is not None: + return _schema.ForeignTypeInfo.from_api_repr(prop) + return None + + @foreign_type_info.setter + def foreign_type_info(self, value: Union[_schema.ForeignTypeInfo, dict, None]): + value = _helpers._isinstance_or_raise( + value, + (_schema.ForeignTypeInfo, dict), + none_allowed=True, + ) + if isinstance(value, _schema.ForeignTypeInfo): + value = value.to_api_repr() + _helpers._set_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"], value + ) + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 0fb044696..10df46fb3 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -272,7 +272,7 @@ def test_schema_setter_invalid_field(self): config = LoadJobConfig() full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): config.schema = [full_name, object()] def test_schema_setter(self): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index a5af37b6b..6897c2552 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2051,7 +2051,7 @@ def test_update_dataset(self): ds.labels = LABELS ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] ds.resource_tags = RESOURCE_TAGS - fields = [ + filter_fields = [ "description", "friendly_name", "location", @@ -2065,12 +2065,12 @@ def test_update_dataset(self): ) as final_attributes: ds2 = client.update_dataset( ds, - fields=fields, + fields=filter_fields, timeout=7.5, ) final_attributes.assert_called_once_with( - {"path": "/%s" % PATH, "fields": fields}, client, None + {"path": "/%s" % PATH, "fields": filter_fields}, client, None ) conn.api_request.assert_called_once_with( @@ -2615,7 +2615,7 @@ def test_update_table_w_schema_None(self): self.assertEqual(len(conn.api_request.call_args_list), 2) req = conn.api_request.call_args_list[1] self.assertEqual(req[1]["method"], "PATCH") - sent = {"schema": None} + sent = {"schema": {"fields": None}} self.assertEqual(req[1]["data"], sent) self.assertEqual(req[1]["path"], "/%s" % path) self.assertEqual(len(updated_table.schema), 0) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 467f1e1de..3f2304a70 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -765,27 +765,62 @@ def test__parse_schema_resource_fields_without_mode(self): self._verifySchema(schema, RESOURCE) -class Test_build_schema_resource(unittest.TestCase, _SchemaBase): +class Test_build_schema_resource: + """Tests for the _build_schema_resource function.""" + def _call_fut(self, resource): - from google.cloud.bigquery.schema import _build_schema_resource + return schema._build_schema_resource(resource) + + FULL_NAME = schema.SchemaField( + name="full_name", field_type="STRING", mode="REQUIRED" + ) + AGE = schema.SchemaField(name="age", field_type="INTEGER", mode="REQUIRED") + LIST_RESOURCE = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + FOREIGN_TYPE_INFO = schema.ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED") + FOREIGN_TYPE_INFO_RESOURCE = {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"} + + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param([], [], id="empty list"), + pytest.param([FULL_NAME, AGE], LIST_RESOURCE, id="list"), + ], + ) + def test_ctor_valid_input(self, schema, expected): + result = self._call_fut(schema) + + assert result == expected - return _build_schema_resource(resource) + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param(123, TypeError, id="invalid type"), + ], + ) + def test_ctor_invalid_input(self, schema, expected): + with pytest.raises(TypeError) as e: + self._call_fut(schema) + + # Looking for the first phrase from the string "Schema must be a ..." + assert "Schema must be a " in str(e.value) def test_defaults(self): from google.cloud.bigquery.schema import SchemaField full_name = SchemaField("full_name", "STRING", mode="REQUIRED") age = SchemaField("age", "INTEGER", mode="REQUIRED") + # test with simple list resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - ) - self.assertEqual( - resource[1], - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ) + assert len(resource) == 2 + assert resource[0] == { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + } + assert resource[1] == {"name": "age", "type": "INTEGER", "mode": "REQUIRED"} def test_w_description(self): from google.cloud.bigquery.schema import SchemaField @@ -802,25 +837,20 @@ def test_w_description(self): description=None, ) resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": DESCRIPTION, - }, - ) - self.assertEqual( - resource[1], - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, - ) + assert len(resource) == 2 + assert resource[0] == { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": DESCRIPTION, + } + + assert resource[1] == { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + } def test_w_subfields(self): from google.cloud.bigquery.schema import SchemaField @@ -832,49 +862,72 @@ def test_w_subfields(self): "phone", "RECORD", mode="REPEATED", fields=[ph_type, ph_num] ) resource = self._call_fut([full_name, phone]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - ) - self.assertEqual( - resource[1], - { - "name": "phone", - "type": "RECORD", - "mode": "REPEATED", - "fields": [ - {"name": "type", "type": "STRING", "mode": "REQUIRED"}, - {"name": "number", "type": "STRING", "mode": "REQUIRED"}, - ], - }, - ) + assert len(resource) == 2 + assert resource[0] == { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + } + assert resource[1] == { + "name": "phone", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, + ], + } + +class Test_to_schema_fields: + """Tests for the _to_schema_fields function.""" -class Test_to_schema_fields(unittest.TestCase): @staticmethod def _call_fut(schema): from google.cloud.bigquery.schema import _to_schema_fields return _to_schema_fields(schema) - def test_invalid_type(self): - schema = [ - ("full_name", "STRING", "REQUIRED"), - ("address", "STRING", "REQUIRED"), - ] - with self.assertRaises(ValueError): - self._call_fut(schema) - - def test_schema_fields_sequence(self): - from google.cloud.bigquery.schema import SchemaField + FULL_NAME = schema.SchemaField( + name="full_name", field_type="STRING", mode="REQUIRED" + ) + AGE = schema.SchemaField(name="age", field_type="INTEGER", mode="REQUIRED") + LIST_RESOURCE = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + FOREIGN_TYPE_INFO = schema.ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED") + FOREIGN_TYPE_INFO_RESOURCE = {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"} - schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INT64", mode="NULLABLE"), - ] + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param([], [], id="empty list"), + pytest.param((), [], id="empty tuple"), + pytest.param(LIST_RESOURCE, [FULL_NAME, AGE], id="list"), + ], + ) + def test_ctor_valid_input(self, schema, expected): result = self._call_fut(schema) - self.assertEqual(result, schema) + + assert result == expected + + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param(123, TypeError, id="invalid schema type"), + pytest.param([123, 123], TypeError, id="invalid SchemaField type"), + pytest.param({"fields": 123}, TypeError, id="invalid type, dict"), + pytest.param( + {"fields": 123, "foreignTypeInfo": 123}, + TypeError, + id="invalid type, dict", + ), + ], + ) + def test_ctor_invalid_input(self, schema, expected): + with pytest.raises(expected): + self._call_fut(schema) def test_unknown_properties(self): schema = [ @@ -933,7 +986,7 @@ def test_valid_mapping_representation(self): ] result = self._call_fut(schema) - self.assertEqual(result, expected_schema) + assert result == expected_schema class TestPolicyTags(unittest.TestCase): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 5154f01d8..1a3d7ec0f 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -31,6 +31,7 @@ from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions from google.cloud.bigquery import external_config +from google.cloud.bigquery import schema from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference @@ -699,7 +700,7 @@ def test_schema_setter_invalid_field(self): table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): table.schema = [full_name, object()] def test_schema_setter_valid_fields(self): @@ -1213,6 +1214,83 @@ def test_to_api_repr_w_unsetting_expiration(self): } self.assertEqual(resource, exp_resource) + def test_to_api_repr_w_schema_and_foreign_type_info(self): + """Tests to ensure that to_api_repr works correctly with + both schema and foreign_type_info fields + """ + + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + FOREIGNTYPEINFO = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + SCHEMA = { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + "foreignTypeInfo": FOREIGNTYPEINFO, + } + + API_REPR = { + "tableReference": { + "projectId": PROJECT, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, + }, + "schema": SCHEMA, + } + + table = self._get_target_class().from_api_repr(API_REPR) + assert table._properties == table.to_api_repr() + + # update schema (i.e. the fields), ensure foreign_type_info is unchanged + table.schema = [] + expected = { + "fields": [], + "foreignTypeInfo": {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}, + } + assert table.to_api_repr()["schema"] == expected + + # update foreign_type_info, ensure schema (i.e. the fields), is unchanged + table.foreign_type_info = {"typeSystem": "SCHEMA_SHOULD_NOT_CHANGE"} + expected = { + "fields": [], + "foreignTypeInfo": {"typeSystem": "SCHEMA_SHOULD_NOT_CHANGE"}, + } + assert table.to_api_repr()["schema"] == expected + + def test_from_api_repr_w_schema_and_foreign_type_info(self): + """Tests to ensure that to_api_repr works correctly with + both schema and foreign_type_info fields + """ + + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + FOREIGNTYPEINFO = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + SCHEMA = { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + "foreignTypeInfo": FOREIGNTYPEINFO, + } + API_REPR = { + "tableReference": { + "projectId": PROJECT, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, + }, + "schema": SCHEMA, + } + + table = self._get_target_class().from_api_repr(API_REPR) + assert table._properties == API_REPR + def test__build_resource_w_custom_field(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -5993,6 +6071,99 @@ def test_external_catalog_table_options_from_api_repr(self): assert result == expected +class TestForeignTypeInfo: + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + DATASET = DatasetReference(PROJECT, DATASET_ID) + TABLEREF = DATASET.table(TABLE_ID) + FOREIGNTYPEINFO = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + API_REPR = { + "tableReference": { + "projectId": PROJECT, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, + }, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + "foreign_info_type": FOREIGNTYPEINFO, + }, + } + + from google.cloud.bigquery.schema import ForeignTypeInfo + + @staticmethod + def _get_target_class(self): + from google.cloud.bigquery.table import Table + + return Table + + def _make_one(self, *args, **kw): + return self._get_target_class(self)(*args, **kw) + + def test_foreign_type_info_default_initialization(self): + table = self._make_one(self.TABLEREF) + assert table.foreign_type_info is None + + @pytest.mark.parametrize( + "foreign_type_info, expected", + [ + ( + {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}, + "TYPE_SYSTEM_UNSPECIFIED", + ), + (None, None), + ( + ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED"), + "TYPE_SYSTEM_UNSPECIFIED", + ), + ], + ) + def test_foreign_type_info_valid_inputs(self, foreign_type_info, expected): + table = self._make_one(self.TABLEREF) + + table.foreign_type_info = foreign_type_info + + if foreign_type_info is None: + result = table.foreign_type_info + else: + result = table.foreign_type_info.type_system + assert result == expected + + def test_foreign_type_info_invalid_inputs(self): + table = self._make_one(self.TABLEREF) + + # invalid on the whole + with pytest.raises(TypeError, match="Pass .*"): + table.foreign_type_info = 123 + + def test_foreign_type_info_to_api_repr(self): + table = self._make_one(self.TABLEREF) + + table.foreign_type_info = self.ForeignTypeInfo( + type_system="TYPE_SYSTEM_UNSPECIFIED", + ) + + result = table.to_api_repr()["schema"]["foreignTypeInfo"] + expected = self.FOREIGNTYPEINFO + assert result == expected + + def test_foreign_type_info_from_api_repr(self): + table = self._make_one(self.TABLEREF) + table.foreign_type_info = self.FOREIGNTYPEINFO + + fti = schema.ForeignTypeInfo.from_api_repr(self.FOREIGNTYPEINFO) + + result = fti.to_api_repr() + expected = self.FOREIGNTYPEINFO + assert result == expected + + @pytest.mark.parametrize( "table_path", ( From fea49ffbf8aa1d53451864ceb7fd73189b6661cb Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 21 Feb 2025 14:16:20 -0500 Subject: [PATCH 410/536] deps: updates required checks list in github (#2136) * deps: updates required checks list in github * deps: updates snippet and system checks in github to remove 3.9 * changes the order of two items in the list. --- .github/sync-repo-settings.yaml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 6543d5285..188c44bbd 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -10,14 +10,10 @@ branchProtectionRules: requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - - 'Kokoro snippets-3.8' - - 'Kokoro snippets-3.12' - - 'Kokoro system-3.8' - 'Kokoro system-3.12' + - 'Kokoro snippets-3.12' - 'cla/google' - 'Samples - Lint' - - 'Samples - Python 3.7' - - 'Samples - Python 3.8' - 'Samples - Python 3.9' - 'Samples - Python 3.10' - 'Samples - Python 3.11' @@ -28,8 +24,7 @@ branchProtectionRules: requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - - 'Kokoro snippets-3.8' - 'cla/google' - 'Samples - Lint' - - 'Samples - Python 3.7' - - 'Samples - Python 3.8' + - 'Samples - Python 3.9' + - 'Samples - Python 3.10' From de10185c5faab6e9c6f12a6423be09ffb83c4ff6 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 25 Feb 2025 11:04:23 -0800 Subject: [PATCH 411/536] chore(python): conditionally load credentials in .kokoro/build.sh (#2137) Source-Link: https://github.com/googleapis/synthtool/commit/aa69fb74717c8f4c58c60f8cc101d3f4b2c07b09 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:f016446d6e520e5fb552c45b110cba3f217bffdd3d06bdddd076e9e6d13266cf Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/build.sh | 20 ++++++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 4c0027ff1..3f7634f25 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:04c35dc5f49f0f503a306397d6d043685f8d2bb822ab515818c4208d7fb2db3a -# created: 2025-01-16T15:24:11.364245182Z + digest: sha256:f016446d6e520e5fb552c45b110cba3f217bffdd3d06bdddd076e9e6d13266cf +# created: 2025-02-21T19:32:52.01306189Z diff --git a/.kokoro/build.sh b/.kokoro/build.sh index e4da2e2a7..d41b45aa1 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -15,11 +15,13 @@ set -eo pipefail +CURRENT_DIR=$(dirname "${BASH_SOURCE[0]}") + if [[ -z "${PROJECT_ROOT:-}" ]]; then - PROJECT_ROOT="github/python-bigquery" + PROJECT_ROOT=$(realpath "${CURRENT_DIR}/..") fi -cd "${PROJECT_ROOT}" +pushd "${PROJECT_ROOT}" # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 @@ -28,10 +30,16 @@ export PYTHONUNBUFFERED=1 env | grep KOKORO # Setup service account credentials. -export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json +if [[ -f "${KOKORO_GFILE_DIR}/service-account.json" ]] +then + export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json +fi # Setup project id. -export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") +if [[ -f "${KOKORO_GFILE_DIR}/project-id.json" ]] +then + export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") +fi # If this is a continuous build, send the test log to the FlakyBot. # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. @@ -46,7 +54,7 @@ fi # If NOX_SESSION is set, it only runs the specified session, # otherwise run all the sessions. if [[ -n "${NOX_SESSION:-}" ]]; then - python3 -m nox -s ${NOX_SESSION:-} + python3 -m nox -s ${NOX_SESSION:-} else - python3 -m nox + python3 -m nox fi From c6d5f8aaec21ab8f17436407aded4bc2316323fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 25 Feb 2025 20:24:36 -0600 Subject: [PATCH 412/536] fix: retry 404 errors in `Client.query(...)` (#2135) * fix: retry 404 errors in `Client.query(...)` * retry on 404 * only retry notfound on jobs.insert * try to improve code coverage * disambiguate job not found from dataset/table not found * remove use of private attributes * fix unit tests * fix cover for retry.py --- google/cloud/bigquery/_job_helpers.py | 27 +++- google/cloud/bigquery/retry.py | 54 ++++++++ tests/unit/test_client.py | 174 +++++++++++++++++++++++++- 3 files changed, 252 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index e66ab2763..b028cd357 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -47,6 +47,7 @@ from google.cloud.bigquery import job import google.cloud.bigquery.query from google.cloud.bigquery import table +import google.cloud.bigquery.retry from google.cloud.bigquery.retry import POLLING_DEFAULT_VALUE # Avoid circular imports @@ -142,12 +143,28 @@ def do_query(): raise create_exc try: + # Sometimes we get a 404 after a Conflict. In this case, we + # have pretty high confidence that by retrying the 404, we'll + # (hopefully) eventually recover the job. + # https://github.com/googleapis/python-bigquery/issues/2134 + # + # Allow users who want to completely disable retries to + # continue to do so by setting retry to None. + get_job_retry = retry + if retry is not None: + # TODO(tswast): Amend the user's retry object with allowing + # 404 to retry when there's a public way to do so. + # https://github.com/googleapis/python-api-core/issues/796 + get_job_retry = ( + google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY + ) + query_job = client.get_job( job_id, project=project, location=location, - retry=retry, - timeout=timeout, + retry=get_job_retry, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ) except core_exceptions.GoogleAPIError: # (includes RetryError) raise @@ -156,7 +173,13 @@ def do_query(): else: return query_job + # Allow users who want to completely disable retries to + # continue to do so by setting job_retry to None. + if job_retry is not None: + do_query = google.cloud.bigquery.retry._DEFAULT_QUERY_JOB_INSERT_RETRY(do_query) + future = do_query() + # The future might be in a failed state now, but if it's # unrecoverable, we'll find out when we ask for it's result, at which # point, we may retry. diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 10958980d..999d0e851 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -82,6 +82,32 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ + +def _should_retry_get_job_conflict(exc): + """Predicate for determining when to retry a jobs.get call after a conflict error. + + Sometimes we get a 404 after a Conflict. In this case, we + have pretty high confidence that by retrying the 404, we'll + (hopefully) eventually recover the job. + https://github.com/googleapis/python-bigquery/issues/2134 + + Note: we may be able to extend this to user-specified predicates + after https://github.com/googleapis/python-api-core/issues/796 + to tweak existing Retry object predicates. + """ + return isinstance(exc, exceptions.NotFound) or _should_retry(exc) + + +# Pick a deadline smaller than our other deadlines since we want to timeout +# before those expire. +_DEFAULT_GET_JOB_CONFLICT_DEADLINE = _DEFAULT_RETRY_DEADLINE / 3.0 +_DEFAULT_GET_JOB_CONFLICT_RETRY = retry.Retry( + predicate=_should_retry_get_job_conflict, + deadline=_DEFAULT_GET_JOB_CONFLICT_DEADLINE, +) +"""Private, may be removed in future.""" + + # Note: Take care when updating DEFAULT_TIMEOUT to anything but None. We # briefly had a default timeout, but even setting it at more than twice the # theoretical server-side default timeout of 2 minutes was not enough for @@ -142,6 +168,34 @@ def _job_should_retry(exc): The default job retry object. """ + +def _query_job_insert_should_retry(exc): + # Per https://github.com/googleapis/python-bigquery/issues/2134, sometimes + # we get a 404 error. In this case, if we get this far, assume that the job + # doesn't actually exist and try again. We can't add 404 to the default + # job_retry because that happens for errors like "this table does not + # exist", which probably won't resolve with a retry. + if isinstance(exc, exceptions.RetryError): + exc = exc.cause + + if isinstance(exc, exceptions.NotFound): + message = exc.message + # Don't try to retry table/dataset not found, just job not found. + # The URL contains jobs, so use whitespace to disambiguate. + return message is not None and " job" in message.lower() + + return _job_should_retry(exc) + + +_DEFAULT_QUERY_JOB_INSERT_RETRY = retry.Retry( + predicate=_query_job_insert_should_retry, + # jobs.insert doesn't wait for the job to complete, so we don't need the + # long _DEFAULT_JOB_DEADLINE for this part. + deadline=_DEFAULT_RETRY_DEADLINE, +) +"""Private, may be removed in future.""" + + DEFAULT_GET_JOB_TIMEOUT = 128 """ Default timeout for Client.get_job(). diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 6897c2552..4f13d6ecc 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -28,9 +28,12 @@ from unittest import mock import warnings -import requests +import freezegun import packaging import pytest +import requests + +import google.api try: @@ -55,6 +58,8 @@ import google.cloud._helpers from google.cloud import bigquery +from google.cloud.bigquery import job as bqjob +import google.cloud.bigquery._job_helpers from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions @@ -5308,6 +5313,173 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): with pytest.raises(DataLoss, match="we lost your job, sorry"): client.query("SELECT 1;", job_id=None) + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails_no_retries(self): + from google.api_core.exceptions import Conflict + from google.api_core.exceptions import DataLoss + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", side_effect=DataLoss("we lost your job, sorry") + ) + + with job_begin_patcher, get_job_patcher: + # If get job request fails but supposedly there does exist a job + # with this ID already, raise the exception explaining why we + # couldn't recover the job. + with pytest.raises(DataLoss, match="we lost your job, sorry"): + client.query( + "SELECT 1;", + job_id=None, + # Explicitly test with no retries to make sure those branches are covered. + retry=None, + job_retry=None, + ) + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404(self): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404, but we know + because of the conflict that really the job does exist. Retry until we + get the job status (or timeout). + """ + job_id = "abc123" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection( + # We're mocking QueryJob._begin, so this is only going to be + # jobs.get requests and responses. + google.api_core.exceptions.TooManyRequests("this is retriable by default"), + google.api_core.exceptions.NotFound("we lost your job"), + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": self.PROJECT, + "location": "TESTLOC", + "jobId": job_id, + } + }, + ) + + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + return_value=job_id, + ) + + with job_begin_patcher, job_id_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). + result = client.query("SELECT 1;", job_id=None) + + jobs_get_path = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{job_id}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. + [jobs_get_path] + * 4, + ) + assert result.job_id == job_id + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404_and_query_job_insert( + self, + ): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404. If it keeps + failing with a 404, assume that the job actually doesn't exist. + """ + job_id_1 = "abc123" + job_id_2 = "xyz789" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + # We're mocking QueryJob._begin, so that the connection should only get + # jobs.get requests. + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": self.PROJECT, + "location": "TESTLOC", + "jobId": job_id_2, + } + }, + ) + + # Choose a small deadline so the 404 retries give up. + retry = ( + google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY.with_deadline(1) + ) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + side_effect=[job_id_1, job_id_2], + ) + retry_patcher = mock.patch.object( + google.cloud.bigquery.retry, + "_DEFAULT_GET_JOB_CONFLICT_RETRY", + retry, + ) + + with freezegun.freeze_time( + "2025-01-01 00:00:00", + # 10x the retry deadline to guarantee a timeout. + auto_tick_seconds=10, + ), job_begin_patcher, job_id_patcher, retry_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). + result = client.query("SELECT 1;", job_id=None) + + jobs_get_path_1 = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{job_id_1}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + jobs_get_path_2 = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{job_id_2}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. + [jobs_get_path_1, jobs_get_path_2], + ) + assert result.job_id == job_id_2 + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): from google.api_core.exceptions import Conflict from google.cloud.bigquery.job import QueryJob From 3a1e3f3378f370986298b9bca652fdf2b5d9d14e Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 27 Feb 2025 10:42:38 -0800 Subject: [PATCH 413/536] chore(main): release 3.30.0 (#2119) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 26 ++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 45c39e19c..91d0a362d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,32 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.30.0](https://github.com/googleapis/python-bigquery/compare/v3.29.0...v3.30.0) (2025-02-26) + + +### Features + +* Add roundingmode enum, wiring, and tests ([#2121](https://github.com/googleapis/python-bigquery/issues/2121)) ([3a48948](https://github.com/googleapis/python-bigquery/commit/3a4894827f6e73a4a88cb22933c2004697dabcc7)) +* Adds foreign_type_info attribute to table class and adds unit tests. ([#2126](https://github.com/googleapis/python-bigquery/issues/2126)) ([2c19681](https://github.com/googleapis/python-bigquery/commit/2c1968115bef8e1dc84e0125615f551b9b011a4b)) +* Support resource_tags for table ([#2093](https://github.com/googleapis/python-bigquery/issues/2093)) ([d4070ca](https://github.com/googleapis/python-bigquery/commit/d4070ca21b5797e900a9e87b966837ee1c278217)) + + +### Bug Fixes + +* Avoid blocking in download thread when using BQ Storage API ([#2034](https://github.com/googleapis/python-bigquery/issues/2034)) ([54c8d07](https://github.com/googleapis/python-bigquery/commit/54c8d07f06a8ae460c9e0fb1614e1fbc21efb5df)) +* Retry 404 errors in `Client.query(...)` ([#2135](https://github.com/googleapis/python-bigquery/issues/2135)) ([c6d5f8a](https://github.com/googleapis/python-bigquery/commit/c6d5f8aaec21ab8f17436407aded4bc2316323fd)) + + +### Dependencies + +* Updates required checks list in github ([#2136](https://github.com/googleapis/python-bigquery/issues/2136)) ([fea49ff](https://github.com/googleapis/python-bigquery/commit/fea49ffbf8aa1d53451864ceb7fd73189b6661cb)) +* Use pandas-gbq to determine schema in `load_table_from_dataframe` ([#2095](https://github.com/googleapis/python-bigquery/issues/2095)) ([7603bd7](https://github.com/googleapis/python-bigquery/commit/7603bd71d60592ef2a551d9eea09987b218edc73)) + + +### Documentation + +* Update magics.rst ([#2125](https://github.com/googleapis/python-bigquery/issues/2125)) ([b5bcfb3](https://github.com/googleapis/python-bigquery/commit/b5bcfb303d27015b747a3b0747ecd7f7ed0ed557)) + ## [3.29.0](https://github.com/googleapis/python-bigquery/compare/v3.28.0...v3.29.0) (2025-01-21) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 3d852b8a3..01c4c51ca 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.29.0" +__version__ = "3.30.0" From fb7de398cb2ad000b80a8a702d1f6539dc03d8e0 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 28 Feb 2025 13:14:05 -0500 Subject: [PATCH 414/536] deps: Remove Python 3.7 and 3.8 as supported runtimes (#2133) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Initial batch of changes to remove 3.7 and 3.8 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * more updates to remove 3.7 and 3.8 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates samples/geography/reqs * updates samples/magics/reqs * updates samples/notebooks/reqs * updates linting * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates conf due to linting issue * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates reqs.txt, fix mypy, lint, and debug in noxfile * Updates owlbot to correct spacing issue in conf.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates owlbot imports * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * removes kokoro samples configs for 3.7 & 3.8 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * removes owlbots attempt to restore kokoro samples configs * removes kokoro system-3.8.cfg * edits repo sync settings * updates assorted noxfiles for samples and pyproject.toml * update test-samples-impl.sh * updates install_deps template * Edits to the contributing documentation * deps: use pandas-gbq to determine schema in `load_table_from_dataframe` (#2095) * feat: use pandas-gbq to determine schema in `load_table_from_dataframe` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix some unit tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * bump minimum pandas-gbq to 0.26.1 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * drop pandas-gbq from python 3.7 extras * relax warning message text assertion * use consistent time zone presense/absense in time datetime system test * Update google/cloud/bigquery/_pandas_helpers.py * Update google/cloud/bigquery/_pandas_helpers.py Co-authored-by: Chalmer Lowe * remove pandas-gbq from at least 1 unit test and system test session --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe * Feat: Adds foreign_type_info attribute to table class and adds unit tests. (#2126) * adds foreign_type_info attribute to table * feat: Adds foreign_type_info attribute and tests * updates docstrings for foreign_type_info * Updates property handling, especially as regards set/get_sub_prop * Removes extraneous comments and debug expressions * Refactors build_resource_from_properties w get/set_sub_prop * updates to foreign_type_info, tests and wiring * Adds logic to detect non-Sequence schema.fields value * updates assorted tests and logic * deps: updates required checks list in github (#2136) * deps: updates required checks list in github * deps: updates snippet and system checks in github to remove 3.9 * changes the order of two items in the list. * updates linting * reverts pandas back to 1.1.0 * Revert changes related to pandas <1.5 * Revert noxfile.py changes related to pandas <1.5 * Revert constraints-3.9 changes related to pandas <1.5 * Revert test_query_pandas.py changes related to pandas <1.5 * Revert test__pandas_helpers.py changes related to pandas <1.5 * Revert test__versions_helpers.py changes related to pandas <1.5 * Revert tnoxfile.py changes related to pandas <1.5 * Revert test__versions_helpers.py changes related to pandas <1.5 * Revert test_table.py changes related to pandas <1.5 * Update noxfile changes related to pandas <1.5 * Update pyproject.toml changes related to pandas <1.5 * Update constraints-3.9.txt changes related to pandas <1.5 * Update test_legacy_types.py changes related to pandas <1.5 * Updates magics.py as part of reverting from pandas 1.5 * Updates noxfile.py in reverting from pandas 1.5 * Updates pyproject.toml in reverting from pandas 1.5 * Updates constraints.txt in reverting from pandas 1.5 * Updates test_magics in reverting from pandas 1.5 * Updates test_table in reverting from pandas 1.5 * Updates in tests re: reverting from pandas 1.5 * Updates pyproject to match constraints.txt * updates pyproject.toml to mirror constraints * remove limit on virtualenv * updates owlbot.py for test-samples-impl.sh * updates to owlbot.py * updates to test-samples-impl.sh * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * further updates to owlbot.py * removes unneeded files * adds presubmit.cfg back in --------- Co-authored-by: Owl Bot Co-authored-by: Tim Sweña (Swast) --- .coveragerc | 2 +- .../{snippets-3.8.cfg => snippets-3.9.cfg} | 2 +- .../{system-3.8.cfg => system-3.9.cfg} | 4 +- .kokoro/samples/python3.7/common.cfg | 40 ----------- .kokoro/samples/python3.7/continuous.cfg | 6 -- .kokoro/samples/python3.7/periodic-head.cfg | 11 ---- .kokoro/samples/python3.7/periodic.cfg | 6 -- .kokoro/samples/python3.7/presubmit.cfg | 6 -- .kokoro/samples/python3.8/common.cfg | 40 ----------- .kokoro/samples/python3.8/continuous.cfg | 6 -- .kokoro/samples/python3.8/periodic-head.cfg | 11 ---- .kokoro/samples/python3.8/periodic.cfg | 6 -- .kokoro/samples/python3.8/presubmit.cfg | 6 -- .kokoro/test-samples-impl.sh | 3 +- CONTRIBUTING.rst | 15 ++--- README.rst | 4 +- google/cloud/bigquery/__init__.py | 6 +- google/cloud/bigquery/_pandas_helpers.py | 9 ++- google/cloud/bigquery/_pyarrow_helpers.py | 14 ++-- google/cloud/bigquery/external_config.py | 6 +- google/cloud/bigquery/format_options.py | 4 +- google/cloud/bigquery/job/base.py | 4 +- google/cloud/bigquery/magics/magics.py | 1 - google/cloud/bigquery/model.py | 2 +- google/cloud/bigquery/routine/routine.py | 16 +++-- google/cloud/bigquery/schema.py | 6 +- google/cloud/bigquery/table.py | 14 ++-- noxfile.py | 43 ++++++++---- owlbot.py | 66 ++++++++++++++++++- pyproject.toml | 21 +++--- samples/desktopapp/noxfile.py | 4 +- samples/desktopapp/requirements-test.txt | 3 +- samples/desktopapp/user_credentials_test.py | 9 +-- samples/geography/noxfile.py | 4 +- samples/geography/requirements-test.txt | 3 +- samples/geography/requirements.txt | 56 +++++----------- samples/magics/conftest.py | 6 +- samples/magics/noxfile.py | 4 +- samples/magics/requirements-test.txt | 3 +- samples/magics/requirements.txt | 8 +-- samples/notebooks/jupyter_tutorial_test.py | 4 +- samples/notebooks/noxfile.py | 4 +- samples/notebooks/requirements-test.txt | 3 +- samples/notebooks/requirements.txt | 10 +-- samples/snippets/noxfile.py | 4 +- samples/snippets/requirements-test.txt | 3 +- .../templates/install_deps.tmpl.rst | 2 +- testing/constraints-3.7.txt | 36 ---------- testing/constraints-3.8.txt | 11 ---- testing/constraints-3.9.txt | 26 +++++++- tests/system/test_pandas.py | 7 +- tests/unit/job/test_copy.py | 1 - tests/unit/job/test_query_pandas.py | 2 + tests/unit/test__pandas_helpers.py | 5 +- tests/unit/test__versions_helpers.py | 9 ++- tests/unit/test_legacy_types.py | 4 +- tests/unit/test_magics.py | 1 + tests/unit/test_table.py | 27 ++++---- tests/unit/test_table_pandas.py | 12 ++-- 59 files changed, 274 insertions(+), 377 deletions(-) rename .kokoro/presubmit/{snippets-3.8.cfg => snippets-3.9.cfg} (82%) rename .kokoro/presubmit/{system-3.8.cfg => system-3.9.cfg} (81%) delete mode 100644 .kokoro/samples/python3.7/common.cfg delete mode 100644 .kokoro/samples/python3.7/continuous.cfg delete mode 100644 .kokoro/samples/python3.7/periodic-head.cfg delete mode 100644 .kokoro/samples/python3.7/periodic.cfg delete mode 100644 .kokoro/samples/python3.7/presubmit.cfg delete mode 100644 .kokoro/samples/python3.8/common.cfg delete mode 100644 .kokoro/samples/python3.8/continuous.cfg delete mode 100644 .kokoro/samples/python3.8/periodic-head.cfg delete mode 100644 .kokoro/samples/python3.8/periodic.cfg delete mode 100644 .kokoro/samples/python3.8/presubmit.cfg delete mode 100644 testing/constraints-3.7.txt delete mode 100644 testing/constraints-3.8.txt diff --git a/.coveragerc b/.coveragerc index 04092257a..e78e7a931 100644 --- a/.coveragerc +++ b/.coveragerc @@ -9,6 +9,6 @@ omit = google/cloud/bigquery_v2/* # Legacy proto-based types. exclude_lines = # Re-enable the standard pragma - pragma: NO COVER + pragma: (no cover|NO COVER) # Ignore debug-only repr def __repr__ diff --git a/.kokoro/presubmit/snippets-3.8.cfg b/.kokoro/presubmit/snippets-3.9.cfg similarity index 82% rename from .kokoro/presubmit/snippets-3.8.cfg rename to .kokoro/presubmit/snippets-3.9.cfg index 840d9e716..d1de209a2 100644 --- a/.kokoro/presubmit/snippets-3.8.cfg +++ b/.kokoro/presubmit/snippets-3.9.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "snippets-3.8" + value: "snippets-3.9" } diff --git a/.kokoro/presubmit/system-3.8.cfg b/.kokoro/presubmit/system-3.9.cfg similarity index 81% rename from .kokoro/presubmit/system-3.8.cfg rename to .kokoro/presubmit/system-3.9.cfg index f4bcee3db..bd1fb514b 100644 --- a/.kokoro/presubmit/system-3.8.cfg +++ b/.kokoro/presubmit/system-3.9.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "system-3.8" -} \ No newline at end of file + value: "system-3.9" +} diff --git a/.kokoro/samples/python3.7/common.cfg b/.kokoro/samples/python3.7/common.cfg deleted file mode 100644 index d30dc6018..000000000 --- a/.kokoro/samples/python3.7/common.cfg +++ /dev/null @@ -1,40 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "py-3.7" -} - -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py37" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.7/continuous.cfg b/.kokoro/samples/python3.7/continuous.cfg deleted file mode 100644 index a1c8d9759..000000000 --- a/.kokoro/samples/python3.7/continuous.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/.kokoro/samples/python3.7/periodic-head.cfg b/.kokoro/samples/python3.7/periodic-head.cfg deleted file mode 100644 index 5aa01bab5..000000000 --- a/.kokoro/samples/python3.7/periodic-head.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" -} diff --git a/.kokoro/samples/python3.7/periodic.cfg b/.kokoro/samples/python3.7/periodic.cfg deleted file mode 100644 index 71cd1e597..000000000 --- a/.kokoro/samples/python3.7/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} diff --git a/.kokoro/samples/python3.7/presubmit.cfg b/.kokoro/samples/python3.7/presubmit.cfg deleted file mode 100644 index a1c8d9759..000000000 --- a/.kokoro/samples/python3.7/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/.kokoro/samples/python3.8/common.cfg b/.kokoro/samples/python3.8/common.cfg deleted file mode 100644 index 46759c6d6..000000000 --- a/.kokoro/samples/python3.8/common.cfg +++ /dev/null @@ -1,40 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "py-3.8" -} - -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py38" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.8/continuous.cfg b/.kokoro/samples/python3.8/continuous.cfg deleted file mode 100644 index a1c8d9759..000000000 --- a/.kokoro/samples/python3.8/continuous.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/.kokoro/samples/python3.8/periodic-head.cfg b/.kokoro/samples/python3.8/periodic-head.cfg deleted file mode 100644 index 5aa01bab5..000000000 --- a/.kokoro/samples/python3.8/periodic-head.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" -} diff --git a/.kokoro/samples/python3.8/periodic.cfg b/.kokoro/samples/python3.8/periodic.cfg deleted file mode 100644 index 71cd1e597..000000000 --- a/.kokoro/samples/python3.8/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} diff --git a/.kokoro/samples/python3.8/presubmit.cfg b/.kokoro/samples/python3.8/presubmit.cfg deleted file mode 100644 index a1c8d9759..000000000 --- a/.kokoro/samples/python3.8/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index 53e365bc4..40e248822 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -33,8 +33,7 @@ export PYTHONUNBUFFERED=1 env | grep KOKORO # Install nox -# `virtualenv==20.26.6` is added for Python 3.7 compatibility -python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6 +python3.9 -m pip install --upgrade --quiet nox virtualenv # Use secrets acessor service account to get secrets if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 1900c5e36..8f4d54bce 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows. + 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -143,13 +143,12 @@ Running System Tests $ nox -s system # Run a single system test - $ nox -s system-3.8 -- -k + $ nox -s system-3.9 -- -k .. note:: - System tests are only configured to run under Python 3.8. - For expediency, we do not run them in older versions of Python 3. + System tests are configured to run under Python 3.9, 3.11, 3.12. This alone will not run the tests. You'll need to change some local auth settings and change some configuration in your project to @@ -195,11 +194,11 @@ configure them just like the System Tests. # Run all tests in a folder $ cd samples/snippets - $ nox -s py-3.8 + $ nox -s py-3.9 # Run a single sample test $ cd samples/snippets - $ nox -s py-3.8 -- -k + $ nox -s py-3.9 -- -k ******************************************** Note About ``README`` as it pertains to PyPI @@ -221,16 +220,12 @@ Supported Python Versions We support: -- `Python 3.7`_ -- `Python 3.8`_ - `Python 3.9`_ - `Python 3.10`_ - `Python 3.11`_ - `Python 3.12`_ - `Python 3.13`_ -.. _Python 3.7: https://docs.python.org/3.7/ -.. _Python 3.8: https://docs.python.org/3.8/ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ .. _Python 3.11: https://docs.python.org/3.11/ diff --git a/README.rst b/README.rst index f81adc4b9..29e15e067 100644 --- a/README.rst +++ b/README.rst @@ -52,11 +52,11 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.7 +Python >= 3.9 Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Python == 2.7, Python == 3.5, Python == 3.6. +Python == 2.7, Python == 3.5, Python == 3.6, Python == 3.7, and Python == 3.8. The last version of this library compatible with Python 2.7 and 3.5 is `google-cloud-bigquery==1.28.0`. diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index caf75333a..d39c71641 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -126,12 +126,12 @@ if sys_major == 3 and sys_minor in (7, 8): warnings.warn( - "The python-bigquery library will stop supporting Python 3.7 " - "and Python 3.8 in a future major release expected in Q4 2024. " + "The python-bigquery library no longer supports Python 3.7 " + "and Python 3.8. " f"Your Python version is {sys_major}.{sys_minor}.{sys_micro}. We " "recommend that you update soon to ensure ongoing support. For " "more details, see: [Google Cloud Client Libraries Supported Python Versions policy](https://cloud.google.com/python/docs/supported-python-versions)", - PendingDeprecationWarning, + FutureWarning, ) __all__ = [ diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 0017d92ce..feb6b3adb 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -336,8 +336,13 @@ def types_mapper(arrow_data_type): ): return range_date_dtype - elif range_timestamp_dtype is not None and arrow_data_type.equals( - range_timestamp_dtype.pyarrow_dtype + # TODO: this section does not have a test yet OR at least not one that is + # recognized by coverage, hence the pragma. See Issue: #2132 + elif ( + range_timestamp_dtype is not None + and arrow_data_type.equals( # pragma: NO COVER + range_timestamp_dtype.pyarrow_dtype + ) ): return range_timestamp_dtype diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py index 1b42cd5c7..034e020ee 100644 --- a/google/cloud/bigquery/_pyarrow_helpers.py +++ b/google/cloud/bigquery/_pyarrow_helpers.py @@ -21,8 +21,6 @@ from typing import Any -from packaging import version - try: import pyarrow # type: ignore except ImportError: @@ -101,14 +99,10 @@ def pyarrow_timestamp(): pyarrow.decimal128(38, scale=9).id: "NUMERIC", } - # Adds bignumeric support only if pyarrow version >= 3.0.0 - # Decimal256 support was added to arrow 3.0.0 - # https://arrow.apache.org/blog/2021/01/25/3.0.0-release/ - if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal256 instances. - _ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + _ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" def bq_to_arrow_scalars(bq_scalar: str): diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 73c4acabf..6e943adf3 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -835,8 +835,10 @@ def schema(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema """ - prop = self._properties.get("schema", {}) - return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + prop = self._properties.get("schema", {}) # type: ignore + return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] # type: ignore @schema.setter def schema(self, value): diff --git a/google/cloud/bigquery/format_options.py b/google/cloud/bigquery/format_options.py index ad5591b1c..e26b7a74f 100644 --- a/google/cloud/bigquery/format_options.py +++ b/google/cloud/bigquery/format_options.py @@ -13,7 +13,7 @@ # limitations under the License. import copy -from typing import Dict, Optional +from typing import Dict, Optional, Union class AvroOptions: @@ -106,7 +106,7 @@ def enable_list_inference(self, value: bool) -> None: self._properties["enableListInference"] = value @property - def map_target_type(self) -> str: + def map_target_type(self) -> Optional[Union[bool, str]]: """Indicates whether to simplify the representation of parquet maps to only show keys and values.""" return self._properties.get("mapTargetType") diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index e5f68c843..eaa9d3460 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -415,7 +415,9 @@ def __init__(self, job_id, client): @property def configuration(self) -> _JobConfig: """Job-type specific configurtion.""" - configuration = self._CONFIG_CLASS() + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + configuration = self._CONFIG_CLASS() # pytype: disable=not-callable configuration._properties = self._properties.setdefault("configuration", {}) return configuration diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index a5be95185..1f892b595 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -55,7 +55,6 @@ except ImportError: bigquery_magics = None - IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) # type: ignore diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 45a88ab22..16581be5a 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -58,7 +58,7 @@ def __init__(self, model_ref: Union["ModelReference", str, None]): # semantics. The BigQuery API makes a distinction between an unset # value, a null value, and a default value (0 or ""), but the protocol # buffer classes do not. - self._properties = {} + self._properties: Dict[str, Any] = {} if isinstance(model_ref, str): model_ref = ModelReference.from_string(model_ref) diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index 83cb6362d..7e079781d 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -16,7 +16,7 @@ """Define resources for the BigQuery Routines API.""" -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Union import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers @@ -216,7 +216,7 @@ def return_type(self, value: StandardSqlDataType): self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource @property - def return_table_type(self) -> Optional[StandardSqlTableType]: + def return_table_type(self) -> Union[StandardSqlTableType, Any, None]: """The return type of a Table Valued Function (TVF) routine. .. versionadded:: 2.22.0 @@ -518,17 +518,23 @@ def __init__(self): @property def project(self): """str: ID of the project containing the routine.""" - return self._properties["projectId"] # pytype: disable=key-error + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + return self._properties["projectId"] # pytype: disable=typed-dict-error @property def dataset_id(self): """str: ID of dataset containing the routine.""" - return self._properties["datasetId"] # pytype: disable=key-error + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + return self._properties["datasetId"] # pytype: disable=typed-dict-error @property def routine_id(self): """str: The routine ID.""" - return self._properties["routineId"] # pytype: disable=key-error + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + return self._properties["routineId"] # pytype: disable=typed-dict-error @property def path(self): diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 03cde830e..749b0a00e 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -232,8 +232,12 @@ def __init__( if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length if policy_tags is not _DEFAULT_VALUE: + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 self._properties["policyTags"] = ( - policy_tags.to_api_repr() if policy_tags is not None else None + policy_tags.to_api_repr() # pytype: disable=attribute-error + if policy_tags is not None + else None ) if isinstance(range_element_type, str): self._properties["rangeElementType"] = {"type": range_element_type} diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index c70a0ebea..e7f3c9a36 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -137,6 +137,8 @@ def _reference_getter(table): return TableReference(dataset_ref, table.table_id) +# TODO: The typehinting for this needs work. Setting this pragma to temporarily +# manage a pytype issue that came up in another PR. See Issue: #2132 def _view_use_legacy_sql_getter(table): """bool: Specifies whether to execute the view with Legacy or Standard SQL. @@ -148,10 +150,11 @@ def _view_use_legacy_sql_getter(table): Raises: ValueError: For invalid value types. """ - view = table._properties.get("view") + + view = table._properties.get("view") # type: ignore if view is not None: # The server-side default for useLegacySql is True. - return view.get("useLegacySql", True) + return view.get("useLegacySql", True) # type: ignore # In some cases, such as in a table list no view object is present, but the # resource still represents a view. Use the type as a fallback. if table.table_type == "VIEW": @@ -375,7 +378,7 @@ class Table(_TableBase): :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. """ - _PROPERTY_TO_API_FIELD = { + _PROPERTY_TO_API_FIELD: Dict[str, Any] = { **_TableBase._PROPERTY_TO_API_FIELD, "clustering_fields": "clustering", "created": "creationTime", @@ -418,7 +421,10 @@ class Table(_TableBase): def __init__(self, table_ref, schema=None) -> None: table_ref = _table_arg_to_table_ref(table_ref) - self._properties = {"tableReference": table_ref.to_api_repr(), "labels": {}} + self._properties: Dict[str, Any] = { + "tableReference": table_ref.to_api_repr(), + "labels": {}, + } # Let the @property do validation. if schema is not None: self.schema = schema diff --git a/noxfile.py b/noxfile.py index 87bd9a70c..f069f8d37 100644 --- a/noxfile.py +++ b/noxfile.py @@ -24,7 +24,7 @@ MYPY_VERSION = "mypy==1.6.1" -PYTYPE_VERSION = "pytype==2021.4.9" +PYTYPE_VERSION = "pytype==2024.9.13" BLACK_VERSION = "black==23.7.0" BLACK_PATHS = ( "benchmark", @@ -37,9 +37,9 @@ "setup.py", ) -DEFAULT_PYTHON_VERSION = "3.8" -SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.11", "3.12"] -UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.12"] +DEFAULT_PYTHON_VERSION = "3.9" +SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12"] +UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() @@ -102,10 +102,16 @@ def default(session, install_extras=True): "-c", constraints_path, ) - - if install_extras and session.python in ["3.11", "3.12"]: - install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" - elif install_extras: + # We have logic in the magics.py file that checks for whether 'bigquery_magics' + # is imported OR not. If yes, we use a context object from that library. + # If no, we use our own context object from magics.py. In order to exercise + # that logic (and the associated tests) we avoid installing the [ipython] extra + # which has a downstream effect of then avoiding installing bigquery_magics. + if install_extras and session.python == UNIT_TEST_PYTHON_VERSIONS[0]: + install_target = ( + ".[bqstorage,pandas,ipywidgets,geopandas,tqdm,opentelemetry,bigquery_v2]" + ) + elif install_extras: # run against all other UNIT_TEST_PYTHON_VERSIONS install_target = ".[all]" else: install_target = "." @@ -157,7 +163,7 @@ def unit_noextras(session): # so that it continues to be an optional dependency. # https://github.com/googleapis/python-bigquery/issues/1877 if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: - session.install("pyarrow==1.0.0") + session.install("pyarrow==4.0.0") default(session, install_extras=False) @@ -178,6 +184,7 @@ def mypy(session): "types-requests", "types-setuptools", ) + session.run("python", "-m", "pip", "freeze") session.run("mypy", "-p", "google", "--show-traceback") @@ -192,6 +199,7 @@ def pytype(session): session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install(PYTYPE_VERSION) + session.run("python", "-m", "pip", "freeze") # See https://github.com/google/pytype/issues/464 session.run("pytype", "-P", ".", "google/cloud/bigquery") @@ -281,7 +289,7 @@ def mypy_samples(session): "types-setuptools", ) - session.install("typing-extensions") # for TypedDict in pre-3.8 Python versions + session.run("python", "-m", "pip", "freeze") session.run( "mypy", @@ -307,10 +315,13 @@ def snippets(session): session.install("grpcio", "-c", constraints_path) if session.python in ["3.11", "3.12"]: - extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" + extras = ( + "[bqstorage,pandas,ipywidgets,geopandas,tqdm,opentelemetry,bigquery_v2]" + ) else: extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) + session.run("python", "-m", "pip", "freeze") # Run py.test against the snippets tests. # Skip tests in samples/snippets, as those are run in a different session @@ -339,6 +350,7 @@ def cover(session): """ session.install("coverage", "pytest-cov") + session.run("python", "-m", "pip", "freeze") session.run("coverage", "report", "--show-missing", "--fail-under=100") session.run("coverage", "erase") @@ -378,6 +390,7 @@ def prerelease_deps(session): "google-cloud-bigquery-storage", "google-cloud-core", "google-resumable-media", + "db-dtypes", # Exclude version 1.49.0rc1 which has a known issue. See https://github.com/grpc/grpc/pull/30642 "grpcio!=1.49.0rc1", ) @@ -417,9 +430,6 @@ def prerelease_deps(session): session.install("--no-deps", "-e", ".[all]") # Print out prerelease package versions. - session.run("python", "-c", "import grpc; print(grpc.__version__)") - session.run("python", "-c", "import pandas; print(pandas.__version__)") - session.run("python", "-c", "import pyarrow; print(pyarrow.__version__)") session.run("python", "-m", "pip", "freeze") # Run all tests, except a few samples tests which require extra dependencies. @@ -453,6 +463,7 @@ def lint(session): session.install("flake8", BLACK_VERSION) session.install("-e", ".") + session.run("python", "-m", "pip", "freeze") session.run("flake8", os.path.join("google", "cloud", "bigquery")) session.run("flake8", "tests") session.run("flake8", os.path.join("docs", "samples")) @@ -467,6 +478,7 @@ def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" session.install("docutils", "Pygments") + session.run("python", "-m", "pip", "freeze") session.run("python", "setup.py", "check", "--restructuredtext", "--strict") @@ -478,6 +490,7 @@ def blacken(session): """ session.install(BLACK_VERSION) + session.run("python", "-m", "pip", "freeze") session.run("black", *BLACK_PATHS) @@ -504,6 +517,7 @@ def docs(session): session.install("-e", ".[all]") shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + session.run("python", "-m", "pip", "freeze") session.run( "sphinx-build", "-W", # warnings as errors @@ -540,6 +554,7 @@ def docfx(session): ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + session.run("python", "-m", "pip", "freeze") session.run( "sphinx-build", "-T", # show full traceback on exception diff --git a/owlbot.py b/owlbot.py index 07805d11a..c8efaa98d 100644 --- a/owlbot.py +++ b/owlbot.py @@ -14,7 +14,6 @@ """This script is used to synthesize generated parts of this library.""" from pathlib import Path -import textwrap import synthtool as s from synthtool import gcp @@ -70,13 +69,65 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", + ".kokoro/presubmit/system-3.8.cfg", ".kokoro/continuous/prerelease-deps.cfg", + ".kokoro/samples/python3.7/**", + ".kokoro/samples/python3.8/**", ".github/workflows", # exclude gh actions as credentials are needed for tests - "README.rst", + "README.rst", ], ) python.configure_previous_major_version_branches() + +s.replace( + ".kokoro/test-samples-impl.sh", + """# `virtualenv==20.26.6` is added for Python 3.7 compatibility +python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6""", + "python3.9 -m pip install --upgrade --quiet nox virtualenv", +) +s.replace( + "CONTRIBUTING.rst", + "3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows.", + "3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows.", +) +s.replace( + "CONTRIBUTING.rst", + r" \$ nox -s system-3.8 -- -k ", + r" $ nox -s system-3.9 -- -k ", +) +s.replace( + "CONTRIBUTING.rst", + r"""System tests are only configured to run under Python 3.8. + For expediency, we do not run them in older versions of Python 3.""", + r"System tests are configured to run under Python 3.9, 3.11, 3.12.", +) +s.replace( + "CONTRIBUTING.rst", + r"\$ nox -s py-3.8", + r"$ nox -s py-3.9", +) +s.replace( + "CONTRIBUTING.rst", + r"""- `Python 3.7`_ +- `Python 3.8`_ +""", + r"", +) +s.replace( + "CONTRIBUTING.rst", + r""".. _Python 3.7: https://docs.python.org/3.7/ +.. _Python 3.8: https://docs.python.org/3.8/ +""", + r"", +) +s.replace( + "scripts/readme-gen/templates/install_deps.tmpl.rst", + r"Samples are compatible with Python 3.7", + r"Samples are compatible with Python 3.9", +) + + # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- @@ -93,6 +144,17 @@ r"exclude_patterns = \[", '\\g<0>\n "google/cloud/bigquery_v2/**", # Legacy proto-based types.', ) +s.replace( + "samples/**/noxfile.py", + 'BLACK_VERSION = "black==22.3.0"', + 'BLACK_VERSION = "black==23.7.0"', +) +s.replace( + "samples/**/noxfile.py", + r'ALL_VERSIONS = \["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"\]', + 'ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"]', +) + # ---------------------------------------------------------------------------- # pytype-related changes diff --git a/pyproject.toml b/pyproject.toml index c4e5c2f0d..8822fc57d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ build-backend = "setuptools.build_meta" name = "google-cloud-bigquery" authors = [{ name = "Google LLC", email = "googleapis-packages@google.com" }] license = { text = "Apache 2.0" } -requires-python = ">=3.7" +requires-python = ">=3.9" description = "Google BigQuery API client library" readme = "README.rst" classifiers = [ @@ -33,8 +33,6 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -47,8 +45,8 @@ dependencies = [ "google-auth >= 2.14.1, < 3.0.0dev", "google-cloud-core >= 2.4.1, < 3.0.0dev", "google-resumable-media >= 2.0.0, < 3.0dev", - "packaging >= 20.0.0", - "python-dateutil >= 2.7.3, < 3.0dev", + "packaging >= 24.2.0", + "python-dateutil >= 2.8.2, < 3.0dev", "requests >= 2.21.0, < 3.0.0dev", ] dynamic = ["version"] @@ -61,7 +59,7 @@ Repository = "https://github.com/googleapis/python-bigquery" # moved back to optional due to bloat. See # https://github.com/googleapis/python-bigquery/issues/1196 for more background. bqstorage = [ - "google-cloud-bigquery-storage >= 2.6.0, < 3.0.0dev", + "google-cloud-bigquery-storage >= 2.18.0, < 3.0.0dev", # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. @@ -70,20 +68,19 @@ bqstorage = [ # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.47.0, < 2.0dev", "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", - "pyarrow >= 3.0.0", + "pyarrow >= 4.0.0", ] pandas = [ - "pandas >= 1.1.0", + "pandas >= 1.1.4", "pandas-gbq >= 0.26.1; python_version >= '3.8'", "grpcio >= 1.47.0, < 2.0dev", "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", "pyarrow >= 3.0.0", - "db-dtypes >= 0.3.0, < 2.0.0dev", - "importlib_metadata >= 1.0.0; python_version < '3.8'", + "db-dtypes >= 1.0.4, < 2.0.0dev", ] -ipywidgets = ["ipywidgets >= 7.7.0", "ipykernel >= 6.0.0"] +ipywidgets = ["ipywidgets >= 7.7.1", "ipykernel >= 6.2.0"] geopandas = ["geopandas >= 0.9.0, < 2.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] -ipython = ["bigquery-magics >= 0.1.0"] +ipython = ["ipython >= 7.23.1", "bigquery-magics >= 0.6.0"] tqdm = ["tqdm >= 4.7.4, < 5.0.0dev"] opentelemetry = [ "opentelemetry-api >= 1.1.0", diff --git a/samples/desktopapp/noxfile.py b/samples/desktopapp/noxfile.py index c9a3d1ecb..a86590382 100644 --- a/samples/desktopapp/noxfile.py +++ b/samples/desktopapp/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 827b02dcf..cf215e2fd 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,3 @@ google-cloud-testutils==1.5.0 -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/samples/desktopapp/user_credentials_test.py b/samples/desktopapp/user_credentials_test.py index 252b843c4..d14798d9b 100644 --- a/samples/desktopapp/user_credentials_test.py +++ b/samples/desktopapp/user_credentials_test.py @@ -13,7 +13,6 @@ # limitations under the License. import os -import sys from typing import Iterator, Union from unittest import mock @@ -24,13 +23,7 @@ PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] - -if sys.version_info >= (3, 8): - # Python 3.8+ has an AsyncMock attribute in unittest.mock, but 3.7 does not - MockType = Union[mock.MagicMock, mock.AsyncMock] -else: - # Other definitions and imports - MockType = Union[mock.MagicMock] +MockType = Union[mock.MagicMock, mock.AsyncMock] @pytest.fixture diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index c9a3d1ecb..a86590382 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index ef38acb4f..4ad1bd028 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,2 @@ -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 71579867f..0ad2154a4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,63 +1,41 @@ -attrs===24.2.0; python_version == '3.7' -attrs==24.3.0; python_version >= '3.8' +attrs==24.3.0 certifi==2024.12.14 -cffi===1.15.1; python_version == '3.7' -cffi==1.17.1; python_version >= '3.8' +cffi==1.17.1 charset-normalizer==3.4.1 click==8.1.8 click-plugins==1.1.1 cligj==0.7.2 -dataclasses==0.8; python_version < '3.7' db-dtypes==1.3.1 -Fiona===1.9.6; python_version == '3.7' -Fiona==1.10.1; python_version >= '3.8' +Fiona==1.10.1 geojson==3.2.0 -geopandas===0.10.2; python_version == '3.7' -geopandas===0.13.2; python_version == '3.8' -geopandas==1.0.1; python_version >= '3.9' +geopandas==1.0.1 google-api-core==2.24.0 google-auth==2.37.0 google-cloud-bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 google-cloud-core==2.4.1 -google-crc32c===1.5.0; python_version < '3.9' -google-crc32c==1.6.0; python_version >= '3.9' +google-crc32c==1.6.0 google-resumable-media==2.7.2 googleapis-common-protos==1.66.0 -grpcio===1.62.2; python_version == '3.7' -grpcio==1.69.0; python_version >= '3.8' +grpcio==1.69.0 idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 -packaging===24.0; python_version == '3.7' -packaging==24.2; python_version >= '3.8' -pandas===1.3.5; python_version == '3.7' -pandas===2.0.3; python_version == '3.8' -pandas==2.2.3; python_version >= '3.9' +packaging==24.2 +pandas==2.2.3 proto-plus==1.25.0 -pyarrow===12.0.1; python_version == '3.7' -pyarrow===17.0.0; python_version == '3.8' -pyarrow==18.1.0; python_version >= '3.9' -pyasn1===0.5.1; python_version == '3.7' -pyasn1==0.6.1; python_version >= '3.8' -pyasn1-modules===0.3.0; python_version == '3.7' -pyasn1-modules==0.4.1; python_version >= '3.8' -pycparser===2.21; python_version == '3.7' -pycparser==2.22; python_version >= '3.8' -pyparsing===3.1.4; python_version < '3.9' -pyparsing==3.2.1; python_version >= '3.9' +pyarrow==18.1.0 +pyasn1==0.6.1 +pyasn1-modules==0.4.1 +pycparser==2.22 +pyparsing==3.2.1 python-dateutil==2.9.0.post0 pytz==2024.2 -PyYAML===6.0.1; python_version == '3.7' -PyYAML==6.0.2; python_version >= '3.8' -requests==2.31.0; python_version == '3.7' -requests==2.32.3; python_version >= '3.8' +PyYAML==6.0.2 +requests==2.32.3 rsa==4.9 Shapely==2.0.6 six==1.17.0 -typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.12.2; python_version >= '3.8' +typing-extensions==4.12.2 typing-inspect==0.9.0 -urllib3===1.26.18; python_version == '3.7' -urllib3===2.2.3; python_version == '3.8' -urllib3==2.3.0; python_version >= '3.9' +urllib3==2.3.0 diff --git a/samples/magics/conftest.py b/samples/magics/conftest.py index 55ea30f90..0943c535a 100644 --- a/samples/magics/conftest.py +++ b/samples/magics/conftest.py @@ -18,7 +18,7 @@ import pytest if typing.TYPE_CHECKING: - from IPython.core.interactiveshell import TerminalInteractiveShell + from IPython.terminal.interactiveshell import TerminalInteractiveShell interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") tools = pytest.importorskip("IPython.testing.tools") @@ -40,5 +40,7 @@ def ipython_interactive( for the duration of the test scope. """ - with ipython.builtin_trap: + + trap = typing.cast(typing.ContextManager, ipython.builtin_trap) + with trap: yield ipython diff --git a/samples/magics/noxfile.py b/samples/magics/noxfile.py index c9a3d1ecb..a86590382 100644 --- a/samples/magics/noxfile.py +++ b/samples/magics/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 827b02dcf..cf215e2fd 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,3 @@ google-cloud-testutils==1.5.0 -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 87efa3dec..4b81fe0ad 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -2,9 +2,5 @@ bigquery_magics==0.5.0 db-dtypes==1.3.1 google.cloud.bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 -ipython===7.31.1; python_version == '3.7' -ipython===8.0.1; python_version == '3.8' -ipython===8.18.1; python_version >= '3.9' -pandas===1.3.5; python_version == '3.7' -pandas===2.0.3; python_version == '3.8' -pandas==2.2.3; python_version >= '3.9' +ipython===8.18.1 +pandas==2.2.3 diff --git a/samples/notebooks/jupyter_tutorial_test.py b/samples/notebooks/jupyter_tutorial_test.py index 2c2cf9390..1861a822f 100644 --- a/samples/notebooks/jupyter_tutorial_test.py +++ b/samples/notebooks/jupyter_tutorial_test.py @@ -45,7 +45,9 @@ def ipython_interactive( for the duration of the test scope. """ - with ipython.builtin_trap: + + trap = typing.cast(typing.ContextManager, ipython.builtin_trap) + with trap: yield ipython diff --git a/samples/notebooks/noxfile.py b/samples/notebooks/noxfile.py index c9a3d1ecb..a86590382 100644 --- a/samples/notebooks/noxfile.py +++ b/samples/notebooks/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 827b02dcf..cf215e2fd 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,3 @@ google-cloud-testutils==1.5.0 -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 77103a338..e92d084a4 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -2,13 +2,7 @@ bigquery-magics==0.5.0 db-dtypes==1.3.1 google-cloud-bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 -ipython===7.31.1; python_version == '3.7' -ipython===8.0.1; python_version == '3.8' -ipython===8.18.1; python_version >= '3.9' -matplotlib===3.5.3; python_version == '3.7' -matplotlib===3.7.4; python_version == '3.8' +ipython==8.18.1 matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.0; python_version >= '3.10' -pandas===1.3.5; python_version == '3.7' -pandas===2.0.3; python_version == '3.8' -pandas==2.2.3; python_version >= '3.9' +pandas==2.2.3 diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index c9a3d1ecb..a86590382 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 077e465cf..52ccc8ab2 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,4 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.5.0 -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/scripts/readme-gen/templates/install_deps.tmpl.rst b/scripts/readme-gen/templates/install_deps.tmpl.rst index 6f069c6c8..f21db80c4 100644 --- a/scripts/readme-gen/templates/install_deps.tmpl.rst +++ b/scripts/readme-gen/templates/install_deps.tmpl.rst @@ -12,7 +12,7 @@ Install Dependencies .. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup -#. Create a virtualenv. Samples are compatible with Python 3.7+. +#. Create a virtualenv. Samples are compatible with Python 3.9+. .. code-block:: bash diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt deleted file mode 100644 index 55e63449f..000000000 --- a/testing/constraints-3.7.txt +++ /dev/null @@ -1,36 +0,0 @@ -# This constraints file is used to check that lower bounds -# are correct in setup.py -# List *all* library dependencies and extras in this file. -# Pin the version to the lower bound. -# -# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", -# Then this file should have foo==1.14.0 -bigquery-magics==0.1.0 -db-dtypes==0.3.0 -geopandas==0.9.0 -google-api-core==2.11.1 -google-auth==2.14.1 -google-cloud-bigquery-storage==2.24.0 -google-cloud-core==2.4.1 -google-cloud-testutils==1.4.0 -google-crc32c==1.5.0 -google-resumable-media==2.0.0 -googleapis-common-protos==1.62.0 -grpcio==1.47.0 -grpcio-status==1.47.0 -ipywidgets==7.7.1 -ipython==7.23.1 -ipykernel==6.0.0 -opentelemetry-api==1.1.0 -opentelemetry-instrumentation==0.20b0 -opentelemetry-sdk==1.1.0 -packaging==20.0.0 -pandas==1.1.0 -proto-plus==1.22.3 -protobuf==3.20.2 -pyarrow==3.0.0 -python-dateutil==2.7.3 -requests==2.21.0 -Shapely==1.8.4 -six==1.13.0 -tqdm==4.7.4 diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt deleted file mode 100644 index 9883fb8cc..000000000 --- a/testing/constraints-3.8.txt +++ /dev/null @@ -1,11 +0,0 @@ -grpcio==1.47.0 -pandas==1.2.0 - -# This constraints file is used to check that lower bounds -# are correct in setup.py -# -# Pin the version to the lower bound. -# -# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", -# Then this file should have foo==1.14.0 -pandas-gbq==0.26.1 diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index d4c302867..63b5d8bf6 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -4,5 +4,29 @@ # # NOTE: Not comprehensive yet, will eventually be maintained semi-automatically by # the renovate bot. +bigquery-magics==0.6.0 +db-dtypes==1.0.4 +geopandas==0.9.0 +google-api-core==2.11.1 +google-auth==2.14.1 +google-cloud-bigquery-storage==2.18.0 +google-cloud-core==2.4.1 +google-resumable-media==2.0.0 grpcio==1.47.0 -pyarrow>=4.0.0 +grpcio==1.49.1; python_version >= '3.11' +ipywidgets==7.7.1 +ipython==7.23.1 +ipykernel==6.2.0 +opentelemetry-api==1.1.0 +opentelemetry-instrumentation==0.20b0 +opentelemetry-sdk==1.1.0 +packaging==24.2.0 +pandas==1.1.4 +pandas-gbq==0.26.1 +proto-plus==1.22.3 +protobuf==3.20.2 +pyarrow==4.0.0 +python-dateutil==2.8.2 +requests==2.21.0 +Shapely==1.8.4 +tqdm==4.7.4 diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index a9e76d416..e65fca27e 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -1222,7 +1222,12 @@ def test_list_rows_nullable_scalars_extreme_dtypes_w_custom_dtype( # These pandas dtypes are handled by the custom dtypes. assert df.dtypes["bool_col"].name == "boolean" - assert df.dtypes["float64_col"].name == "Float64" + # Result is dependent upon which version of pandas is being used. + # Float64 was not introduced until pandas version 1.4. + if PANDAS_INSTALLED_VERSION >= "1.4": + assert df.dtypes["float64_col"].name == "Float64" + else: + assert df.dtypes["float64_col"].name == "string" assert df.dtypes["int64_col"].name == "Int64" assert df.dtypes["string_col"].name == "string" diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py index 4b0945310..8e2845316 100644 --- a/tests/unit/job/test_copy.py +++ b/tests/unit/job/test_copy.py @@ -147,7 +147,6 @@ def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) config = resource.get("configuration", {}).get("copy") - table_ref = config["destinationTable"] self.assertEqual(job.destination.project, table_ref["projectId"]) self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 3a5d92dbd..2cda59bd1 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -645,6 +645,8 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) +# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 +# pragma added due to issues with coverage. @pytest.mark.skipif( pandas.__version__.startswith("2."), reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index fdd232a5c..48c085c1d 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -23,10 +23,7 @@ from unittest import mock import warnings -try: - import importlib.metadata as metadata -except ImportError: - import importlib_metadata as metadata +import importlib.metadata as metadata try: import pandas diff --git a/tests/unit/test__versions_helpers.py b/tests/unit/test__versions_helpers.py index b1d0ef1ac..8379c87c1 100644 --- a/tests/unit/test__versions_helpers.py +++ b/tests/unit/test__versions_helpers.py @@ -188,14 +188,19 @@ def test_bqstorage_is_read_session_optional_false(): @pytest.mark.skipif(pandas is None, reason="pandas is not installed") -@pytest.mark.parametrize("version", ["1.5.0", "2.0.0", "2.1.0"]) +@pytest.mark.parametrize("version", ["1.1.5", "2.0.0", "2.1.0"]) def test_try_import_raises_no_error_w_recent_pandas(version): + # Comparing against the minimum allowed pandas version. + # As long as the installed version is greater than that, no + # error is raised. versions = _versions_helpers.PandasVersions() with mock.patch("pandas.__version__", new=version): try: pandas = versions.try_import(raise_if_error=True) assert pandas is not None - except exceptions.LegacyPandasError: # pragma: NO COVER + # this exception should not fire unless there is something broken + # hence the pragma. + except exceptions.LegacyPandasError: # pragma: no cover raise ("Legacy error raised with a non-legacy dependency version.") diff --git a/tests/unit/test_legacy_types.py b/tests/unit/test_legacy_types.py index 809be1855..75f3e77d7 100644 --- a/tests/unit/test_legacy_types.py +++ b/tests/unit/test_legacy_types.py @@ -18,9 +18,9 @@ import warnings try: - import proto # type: ignore + import proto except ImportError: - proto = None + proto = None # type: ignore @pytest.mark.skipif(proto is None, reason="proto is not installed") diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 73b29df6b..0f1e030cb 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -36,6 +36,7 @@ except ImportError: magics = None + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") IPython = pytest.importorskip("IPython") interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 1a3d7ec0f..7644186f3 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -16,7 +16,6 @@ import datetime import logging import re -from sys import version_info import time import types import unittest @@ -2171,10 +2170,11 @@ def test_to_geodataframe(self): df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 0) # verify the number of rows - if version_info.major == 3 and version_info.minor > 7: - assert not hasattr(df, "crs") # used with Python > 3.7 + + if geopandas.__version__ == "0.9.0": + assert hasattr(df, "crs") else: - self.assertIsNone(df.crs) # used with Python == 3.7 + assert not hasattr(df, "crs") class TestRowIterator(unittest.TestCase): @@ -3699,10 +3699,10 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): user_warnings = [ warning for warning in warned if warning.category is UserWarning ] - # With Python 3.7 and 3.8, len(user_warnings) = 3. With pandas < 1.5, - # pandas.ArrowDtype is not supported. We raise warnings because - # range columns have to be converted to object. - # With higher Python versions and noextra tests, len(user_warnings) = 0 + # With pandas < 1.5, pandas.ArrowDtype is not supported + # and len(user_warnings) = 3. + # We raise warnings because range columns have to be converted to object. + # With higher pandas versions and noextra tests, len(user_warnings) = 0 self.assertIn(len(user_warnings), [0, 3]) self.assertEqual(len(df), 4) @@ -3734,10 +3734,10 @@ def test_to_dataframe_no_tqdm(self): user_warnings = [ warning for warning in warned if warning.category is UserWarning ] - # With Python 3.7 and 3.8, len(user_warnings) = 4. With pandas < 1.5, - # pandas.ArrowDtype is not supported. We raise warnings because - # range columns have to be converted to object. - # With higher Python versions and noextra tests, len(user_warnings) = 1 + # With pandas < 1.5, pandas.ArrowDtype is not supported + # and len(user_warnings) = 4. + # We raise warnings because range columns have to be converted to object. + # With higher pandas versions and noextra tests, len(user_warnings) = 1 self.assertIn(len(user_warnings), [1, 4]) # Even though the progress bar won't show, downloading the dataframe @@ -3991,6 +3991,8 @@ def test_to_dataframe_w_dtypes_mapper(self): ) self.assertEqual(df.name.dtype.name, "string") + # While pyproject.toml lists pandas 1.1 as the lowest supported version of + # pandas, the pip resolver is not able to resolve pandas 1.1 and numpy if hasattr(pandas, "Float64Dtype"): self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) self.assertEqual(df.miles.dtype.name, "Float64") @@ -4085,7 +4087,6 @@ def test_to_dataframe_w_dtypes_mapper(self): {"start": None, "end": None}, ], ) - else: self.assertEqual( list(df.date), diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 02a7a6a79..9e42fb737 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock import datetime import decimal -from unittest import mock import pytest @@ -34,12 +34,16 @@ def class_under_test(): return RowIterator +# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 +# pragma added due to issues with coverage. @pytest.mark.skipif( pandas.__version__.startswith("2."), reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", ) -def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): - # See tests/system/test_arrow.py for the actual types we get from the API. +def test_to_dataframe_nullable_scalars( + monkeypatch, class_under_test +): # pragma: NO COVER + """See tests/system/test_arrow.py for the actual types we get from the API.""" arrow_schema = pyarrow.schema( [ pyarrow.field("bignumeric_col", pyarrow.decimal256(76, scale=38)), @@ -129,12 +133,10 @@ def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): assert df["int64_col"][0] == -7 assert df["numeric_col"][0] == decimal.Decimal("-123.456789") assert df["string_col"][0] == "abcdefg" - # Pandas timedelta64 might be a better choice for pandas time columns. Then # they can more easily be combined with date columns to form datetimes. # https://github.com/googleapis/python-bigquery/issues/862 assert df["time_col"][0] == datetime.time(14, 21, 17, 123456) - assert df["timestamp_col"][0] == pandas.to_datetime("2021-08-09 13:30:44.123456Z") From f8572dd86595361bae82c3232b2c0d159690a7b7 Mon Sep 17 00:00:00 2001 From: Lkhagvadorj Sukhtsoodol Date: Tue, 4 Mar 2025 18:50:05 +0000 Subject: [PATCH 415/536] fix: adding property setter for table constraints, #1990 (#2092) * fix: adding property setter for table constraints, #1990 * fix: adding unit test for to_api_repr() method * fix: adding system test for bigquery table update * fix: adding more test case for table constraints property * fix: adding more test case for table constraints property * fix: linting code * fix: adding unit tests for test table, table constraint and foreign key * fix: linting based on noxfile * fix: linting based on noxfile * fix: adding unit tests and system test * fix: clearing lint error * fix: adding table constraint eq unit tests * fix: adding type to to_api_repr resource --- google/cloud/bigquery/table.py | 45 +++- tests/system/test_client.py | 77 ++++++ tests/unit/test_table.py | 442 +++++++++++++++++++++++++++++++++ 3 files changed, 562 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index e7f3c9a36..9950b1a53 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1058,6 +1058,17 @@ def table_constraints(self) -> Optional["TableConstraints"]: table_constraints = TableConstraints.from_api_repr(table_constraints) return table_constraints + @table_constraints.setter + def table_constraints(self, value): + """Tables Primary Key and Foreign Key information.""" + api_repr = value + if not isinstance(value, TableConstraints) and value is not None: + raise ValueError( + "value must be google.cloud.bigquery.table.TableConstraints or None" + ) + api_repr = value.to_api_repr() if value else None + self._properties[self._PROPERTY_TO_API_FIELD["table_constraints"]] = api_repr + @property def resource_tags(self): """Dict[str, str]: Resource tags for the table. @@ -1111,11 +1122,9 @@ def external_catalog_table_options( def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]: """Optional. Specifies metadata of the foreign data type definition in field schema (TableFieldSchema.foreign_type_definition). - Returns: Optional[schema.ForeignTypeInfo]: Foreign type information, or :data:`None` if not set. - .. Note:: foreign_type_info is only required if you are referencing an external catalog such as a Hive table. @@ -3404,6 +3413,20 @@ def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignKey": ], ) + def to_api_repr(self) -> Dict[str, Any]: + """Return a dictionary representing this object.""" + return { + "name": self.name, + "referencedTable": self.referenced_table.to_api_repr(), + "columnReferences": [ + { + "referencingColumn": column_reference.referencing_column, + "referencedColumn": column_reference.referenced_column, + } + for column_reference in self.column_references + ], + } + class TableConstraints: """The TableConstraints defines the primary key and foreign key. @@ -3425,6 +3448,13 @@ def __init__( self.primary_key = primary_key self.foreign_keys = foreign_keys + def __eq__(self, other): + if not isinstance(other, TableConstraints) and other is not None: + raise TypeError("The value provided is not a BigQuery TableConstraints.") + return ( + self.primary_key == other.primary_key if other.primary_key else None + ) and (self.foreign_keys == other.foreign_keys if other.foreign_keys else None) + @classmethod def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints": """Create an instance from API representation.""" @@ -3440,6 +3470,17 @@ def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints": ] return cls(primary_key, foreign_keys) + def to_api_repr(self) -> Dict[str, Any]: + """Return a dictionary representing this object.""" + resource: Dict[str, Any] = {} + if self.primary_key: + resource["primaryKey"] = {"columns": self.primary_key.columns} + if self.foreign_keys: + resource["foreignKeys"] = [ + foreign_key.to_api_repr() for foreign_key in self.foreign_keys + ] + return resource + def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 30e9f94a3..9df572b14 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -97,6 +97,10 @@ ], ), ] +TABLE_CONSTRAINTS_SCHEMA = [ + bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("fk_id", "STRING", mode="REQUIRED"), +] SOURCE_URIS_AVRO = [ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro", @@ -901,6 +905,79 @@ def test_update_table_clustering_configuration(self): table3 = Config.CLIENT.update_table(table2, ["clustering_fields"]) self.assertIsNone(table3.clustering_fields, None) + def test_update_table_constraints(self): + from google.cloud.bigquery.table import TableConstraints + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + dataset = self.temp_dataset(_make_dataset_id("update_table")) + + TABLE_NAME = "test_table" + table_arg = Table(dataset.table(TABLE_NAME), schema=TABLE_CONSTRAINTS_SCHEMA) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + self.assertTrue(_table_exists(table)) + + REFERENCE_TABLE_NAME = "test_table2" + reference_table_arg = Table( + dataset.table(REFERENCE_TABLE_NAME), + schema=[ + bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), + ], + ) + reference_table = helpers.retry_403(Config.CLIENT.create_table)( + reference_table_arg + ) + self.to_delete.insert(0, reference_table) + self.assertTrue(_table_exists(reference_table)) + + reference_table.table_constraints = TableConstraints( + primary_key=PrimaryKey(columns=["id"]), foreign_keys=None + ) + reference_table2 = Config.CLIENT.update_table( + reference_table, ["table_constraints"] + ) + self.assertEqual( + reference_table2.table_constraints.primary_key, + reference_table.table_constraints.primary_key, + ) + + table_constraints = TableConstraints( + primary_key=PrimaryKey(columns=["id"]), + foreign_keys=[ + ForeignKey( + name="fk_id", + referenced_table=TableReference(dataset, "test_table2"), + column_references=[ + ColumnReference(referencing_column="id", referenced_column="id") + ], + ), + ], + ) + + table.table_constraints = table_constraints + table2 = Config.CLIENT.update_table(table, ["table_constraints"]) + self.assertEqual( + table2.table_constraints, + table_constraints, + ) + + table2.table_constraints = None + table3 = Config.CLIENT.update_table(table2, ["table_constraints"]) + self.assertIsNone(table3.table_constraints, None) + + reference_table2.table_constraints = None + reference_table3 = Config.CLIENT.update_table( + reference_table2, ["table_constraints"] + ) + self.assertIsNone(reference_table3.table_constraints, None) + @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 7644186f3..b846036ab 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -893,6 +893,158 @@ def test_table_constraints_property_getter(self): assert isinstance(table_constraints, TableConstraints) assert table_constraints.primary_key == PrimaryKey(columns=["id"]) + def test_table_constraints_property_setter(self): + from google.cloud.bigquery.table import ( + ColumnReference, + ForeignKey, + PrimaryKey, + TableConstraints, + TableReference, + ) + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + primary_key = PrimaryKey(columns=["id"]) + foreign_keys = [ + ForeignKey( + name="fk_name", + referenced_table=TableReference.from_string( + "my_project.my_dataset.table" + ), + column_references=[ + ColumnReference( + referenced_column="product_id", referencing_column="id" + ) + ], + ) + ] + table_constraints = TableConstraints( + primary_key=primary_key, foreign_keys=foreign_keys + ) + table.table_constraints = table_constraints + + assert table._properties["tableConstraints"] == { + "primaryKey": {"columns": ["id"]}, + "foreignKeys": [ + { + "name": "fk_name", + "referencedTable": { + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "table", + }, + "columnReferences": [ + {"referencedColumn": "product_id", "referencingColumn": "id"} + ], + } + ], + } + + def test_table_constraints_property_setter_empty_value(self): + from google.cloud.bigquery.table import TableConstraints + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table.table_constraints = TableConstraints(primary_key=None, foreign_keys=None) + assert table._properties["tableConstraints"] == {} + + def test_table_constraints_property_setter_invalid_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + with pytest.raises( + ValueError, + match="value must be google.cloud.bigquery.table.TableConstraints or None", + ): + table.table_constraints = "invalid_value" + + def test_table_constraints_property_setter_none_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table.table_constraints = None + assert table._properties["tableConstraints"] is None + + def test_table_constraints_property_setter_only_primary_key_set(self): + from google.cloud.bigquery.table import PrimaryKey, TableConstraints + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + primary_key = PrimaryKey(columns=["id"]) + + table_constraints = TableConstraints(primary_key=primary_key, foreign_keys=None) + table.table_constraints = table_constraints + + assert table._properties["tableConstraints"] == { + "primaryKey": {"columns": ["id"]} + } + + def test_table_constraints_property_setter_only_foriegn_keys(self): + from google.cloud.bigquery.table import ( + ColumnReference, + ForeignKey, + TableConstraints, + TableReference, + ) + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + foreign_keys = [ + ForeignKey( + name="fk_name", + referenced_table=TableReference.from_string( + "my_project.my_dataset.table" + ), + column_references=[ + ColumnReference( + referenced_column="product_id", referencing_column="id" + ) + ], + ) + ] + table_constraints = TableConstraints( + primary_key=None, foreign_keys=foreign_keys + ) + table.table_constraints = table_constraints + + assert table._properties["tableConstraints"] == { + "foreignKeys": [ + { + "name": "fk_name", + "referencedTable": { + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "table", + }, + "columnReferences": [ + {"referencedColumn": "product_id", "referencingColumn": "id"} + ], + } + ] + } + + def test_table_constraints_property_setter_empty_constraints(self): + from google.cloud.bigquery.table import TableConstraints + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table_constraints = TableConstraints(primary_key=None, foreign_keys=None) + table.table_constraints = table_constraints + + assert table._properties["tableConstraints"] == {} + def test_description_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -5889,6 +6041,48 @@ def test__eq__other_type(self): with self.assertRaises(TypeError): foreign_key == "This is not a Foreign Key" + def test_to_api_repr(self): + from google.cloud.bigquery.table import ColumnReference, TableReference + + name = "my_fk" + referenced_table = TableReference.from_string("my-project.mydataset.mytable") + column_references = [ + ColumnReference(referencing_column="product_id", referenced_column="id") + ] + foreign_key = self._make_one(name, referenced_table, column_references) + + expected = { + "name": name, + "referencedTable": { + "projectId": "my-project", + "datasetId": "mydataset", + "tableId": "mytable", + }, + "columnReferences": [ + {"referencingColumn": "product_id", "referencedColumn": "id"} + ], + } + self.assertEqual(foreign_key.to_api_repr(), expected) + + def test_to_api_repr_empty_column_references(self): + from google.cloud.bigquery.table import TableReference + + name = "my_fk" + referenced_table = TableReference.from_string("my-project.mydataset.mytable") + column_references = [] + foreign_key = self._make_one(name, referenced_table, column_references) + + expected = { + "name": name, + "referencedTable": { + "projectId": "my-project", + "datasetId": "mydataset", + "tableId": "mytable", + }, + "columnReferences": [], + } + self.assertEqual(foreign_key.to_api_repr(), expected) + class TestTableConstraint(unittest.TestCase): @staticmethod @@ -5906,6 +6100,144 @@ def test_constructor_defaults(self): self.assertIsNone(instance.primary_key) self.assertIsNone(instance.foreign_keys) + def test_constructor_explicit(self): + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + primary_key = PrimaryKey(columns=["my_pk_id"]) + foriegn_keys = [ + ForeignKey( + name="my_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-table" + ), + column_references=[ + ColumnReference(referencing_column="id", referenced_column="id"), + ], + ), + ] + + table_constraint = self._make_one( + primary_key=primary_key, + foreign_keys=foriegn_keys, + ) + + self.assertEqual(table_constraint.primary_key, primary_key) + self.assertEqual(table_constraint.foreign_keys, foriegn_keys) + + def test_constructor_explicit_with_none(self): + table_constraint = self._make_one(primary_key=None, foreign_keys=None) + + self.assertIsNone(table_constraint.primary_key) + self.assertIsNone(table_constraint.foreign_keys) + + def test__eq__primary_key_mismatch(self): + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + foriegn_keys = [ + ForeignKey( + name="my_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-table" + ), + column_references=[ + ColumnReference(referencing_column="id", referenced_column="id"), + ], + ), + ] + + table_constraint = self._make_one( + primary_key=PrimaryKey(columns=["my_pk_id"]), + foreign_keys=foriegn_keys, + ) + other_table_constraint = self._make_one( + primary_key=PrimaryKey(columns=["my_other_pk_id"]), + foreign_keys=foriegn_keys, + ) + + self.assertNotEqual(table_constraint, other_table_constraint) + + def test__eq__foreign_keys_mismatch(self): + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + primary_key = PrimaryKey(columns=["my_pk_id"]) + + table_constraint = self._make_one( + primary_key=primary_key, + foreign_keys=[ + ForeignKey( + name="my_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-table" + ), + column_references=[ + ColumnReference( + referencing_column="id", referenced_column="id" + ), + ], + ), + ], + ) + other_table_constraint = self._make_one( + primary_key=primary_key, + foreign_keys=[ + ForeignKey( + name="my_other_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-other-table" + ), + column_references=[ + ColumnReference( + referencing_column="other_id", referenced_column="other_id" + ), + ], + ), + ], + ) + + self.assertNotEqual(table_constraint, other_table_constraint) + + def test__eq__other_type(self): + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + table_constraint = self._make_one( + primary_key=PrimaryKey(columns=["my_pk_id"]), + foreign_keys=[ + ForeignKey( + name="my_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-table" + ), + column_references=[ + ColumnReference( + referencing_column="id", referenced_column="id" + ), + ], + ), + ], + ) + with self.assertRaises(TypeError): + table_constraint == "This is not a Table Constraint" + def test_from_api_repr_full_resource(self): from google.cloud.bigquery.table import ( ColumnReference, @@ -5985,6 +6317,116 @@ def test_from_api_repr_only_foreign_keys_resource(self): self.assertIsNone(instance.primary_key) self.assertIsNotNone(instance.foreign_keys) + def test_to_api_repr(self): + from google.cloud.bigquery.table import ColumnReference, ForeignKey, PrimaryKey + + primary_key = PrimaryKey(columns=["id", "product_id"]) + foreign_keys = [ + ForeignKey( + name="my_fk_name", + referenced_table=TableReference.from_string( + "my-project.my-dataset.products" + ), + column_references=[ + ColumnReference( + referencing_column="product_id", referenced_column="id" + ), + ], + ) + ] + instance = self._make_one(primary_key=primary_key, foreign_keys=foreign_keys) + + expected = { + "primaryKey": { + "columns": ["id", "product_id"], + }, + "foreignKeys": [ + { + "name": "my_fk_name", + "referencedTable": { + "projectId": "my-project", + "datasetId": "my-dataset", + "tableId": "products", + }, + "columnReferences": [ + {"referencingColumn": "product_id", "referencedColumn": "id"}, + ], + } + ], + } + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_only_primary_key(self): + from google.cloud.bigquery.table import PrimaryKey + + primary_key = PrimaryKey(columns=["id", "product_id"]) + instance = self._make_one(primary_key=primary_key, foreign_keys=None) + expected = { + "primaryKey": { + "columns": ["id", "product_id"], + }, + } + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_empty_primary_key(self): + from google.cloud.bigquery.table import PrimaryKey + + primary_key = PrimaryKey(columns=[]) + instance = self._make_one(primary_key=primary_key, foreign_keys=None) + + expected = { + "primaryKey": { + "columns": [], + }, + } + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_only_foreign_keys(self): + from google.cloud.bigquery.table import ColumnReference, ForeignKey + + foreign_keys = [ + ForeignKey( + name="my_fk_name", + referenced_table=TableReference.from_string( + "my-project.my-dataset.products" + ), + column_references=[ + ColumnReference( + referencing_column="product_id", referenced_column="id" + ), + ], + ) + ] + instance = self._make_one(primary_key=None, foreign_keys=foreign_keys) + expected = { + "foreignKeys": [ + { + "name": "my_fk_name", + "referencedTable": { + "projectId": "my-project", + "datasetId": "my-dataset", + "tableId": "products", + }, + "columnReferences": [ + {"referencingColumn": "product_id", "referencedColumn": "id"}, + ], + } + ], + } + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_empty_foreign_keys(self): + foreign_keys = [] + instance = self._make_one(primary_key=None, foreign_keys=foreign_keys) + + expected = {} + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_empty_constraints(self): + instance = self._make_one(primary_key=None, foreign_keys=None) + expected = {} + self.assertEqual(instance.to_api_repr(), expected) + class TestExternalCatalogTableOptions: PROJECT = "test-project" From 2d5f9320d7103bc64c7ba496ba54bb0ef52b5605 Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Fri, 7 Mar 2025 13:58:40 -0800 Subject: [PATCH 416/536] feat: add query text and total bytes processed to RowIterator (#2140) --- google/cloud/bigquery/_job_helpers.py | 2 ++ google/cloud/bigquery/client.py | 8 ++++++++ google/cloud/bigquery/job/query.py | 2 ++ google/cloud/bigquery/table.py | 18 ++++++++++++++++++ tests/unit/job/test_query.py | 3 +++ tests/unit/test_client.py | 3 +++ 6 files changed, 36 insertions(+) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index b028cd357..a8373c356 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -526,6 +526,8 @@ def do_query(): query_id=query_results.query_id, project=query_results.project, num_dml_affected_rows=query_results.num_dml_affected_rows, + query=query, + total_bytes_processed=query_results.total_bytes_processed, ) if job_retry is not None: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 03ded93b1..8bbdd6c32 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -4081,6 +4081,8 @@ def _list_rows_from_query_results( query_id: Optional[str] = None, first_page_response: Optional[Dict[str, Any]] = None, num_dml_affected_rows: Optional[int] = None, + query: Optional[str] = None, + total_bytes_processed: Optional[int] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -4128,6 +4130,10 @@ def _list_rows_from_query_results( num_dml_affected_rows (Optional[int]): If this RowIterator is the result of a DML query, the number of rows that were affected. + query (Optional[str]): + The query text used. + total_bytes_processed (Optinal[int]): + total bytes processed from job statistics, if present. Returns: google.cloud.bigquery.table.RowIterator: @@ -4165,6 +4171,8 @@ def _list_rows_from_query_results( query_id=query_id, first_page_response=first_page_response, num_dml_affected_rows=num_dml_affected_rows, + query=query, + total_bytes_processed=total_bytes_processed, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index ca2448eaa..a27c10530 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1741,6 +1741,8 @@ def is_job_done(): query_id=self.query_id, first_page_response=first_page_response, num_dml_affected_rows=self._query_results.num_dml_affected_rows, + query=self.query, + total_bytes_processed=self.total_bytes_processed, **list_rows_kwargs, ) rows._preserve_order = _contains_order_by(self.query) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 9950b1a53..4d79d60da 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1760,6 +1760,10 @@ class RowIterator(HTTPIterator): first_page_response (Optional[dict]): API response for the first page of results. These are returned when the first page is requested. + query (Optional[str]): + The query text used. + total_bytes_processed (Optinal[int]): + total bytes processed from job statistics, if present. """ def __init__( @@ -1781,6 +1785,8 @@ def __init__( query_id: Optional[str] = None, project: Optional[str] = None, num_dml_affected_rows: Optional[int] = None, + query: Optional[str] = None, + total_bytes_processed: Optional[int] = None, ): super(RowIterator, self).__init__( client, @@ -1808,6 +1814,8 @@ def __init__( self._query_id = query_id self._project = project self._num_dml_affected_rows = num_dml_affected_rows + self._query = query + self._total_bytes_processed = total_bytes_processed @property def _billing_project(self) -> Optional[str]: @@ -1855,6 +1863,16 @@ def query_id(self) -> Optional[str]: """ return self._query_id + @property + def query(self) -> Optional[str]: + """The query text used.""" + return self._query + + @property + def total_bytes_processed(self) -> Optional[int]: + """total bytes processed from job statistics, if present.""" + return self._total_bytes_processed + def _is_almost_completely_cached(self): """Check if all results are completely cached. diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 4bbd31c73..1df65279d 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -887,6 +887,7 @@ def test_result_reloads_job_state_until_done(self): } job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") + job_resource_done["statistics"]["query"]["totalBytesProcessed"] = str(1234) job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", @@ -966,6 +967,8 @@ def test_result_reloads_job_state_until_done(self): # Test that the total_rows property has changed during iteration, based # on the response from tabledata.list. self.assertEqual(result.total_rows, 1) + self.assertEqual(result.query, job.query) + self.assertEqual(result.total_bytes_processed, 1234) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 4f13d6ecc..34ef680dd 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5517,6 +5517,7 @@ def test_query_and_wait_defaults(self): "totalRows": "1", "rows": [{"f": [{"v": "5552452"}]}], "queryId": "job_abcDEF_", + "totalBytesProcessed": 1234, } creds = _make_credentials() http = object() @@ -5532,6 +5533,8 @@ def test_query_and_wait_defaults(self): self.assertIsNone(rows.job_id) self.assertIsNone(rows.project) self.assertIsNone(rows.location) + self.assertEqual(rows.query, query) + self.assertEqual(rows.total_bytes_processed, 1234) # Verify the request we send is to jobs.query. conn.api_request.assert_called_once() From faeb51d48dd9de9d2378db1898f770130d51887d Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 10 Mar 2025 11:02:52 -0400 Subject: [PATCH 417/536] chore: remove unused files (#2141) --- .github/.OwlBot.lock.yaml | 4 +- .kokoro/docker/docs/Dockerfile | 89 ----- .kokoro/docker/docs/fetch_gpg_keys.sh | 45 --- .kokoro/docker/docs/requirements.in | 2 - .kokoro/docker/docs/requirements.txt | 297 --------------- .kokoro/docs/common.cfg | 66 ---- .kokoro/docs/docs-presubmit.cfg | 28 -- .kokoro/docs/docs.cfg | 1 - .kokoro/publish-docs.sh | 58 --- .kokoro/release.sh | 29 -- .kokoro/release/common.cfg | 43 --- .kokoro/release/release.cfg | 1 - .kokoro/requirements.in | 11 - .kokoro/requirements.txt | 513 -------------------------- 14 files changed, 2 insertions(+), 1185 deletions(-) delete mode 100644 .kokoro/docker/docs/Dockerfile delete mode 100755 .kokoro/docker/docs/fetch_gpg_keys.sh delete mode 100644 .kokoro/docker/docs/requirements.in delete mode 100644 .kokoro/docker/docs/requirements.txt delete mode 100644 .kokoro/docs/common.cfg delete mode 100644 .kokoro/docs/docs-presubmit.cfg delete mode 100644 .kokoro/docs/docs.cfg delete mode 100755 .kokoro/publish-docs.sh delete mode 100755 .kokoro/release.sh delete mode 100644 .kokoro/release/common.cfg delete mode 100644 .kokoro/release/release.cfg delete mode 100644 .kokoro/requirements.in delete mode 100644 .kokoro/requirements.txt diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 3f7634f25..9d743afe8 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:f016446d6e520e5fb552c45b110cba3f217bffdd3d06bdddd076e9e6d13266cf -# created: 2025-02-21T19:32:52.01306189Z + digest: sha256:5581906b957284864632cde4e9c51d1cc66b0094990b27e689132fe5cd036046 +# created: 2025-03-07 diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile deleted file mode 100644 index e5410e296..000000000 --- a/.kokoro/docker/docs/Dockerfile +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ubuntu:24.04 - -ENV DEBIAN_FRONTEND noninteractive - -# Ensure local Python is preferred over distribution Python. -ENV PATH /usr/local/bin:$PATH - -# Install dependencies. -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - apt-transport-https \ - build-essential \ - ca-certificates \ - curl \ - dirmngr \ - git \ - gpg-agent \ - graphviz \ - libbz2-dev \ - libdb5.3-dev \ - libexpat1-dev \ - libffi-dev \ - liblzma-dev \ - libreadline-dev \ - libsnappy-dev \ - libssl-dev \ - libsqlite3-dev \ - portaudio19-dev \ - redis-server \ - software-properties-common \ - ssh \ - sudo \ - tcl \ - tcl-dev \ - tk \ - tk-dev \ - uuid-dev \ - wget \ - zlib1g-dev \ - && add-apt-repository universe \ - && apt-get update \ - && apt-get -y install jq \ - && apt-get clean autoclean \ - && apt-get autoremove -y \ - && rm -rf /var/lib/apt/lists/* \ - && rm -f /var/cache/apt/archives/*.deb - - -###################### Install python 3.10.14 for docs/docfx session - -# Download python 3.10.14 -RUN wget https://www.python.org/ftp/python/3.10.14/Python-3.10.14.tgz - -# Extract files -RUN tar -xvf Python-3.10.14.tgz - -# Install python 3.10.14 -RUN ./Python-3.10.14/configure --enable-optimizations -RUN make altinstall - -ENV PATH /usr/local/bin/python3.10:$PATH - -###################### Install pip -RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ - && python3.10 /tmp/get-pip.py \ - && rm /tmp/get-pip.py - -# Test pip -RUN python3.10 -m pip - -# Install build requirements -COPY requirements.txt /requirements.txt -RUN python3.10 -m pip install --require-hashes -r requirements.txt - -CMD ["python3.10"] diff --git a/.kokoro/docker/docs/fetch_gpg_keys.sh b/.kokoro/docker/docs/fetch_gpg_keys.sh deleted file mode 100755 index d653dd868..000000000 --- a/.kokoro/docker/docs/fetch_gpg_keys.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# A script to fetch gpg keys with retry. -# Avoid jinja parsing the file. -# - -function retry { - if [[ "${#}" -le 1 ]]; then - echo "Usage: ${0} retry_count commands.." - exit 1 - fi - local retries=${1} - local command="${@:2}" - until [[ "${retries}" -le 0 ]]; do - $command && return 0 - if [[ $? -ne 0 ]]; then - echo "command failed, retrying" - ((retries--)) - fi - done - return 1 -} - -# 3.6.9, 3.7.5 (Ned Deily) -retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ - 0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D - -# 3.8.0 (Łukasz Langa) -retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ - E3FF2839C048B25C084DEBE9B26995E310250568 - -# diff --git a/.kokoro/docker/docs/requirements.in b/.kokoro/docker/docs/requirements.in deleted file mode 100644 index 586bd0703..000000000 --- a/.kokoro/docker/docs/requirements.in +++ /dev/null @@ -1,2 +0,0 @@ -nox -gcp-docuploader diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt deleted file mode 100644 index a9360a25b..000000000 --- a/.kokoro/docker/docs/requirements.txt +++ /dev/null @@ -1,297 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --allow-unsafe --generate-hashes requirements.in -# -argcomplete==3.5.3 \ - --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ - --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 - # via nox -cachetools==5.5.0 \ - --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ - --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a - # via google-auth -certifi==2024.12.14 \ - --hash=sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56 \ - --hash=sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db - # via requests -charset-normalizer==3.4.1 \ - --hash=sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537 \ - --hash=sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa \ - --hash=sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a \ - --hash=sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294 \ - --hash=sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b \ - --hash=sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd \ - --hash=sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601 \ - --hash=sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd \ - --hash=sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4 \ - --hash=sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d \ - --hash=sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2 \ - --hash=sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313 \ - --hash=sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd \ - --hash=sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa \ - --hash=sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8 \ - --hash=sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1 \ - --hash=sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2 \ - --hash=sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496 \ - --hash=sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d \ - --hash=sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b \ - --hash=sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e \ - --hash=sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a \ - --hash=sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4 \ - --hash=sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca \ - --hash=sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78 \ - --hash=sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408 \ - --hash=sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5 \ - --hash=sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3 \ - --hash=sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f \ - --hash=sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a \ - --hash=sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765 \ - --hash=sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6 \ - --hash=sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146 \ - --hash=sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6 \ - --hash=sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9 \ - --hash=sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd \ - --hash=sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c \ - --hash=sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f \ - --hash=sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545 \ - --hash=sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176 \ - --hash=sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770 \ - --hash=sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824 \ - --hash=sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f \ - --hash=sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf \ - --hash=sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487 \ - --hash=sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d \ - --hash=sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd \ - --hash=sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b \ - --hash=sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534 \ - --hash=sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f \ - --hash=sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b \ - --hash=sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9 \ - --hash=sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd \ - --hash=sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125 \ - --hash=sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9 \ - --hash=sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de \ - --hash=sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11 \ - --hash=sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d \ - --hash=sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35 \ - --hash=sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f \ - --hash=sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda \ - --hash=sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7 \ - --hash=sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a \ - --hash=sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971 \ - --hash=sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8 \ - --hash=sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41 \ - --hash=sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d \ - --hash=sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f \ - --hash=sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757 \ - --hash=sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a \ - --hash=sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886 \ - --hash=sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77 \ - --hash=sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76 \ - --hash=sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247 \ - --hash=sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85 \ - --hash=sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb \ - --hash=sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7 \ - --hash=sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e \ - --hash=sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6 \ - --hash=sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037 \ - --hash=sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1 \ - --hash=sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e \ - --hash=sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807 \ - --hash=sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407 \ - --hash=sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c \ - --hash=sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12 \ - --hash=sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3 \ - --hash=sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089 \ - --hash=sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd \ - --hash=sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e \ - --hash=sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00 \ - --hash=sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616 - # via requests -click==8.1.8 \ - --hash=sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2 \ - --hash=sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a - # via gcp-docuploader -colorlog==6.9.0 \ - --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ - --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 - # via - # gcp-docuploader - # nox -distlib==0.3.9 \ - --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ - --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 - # via virtualenv -filelock==3.16.1 \ - --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ - --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 - # via virtualenv -gcp-docuploader==0.6.5 \ - --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ - --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea - # via -r requirements.in -google-api-core==2.24.0 \ - --hash=sha256:10d82ac0fca69c82a25b3efdeefccf6f28e02ebb97925a8cce8edbfe379929d9 \ - --hash=sha256:e255640547a597a4da010876d333208ddac417d60add22b6851a0c66a831fcaf - # via - # google-cloud-core - # google-cloud-storage -google-auth==2.37.0 \ - --hash=sha256:0054623abf1f9c83492c63d3f47e77f0a544caa3d40b2d98e099a611c2dd5d00 \ - --hash=sha256:42664f18290a6be591be5329a96fe30184be1a1badb7292a7f686a9659de9ca0 - # via - # google-api-core - # google-cloud-core - # google-cloud-storage -google-cloud-core==2.4.1 \ - --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ - --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 - # via google-cloud-storage -google-cloud-storage==2.19.0 \ - --hash=sha256:aeb971b5c29cf8ab98445082cbfe7b161a1f48ed275822f59ed3f1524ea54fba \ - --hash=sha256:cd05e9e7191ba6cb68934d8eb76054d9be4562aa89dbc4236feee4d7d51342b2 - # via gcp-docuploader -google-crc32c==1.6.0 \ - --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ - --hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ - --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ - --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ - --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ - --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ - --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ - --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ - --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ - --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ - --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ - --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ - --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ - --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ - --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ - --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ - --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ - --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ - --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ - --hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ - --hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ - --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ - --hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ - --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ - --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ - --hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ - --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 - # via - # google-cloud-storage - # google-resumable-media -google-resumable-media==2.7.2 \ - --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ - --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 - # via google-cloud-storage -googleapis-common-protos==1.66.0 \ - --hash=sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c \ - --hash=sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed - # via google-api-core -idna==3.10 \ - --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ - --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 - # via requests -nox==2024.10.9 \ - --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ - --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r requirements.in -packaging==24.2 \ - --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ - --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f - # via nox -platformdirs==4.3.6 \ - --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ - --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb - # via virtualenv -proto-plus==1.25.0 \ - --hash=sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961 \ - --hash=sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91 - # via google-api-core -protobuf==5.29.3 \ - --hash=sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f \ - --hash=sha256:0eb32bfa5219fc8d4111803e9a690658aa2e6366384fd0851064b963b6d1f2a7 \ - --hash=sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888 \ - --hash=sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620 \ - --hash=sha256:6ce8cc3389a20693bfde6c6562e03474c40851b44975c9b2bf6df7d8c4f864da \ - --hash=sha256:84a57163a0ccef3f96e4b6a20516cedcf5bb3a95a657131c5c3ac62200d23252 \ - --hash=sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a \ - --hash=sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e \ - --hash=sha256:b89c115d877892a512f79a8114564fb435943b59067615894c3b13cd3e1fa107 \ - --hash=sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f \ - --hash=sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84 - # via - # gcp-docuploader - # google-api-core - # googleapis-common-protos - # proto-plus -pyasn1==0.6.1 \ - --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ - --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 \ - --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ - --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c - # via google-auth -requests==2.32.3 \ - --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ - --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 - # via - # google-api-core - # google-cloud-storage -rsa==4.9 \ - --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ - --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 - # via google-auth -six==1.17.0 \ - --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ - --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 - # via gcp-docuploader -tomli==2.2.1 \ - --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ - --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ - --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \ - --hash=sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b \ - --hash=sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8 \ - --hash=sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6 \ - --hash=sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77 \ - --hash=sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff \ - --hash=sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea \ - --hash=sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192 \ - --hash=sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249 \ - --hash=sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee \ - --hash=sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4 \ - --hash=sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98 \ - --hash=sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8 \ - --hash=sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4 \ - --hash=sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281 \ - --hash=sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744 \ - --hash=sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69 \ - --hash=sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13 \ - --hash=sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140 \ - --hash=sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e \ - --hash=sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e \ - --hash=sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc \ - --hash=sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff \ - --hash=sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec \ - --hash=sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2 \ - --hash=sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222 \ - --hash=sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106 \ - --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ - --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ - --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 - # via nox -urllib3==2.3.0 \ - --hash=sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df \ - --hash=sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d - # via requests -virtualenv==20.28.1 \ - --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ - --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 - # via nox diff --git a/.kokoro/docs/common.cfg b/.kokoro/docs/common.cfg deleted file mode 100644 index 76ae5f13b..000000000 --- a/.kokoro/docs/common.cfg +++ /dev/null @@ -1,66 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-lib-docs" -} -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/publish-docs.sh" -} - -env_vars: { - key: "STAGING_BUCKET" - value: "docs-staging" -} - -env_vars: { - key: "V2_STAGING_BUCKET" - # Push google cloud library docs to the Cloud RAD bucket `docs-staging-v2` - value: "docs-staging-v2" -} - -# It will upload the docker image after successful builds. -env_vars: { - key: "TRAMPOLINE_IMAGE_UPLOAD" - value: "true" -} - -# It will always build the docker image. -env_vars: { - key: "TRAMPOLINE_DOCKERFILE" - value: ".kokoro/docker/docs/Dockerfile" -} - -# Fetch the token needed for reporting release status to GitHub -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "yoshi-automation-github-key" - } - } -} - -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "docuploader_service_account" - } - } -} diff --git a/.kokoro/docs/docs-presubmit.cfg b/.kokoro/docs/docs-presubmit.cfg deleted file mode 100644 index 08adb2e28..000000000 --- a/.kokoro/docs/docs-presubmit.cfg +++ /dev/null @@ -1,28 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "STAGING_BUCKET" - value: "gcloud-python-test" -} - -env_vars: { - key: "V2_STAGING_BUCKET" - value: "gcloud-python-test" -} - -# We only upload the image in the main `docs` build. -env_vars: { - key: "TRAMPOLINE_IMAGE_UPLOAD" - value: "false" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/build.sh" -} - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "docs docfx" -} diff --git a/.kokoro/docs/docs.cfg b/.kokoro/docs/docs.cfg deleted file mode 100644 index 8f43917d9..000000000 --- a/.kokoro/docs/docs.cfg +++ /dev/null @@ -1 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh deleted file mode 100755 index 4ed4aaf13..000000000 --- a/.kokoro/publish-docs.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -eo pipefail - -# Disable buffering, so that the logs stream through. -export PYTHONUNBUFFERED=1 - -export PATH="${HOME}/.local/bin:${PATH}" - -# build docs -nox -s docs - -# create metadata -python3.10 -m docuploader create-metadata \ - --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ - --version=$(python3.10 setup.py --version) \ - --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ - --distribution-name=$(python3.10 setup.py --name) \ - --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ - --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ - --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) - -cat docs.metadata - -# upload docs -python3.10 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}" - - -# docfx yaml files -nox -s docfx - -# create metadata. -python3.10 -m docuploader create-metadata \ - --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ - --version=$(python3.10 setup.py --version) \ - --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ - --distribution-name=$(python3.10 setup.py --name) \ - --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ - --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ - --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) - -cat docs.metadata - -# upload docs -python3.10 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" diff --git a/.kokoro/release.sh b/.kokoro/release.sh deleted file mode 100755 index 65deb5ed3..000000000 --- a/.kokoro/release.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -eo pipefail - -# Start the releasetool reporter -python3 -m pip install --require-hashes -r github/python-bigquery/.kokoro/requirements.txt -python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source /tmp/publisher-script - -# Disable buffering, so that the logs stream through. -export PYTHONUNBUFFERED=1 - -# Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-3") -cd github/python-bigquery -python3 setup.py sdist bdist_wheel -twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg deleted file mode 100644 index 6f57163f5..000000000 --- a/.kokoro/release/common.cfg +++ /dev/null @@ -1,43 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline.sh" - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-multi" -} -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/release.sh" -} - -# Fetch PyPI password -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "google-cloud-pypi-token-keystore-3" - } - } -} - -# Store the packages we uploaded to PyPI. That way, we have a record of exactly -# what we published, which we can use to generate SBOMs and attestations. -action { - define_artifacts { - regex: "github/python-bigquery/**/*.tar.gz" - strip_prefix: "github/python-bigquery" - } -} diff --git a/.kokoro/release/release.cfg b/.kokoro/release/release.cfg deleted file mode 100644 index 8f43917d9..000000000 --- a/.kokoro/release/release.cfg +++ /dev/null @@ -1 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/.kokoro/requirements.in b/.kokoro/requirements.in deleted file mode 100644 index fff4d9ce0..000000000 --- a/.kokoro/requirements.in +++ /dev/null @@ -1,11 +0,0 @@ -gcp-docuploader -gcp-releasetool>=2 # required for compatibility with cryptography>=42.x -importlib-metadata -typing-extensions -twine -wheel -setuptools -nox>=2022.11.21 # required to remove dependency on py -charset-normalizer<3 -click<8.1.0 -cryptography>=42.0.5 diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt deleted file mode 100644 index 6ad95a04a..000000000 --- a/.kokoro/requirements.txt +++ /dev/null @@ -1,513 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --allow-unsafe --generate-hashes requirements.in -# -argcomplete==3.5.1 \ - --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ - --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 - # via nox -attrs==24.2.0 \ - --hash=sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346 \ - --hash=sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2 - # via gcp-releasetool -backports-tarfile==1.2.0 \ - --hash=sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34 \ - --hash=sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991 - # via jaraco-context -cachetools==5.5.0 \ - --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ - --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a - # via google-auth -certifi==2024.8.30 \ - --hash=sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8 \ - --hash=sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9 - # via requests -cffi==1.17.1 \ - --hash=sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8 \ - --hash=sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2 \ - --hash=sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1 \ - --hash=sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15 \ - --hash=sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36 \ - --hash=sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824 \ - --hash=sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8 \ - --hash=sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36 \ - --hash=sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17 \ - --hash=sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf \ - --hash=sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc \ - --hash=sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3 \ - --hash=sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed \ - --hash=sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702 \ - --hash=sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1 \ - --hash=sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8 \ - --hash=sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903 \ - --hash=sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6 \ - --hash=sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d \ - --hash=sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b \ - --hash=sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e \ - --hash=sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be \ - --hash=sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c \ - --hash=sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683 \ - --hash=sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9 \ - --hash=sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c \ - --hash=sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8 \ - --hash=sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1 \ - --hash=sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4 \ - --hash=sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655 \ - --hash=sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67 \ - --hash=sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595 \ - --hash=sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0 \ - --hash=sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65 \ - --hash=sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41 \ - --hash=sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6 \ - --hash=sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401 \ - --hash=sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6 \ - --hash=sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3 \ - --hash=sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16 \ - --hash=sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93 \ - --hash=sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e \ - --hash=sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4 \ - --hash=sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964 \ - --hash=sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c \ - --hash=sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576 \ - --hash=sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0 \ - --hash=sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3 \ - --hash=sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662 \ - --hash=sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3 \ - --hash=sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff \ - --hash=sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5 \ - --hash=sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd \ - --hash=sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f \ - --hash=sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5 \ - --hash=sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14 \ - --hash=sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d \ - --hash=sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9 \ - --hash=sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7 \ - --hash=sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382 \ - --hash=sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a \ - --hash=sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e \ - --hash=sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a \ - --hash=sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4 \ - --hash=sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99 \ - --hash=sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87 \ - --hash=sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b - # via cryptography -charset-normalizer==2.1.1 \ - --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 \ - --hash=sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f - # via - # -r requirements.in - # requests -click==8.0.4 \ - --hash=sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1 \ - --hash=sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb - # via - # -r requirements.in - # gcp-docuploader - # gcp-releasetool -colorlog==6.8.2 \ - --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ - --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 - # via - # gcp-docuploader - # nox -cryptography==44.0.1 \ - --hash=sha256:00918d859aa4e57db8299607086f793fa7813ae2ff5a4637e318a25ef82730f7 \ - --hash=sha256:1e8d181e90a777b63f3f0caa836844a1182f1f265687fac2115fcf245f5fbec3 \ - --hash=sha256:1f9a92144fa0c877117e9748c74501bea842f93d21ee00b0cf922846d9d0b183 \ - --hash=sha256:21377472ca4ada2906bc313168c9dc7b1d7ca417b63c1c3011d0c74b7de9ae69 \ - --hash=sha256:24979e9f2040c953a94bf3c6782e67795a4c260734e5264dceea65c8f4bae64a \ - --hash=sha256:2a46a89ad3e6176223b632056f321bc7de36b9f9b93b2cc1cccf935a3849dc62 \ - --hash=sha256:322eb03ecc62784536bc173f1483e76747aafeb69c8728df48537eb431cd1911 \ - --hash=sha256:436df4f203482f41aad60ed1813811ac4ab102765ecae7a2bbb1dbb66dcff5a7 \ - --hash=sha256:4f422e8c6a28cf8b7f883eb790695d6d45b0c385a2583073f3cec434cc705e1a \ - --hash=sha256:53f23339864b617a3dfc2b0ac8d5c432625c80014c25caac9082314e9de56f41 \ - --hash=sha256:5fed5cd6102bb4eb843e3315d2bf25fede494509bddadb81e03a859c1bc17b83 \ - --hash=sha256:610a83540765a8d8ce0f351ce42e26e53e1f774a6efb71eb1b41eb01d01c3d12 \ - --hash=sha256:6c8acf6f3d1f47acb2248ec3ea261171a671f3d9428e34ad0357148d492c7864 \ - --hash=sha256:6f76fdd6fd048576a04c5210d53aa04ca34d2ed63336d4abd306d0cbe298fddf \ - --hash=sha256:72198e2b5925155497a5a3e8c216c7fb3e64c16ccee11f0e7da272fa93b35c4c \ - --hash=sha256:887143b9ff6bad2b7570da75a7fe8bbf5f65276365ac259a5d2d5147a73775f2 \ - --hash=sha256:888fcc3fce0c888785a4876ca55f9f43787f4c5c1cc1e2e0da71ad481ff82c5b \ - --hash=sha256:8e6a85a93d0642bd774460a86513c5d9d80b5c002ca9693e63f6e540f1815ed0 \ - --hash=sha256:94f99f2b943b354a5b6307d7e8d19f5c423a794462bde2bf310c770ba052b1c4 \ - --hash=sha256:9b336599e2cb77b1008cb2ac264b290803ec5e8e89d618a5e978ff5eb6f715d9 \ - --hash=sha256:a2d8a7045e1ab9b9f803f0d9531ead85f90c5f2859e653b61497228b18452008 \ - --hash=sha256:b8272f257cf1cbd3f2e120f14c68bff2b6bdfcc157fafdee84a1b795efd72862 \ - --hash=sha256:bf688f615c29bfe9dfc44312ca470989279f0e94bb9f631f85e3459af8efc009 \ - --hash=sha256:d9c5b9f698a83c8bd71e0f4d3f9f839ef244798e5ffe96febfa9714717db7af7 \ - --hash=sha256:dd7c7e2d71d908dc0f8d2027e1604102140d84b155e658c20e8ad1304317691f \ - --hash=sha256:df978682c1504fc93b3209de21aeabf2375cb1571d4e61907b3e7a2540e83026 \ - --hash=sha256:e403f7f766ded778ecdb790da786b418a9f2394f36e8cc8b796cc056ab05f44f \ - --hash=sha256:eb3889330f2a4a148abead555399ec9a32b13b7c8ba969b72d8e500eb7ef84cd \ - --hash=sha256:f4daefc971c2d1f82f03097dc6f216744a6cd2ac0f04c68fb935ea2ba2a0d420 \ - --hash=sha256:f51f5705ab27898afda1aaa430f34ad90dc117421057782022edf0600bec5f14 \ - --hash=sha256:fd0ee90072861e276b0ff08bd627abec29e32a53b2be44e41dbcdf87cbee2b00 - # via - # -r requirements.in - # gcp-releasetool - # secretstorage -distlib==0.3.9 \ - --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ - --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 - # via virtualenv -docutils==0.21.2 \ - --hash=sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f \ - --hash=sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 - # via readme-renderer -filelock==3.16.1 \ - --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ - --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 - # via virtualenv -gcp-docuploader==0.6.5 \ - --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ - --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea - # via -r requirements.in -gcp-releasetool==2.1.1 \ - --hash=sha256:25639269f4eae510094f9dbed9894977e1966933211eb155a451deebc3fc0b30 \ - --hash=sha256:845f4ded3d9bfe8cc7fdaad789e83f4ea014affa77785259a7ddac4b243e099e - # via -r requirements.in -google-api-core==2.21.0 \ - --hash=sha256:4a152fd11a9f774ea606388d423b68aa7e6d6a0ffe4c8266f74979613ec09f81 \ - --hash=sha256:6869eacb2a37720380ba5898312af79a4d30b8bca1548fb4093e0697dc4bdf5d - # via - # google-cloud-core - # google-cloud-storage -google-auth==2.35.0 \ - --hash=sha256:25df55f327ef021de8be50bad0dfd4a916ad0de96da86cd05661c9297723ad3f \ - --hash=sha256:f4c64ed4e01e8e8b646ef34c018f8bf3338df0c8e37d8b3bba40e7f574a3278a - # via - # gcp-releasetool - # google-api-core - # google-cloud-core - # google-cloud-storage -google-cloud-core==2.4.1 \ - --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ - --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 - # via google-cloud-storage -google-cloud-storage==2.18.2 \ - --hash=sha256:97a4d45c368b7d401ed48c4fdfe86e1e1cb96401c9e199e419d289e2c0370166 \ - --hash=sha256:aaf7acd70cdad9f274d29332673fcab98708d0e1f4dceb5a5356aaef06af4d99 - # via gcp-docuploader -google-crc32c==1.6.0 \ - --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ - --hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ - --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ - --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ - --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ - --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ - --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ - --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ - --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ - --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ - --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ - --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ - --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ - --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ - --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ - --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ - --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ - --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ - --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ - --hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ - --hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ - --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ - --hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ - --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ - --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ - --hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ - --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 - # via - # google-cloud-storage - # google-resumable-media -google-resumable-media==2.7.2 \ - --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ - --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 - # via google-cloud-storage -googleapis-common-protos==1.65.0 \ - --hash=sha256:2972e6c496f435b92590fd54045060867f3fe9be2c82ab148fc8885035479a63 \ - --hash=sha256:334a29d07cddc3aa01dee4988f9afd9b2916ee2ff49d6b757155dc0d197852c0 - # via google-api-core -idna==3.10 \ - --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ - --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 - # via requests -importlib-metadata==8.5.0 \ - --hash=sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b \ - --hash=sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7 - # via - # -r requirements.in - # keyring - # twine -jaraco-classes==3.4.0 \ - --hash=sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd \ - --hash=sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790 - # via keyring -jaraco-context==6.0.1 \ - --hash=sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3 \ - --hash=sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4 - # via keyring -jaraco-functools==4.1.0 \ - --hash=sha256:70f7e0e2ae076498e212562325e805204fc092d7b4c17e0e86c959e249701a9d \ - --hash=sha256:ad159f13428bc4acbf5541ad6dec511f91573b90fba04df61dafa2a1231cf649 - # via keyring -jeepney==0.8.0 \ - --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ - --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755 - # via - # keyring - # secretstorage -jinja2==3.1.5 \ - --hash=sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb \ - --hash=sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb - # via gcp-releasetool -keyring==25.4.1 \ - --hash=sha256:5426f817cf7f6f007ba5ec722b1bcad95a75b27d780343772ad76b17cb47b0bf \ - --hash=sha256:b07ebc55f3e8ed86ac81dd31ef14e81ace9dd9c3d4b5d77a6e9a2016d0d71a1b - # via - # gcp-releasetool - # twine -markdown-it-py==3.0.0 \ - --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ - --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb - # via rich -markupsafe==3.0.1 \ - --hash=sha256:0778de17cff1acaeccc3ff30cd99a3fd5c50fc58ad3d6c0e0c4c58092b859396 \ - --hash=sha256:0f84af7e813784feb4d5e4ff7db633aba6c8ca64a833f61d8e4eade234ef0c38 \ - --hash=sha256:17b2aea42a7280db02ac644db1d634ad47dcc96faf38ab304fe26ba2680d359a \ - --hash=sha256:242d6860f1fd9191aef5fae22b51c5c19767f93fb9ead4d21924e0bcb17619d8 \ - --hash=sha256:244dbe463d5fb6d7ce161301a03a6fe744dac9072328ba9fc82289238582697b \ - --hash=sha256:26627785a54a947f6d7336ce5963569b5d75614619e75193bdb4e06e21d447ad \ - --hash=sha256:2a4b34a8d14649315c4bc26bbfa352663eb51d146e35eef231dd739d54a5430a \ - --hash=sha256:2ae99f31f47d849758a687102afdd05bd3d3ff7dbab0a8f1587981b58a76152a \ - --hash=sha256:312387403cd40699ab91d50735ea7a507b788091c416dd007eac54434aee51da \ - --hash=sha256:3341c043c37d78cc5ae6e3e305e988532b072329639007fd408a476642a89fd6 \ - --hash=sha256:33d1c36b90e570ba7785dacd1faaf091203d9942bc036118fab8110a401eb1a8 \ - --hash=sha256:3e683ee4f5d0fa2dde4db77ed8dd8a876686e3fc417655c2ece9a90576905344 \ - --hash=sha256:3ffb4a8e7d46ed96ae48805746755fadd0909fea2306f93d5d8233ba23dda12a \ - --hash=sha256:40621d60d0e58aa573b68ac5e2d6b20d44392878e0bfc159012a5787c4e35bc8 \ - --hash=sha256:40f1e10d51c92859765522cbd79c5c8989f40f0419614bcdc5015e7b6bf97fc5 \ - --hash=sha256:45d42d132cff577c92bfba536aefcfea7e26efb975bd455db4e6602f5c9f45e7 \ - --hash=sha256:48488d999ed50ba8d38c581d67e496f955821dc183883550a6fbc7f1aefdc170 \ - --hash=sha256:4935dd7883f1d50e2ffecca0aa33dc1946a94c8f3fdafb8df5c330e48f71b132 \ - --hash=sha256:4c2d64fdba74ad16138300815cfdc6ab2f4647e23ced81f59e940d7d4a1469d9 \ - --hash=sha256:4c8817557d0de9349109acb38b9dd570b03cc5014e8aabf1cbddc6e81005becd \ - --hash=sha256:4ffaaac913c3f7345579db4f33b0020db693f302ca5137f106060316761beea9 \ - --hash=sha256:5a4cb365cb49b750bdb60b846b0c0bc49ed62e59a76635095a179d440540c346 \ - --hash=sha256:62fada2c942702ef8952754abfc1a9f7658a4d5460fabe95ac7ec2cbe0d02abc \ - --hash=sha256:67c519635a4f64e495c50e3107d9b4075aec33634272b5db1cde839e07367589 \ - --hash=sha256:6a54c43d3ec4cf2a39f4387ad044221c66a376e58c0d0e971d47c475ba79c6b5 \ - --hash=sha256:7044312a928a66a4c2a22644147bc61a199c1709712069a344a3fb5cfcf16915 \ - --hash=sha256:730d86af59e0e43ce277bb83970530dd223bf7f2a838e086b50affa6ec5f9295 \ - --hash=sha256:800100d45176652ded796134277ecb13640c1a537cad3b8b53da45aa96330453 \ - --hash=sha256:80fcbf3add8790caddfab6764bde258b5d09aefbe9169c183f88a7410f0f6dea \ - --hash=sha256:82b5dba6eb1bcc29cc305a18a3c5365d2af06ee71b123216416f7e20d2a84e5b \ - --hash=sha256:852dc840f6d7c985603e60b5deaae1d89c56cb038b577f6b5b8c808c97580f1d \ - --hash=sha256:8ad4ad1429cd4f315f32ef263c1342166695fad76c100c5d979c45d5570ed58b \ - --hash=sha256:8ae369e84466aa70f3154ee23c1451fda10a8ee1b63923ce76667e3077f2b0c4 \ - --hash=sha256:93e8248d650e7e9d49e8251f883eed60ecbc0e8ffd6349e18550925e31bd029b \ - --hash=sha256:973a371a55ce9ed333a3a0f8e0bcfae9e0d637711534bcb11e130af2ab9334e7 \ - --hash=sha256:9ba25a71ebf05b9bb0e2ae99f8bc08a07ee8e98c612175087112656ca0f5c8bf \ - --hash=sha256:a10860e00ded1dd0a65b83e717af28845bb7bd16d8ace40fe5531491de76b79f \ - --hash=sha256:a4792d3b3a6dfafefdf8e937f14906a51bd27025a36f4b188728a73382231d91 \ - --hash=sha256:a7420ceda262dbb4b8d839a4ec63d61c261e4e77677ed7c66c99f4e7cb5030dd \ - --hash=sha256:ad91738f14eb8da0ff82f2acd0098b6257621410dcbd4df20aaa5b4233d75a50 \ - --hash=sha256:b6a387d61fe41cdf7ea95b38e9af11cfb1a63499af2759444b99185c4ab33f5b \ - --hash=sha256:b954093679d5750495725ea6f88409946d69cfb25ea7b4c846eef5044194f583 \ - --hash=sha256:bbde71a705f8e9e4c3e9e33db69341d040c827c7afa6789b14c6e16776074f5a \ - --hash=sha256:beeebf760a9c1f4c07ef6a53465e8cfa776ea6a2021eda0d0417ec41043fe984 \ - --hash=sha256:c91b394f7601438ff79a4b93d16be92f216adb57d813a78be4446fe0f6bc2d8c \ - --hash=sha256:c97ff7fedf56d86bae92fa0a646ce1a0ec7509a7578e1ed238731ba13aabcd1c \ - --hash=sha256:cb53e2a99df28eee3b5f4fea166020d3ef9116fdc5764bc5117486e6d1211b25 \ - --hash=sha256:cbf445eb5628981a80f54087f9acdbf84f9b7d862756110d172993b9a5ae81aa \ - --hash=sha256:d06b24c686a34c86c8c1fba923181eae6b10565e4d80bdd7bc1c8e2f11247aa4 \ - --hash=sha256:d98e66a24497637dd31ccab090b34392dddb1f2f811c4b4cd80c230205c074a3 \ - --hash=sha256:db15ce28e1e127a0013dfb8ac243a8e392db8c61eae113337536edb28bdc1f97 \ - --hash=sha256:db842712984e91707437461930e6011e60b39136c7331e971952bb30465bc1a1 \ - --hash=sha256:e24bfe89c6ac4c31792793ad9f861b8f6dc4546ac6dc8f1c9083c7c4f2b335cd \ - --hash=sha256:e81c52638315ff4ac1b533d427f50bc0afc746deb949210bc85f05d4f15fd772 \ - --hash=sha256:e9393357f19954248b00bed7c56f29a25c930593a77630c719653d51e7669c2a \ - --hash=sha256:ee3941769bd2522fe39222206f6dd97ae83c442a94c90f2b7a25d847d40f4729 \ - --hash=sha256:f31ae06f1328595d762c9a2bf29dafd8621c7d3adc130cbb46278079758779ca \ - --hash=sha256:f94190df587738280d544971500b9cafc9b950d32efcb1fba9ac10d84e6aa4e6 \ - --hash=sha256:fa7d686ed9883f3d664d39d5a8e74d3c5f63e603c2e3ff0abcba23eac6542635 \ - --hash=sha256:fb532dd9900381d2e8f48172ddc5a59db4c445a11b9fab40b3b786da40d3b56b \ - --hash=sha256:fe32482b37b4b00c7a52a07211b479653b7fe4f22b2e481b9a9b099d8a430f2f - # via jinja2 -mdurl==0.1.2 \ - --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ - --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba - # via markdown-it-py -more-itertools==10.5.0 \ - --hash=sha256:037b0d3203ce90cca8ab1defbbdac29d5f993fc20131f3664dc8d6acfa872aef \ - --hash=sha256:5482bfef7849c25dc3c6dd53a6173ae4795da2a41a80faea6700d9f5846c5da6 - # via - # jaraco-classes - # jaraco-functools -nh3==0.2.18 \ - --hash=sha256:0411beb0589eacb6734f28d5497ca2ed379eafab8ad8c84b31bb5c34072b7164 \ - --hash=sha256:14c5a72e9fe82aea5fe3072116ad4661af5cf8e8ff8fc5ad3450f123e4925e86 \ - --hash=sha256:19aaba96e0f795bd0a6c56291495ff59364f4300d4a39b29a0abc9cb3774a84b \ - --hash=sha256:34c03fa78e328c691f982b7c03d4423bdfd7da69cd707fe572f544cf74ac23ad \ - --hash=sha256:36c95d4b70530b320b365659bb5034341316e6a9b30f0b25fa9c9eff4c27a204 \ - --hash=sha256:3a157ab149e591bb638a55c8c6bcb8cdb559c8b12c13a8affaba6cedfe51713a \ - --hash=sha256:42c64511469005058cd17cc1537578eac40ae9f7200bedcfd1fc1a05f4f8c200 \ - --hash=sha256:5f36b271dae35c465ef5e9090e1fdaba4a60a56f0bb0ba03e0932a66f28b9189 \ - --hash=sha256:6955369e4d9f48f41e3f238a9e60f9410645db7e07435e62c6a9ea6135a4907f \ - --hash=sha256:7b7c2a3c9eb1a827d42539aa64091640bd275b81e097cd1d8d82ef91ffa2e811 \ - --hash=sha256:8ce0f819d2f1933953fca255db2471ad58184a60508f03e6285e5114b6254844 \ - --hash=sha256:94a166927e53972a9698af9542ace4e38b9de50c34352b962f4d9a7d4c927af4 \ - --hash=sha256:a7f1b5b2c15866f2db413a3649a8fe4fd7b428ae58be2c0f6bca5eefd53ca2be \ - --hash=sha256:c8b3a1cebcba9b3669ed1a84cc65bf005728d2f0bc1ed2a6594a992e817f3a50 \ - --hash=sha256:de3ceed6e661954871d6cd78b410213bdcb136f79aafe22aa7182e028b8c7307 \ - --hash=sha256:f0eca9ca8628dbb4e916ae2491d72957fdd35f7a5d326b7032a345f111ac07fe - # via readme-renderer -nox==2024.10.9 \ - --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ - --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r requirements.in -packaging==24.1 \ - --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ - --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 - # via - # gcp-releasetool - # nox -pkginfo==1.10.0 \ - --hash=sha256:5df73835398d10db79f8eecd5cd86b1f6d29317589ea70796994d49399af6297 \ - --hash=sha256:889a6da2ed7ffc58ab5b900d888ddce90bce912f2d2de1dc1c26f4cb9fe65097 - # via twine -platformdirs==4.3.6 \ - --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ - --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb - # via virtualenv -proto-plus==1.24.0 \ - --hash=sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445 \ - --hash=sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12 - # via google-api-core -protobuf==5.28.2 \ - --hash=sha256:2c69461a7fcc8e24be697624c09a839976d82ae75062b11a0972e41fd2cd9132 \ - --hash=sha256:35cfcb15f213449af7ff6198d6eb5f739c37d7e4f1c09b5d0641babf2cc0c68f \ - --hash=sha256:52235802093bd8a2811abbe8bf0ab9c5f54cca0a751fdd3f6ac2a21438bffece \ - --hash=sha256:59379674ff119717404f7454647913787034f03fe7049cbef1d74a97bb4593f0 \ - --hash=sha256:5e8a95246d581eef20471b5d5ba010d55f66740942b95ba9b872d918c459452f \ - --hash=sha256:87317e9bcda04a32f2ee82089a204d3a2f0d3c8aeed16568c7daf4756e4f1fe0 \ - --hash=sha256:8ddc60bf374785fb7cb12510b267f59067fa10087325b8e1855b898a0d81d276 \ - --hash=sha256:a8b9403fc70764b08d2f593ce44f1d2920c5077bf7d311fefec999f8c40f78b7 \ - --hash=sha256:c0ea0123dac3399a2eeb1a1443d82b7afc9ff40241433296769f7da42d142ec3 \ - --hash=sha256:ca53faf29896c526863366a52a8f4d88e69cd04ec9571ed6082fa117fac3ab36 \ - --hash=sha256:eeea10f3dc0ac7e6b4933d32db20662902b4ab81bf28df12218aa389e9c2102d - # via - # gcp-docuploader - # gcp-releasetool - # google-api-core - # googleapis-common-protos - # proto-plus -pyasn1==0.6.1 \ - --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ - --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 \ - --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ - --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c - # via google-auth -pycparser==2.22 \ - --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \ - --hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc - # via cffi -pygments==2.18.0 \ - --hash=sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199 \ - --hash=sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a - # via - # readme-renderer - # rich -pyjwt==2.9.0 \ - --hash=sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850 \ - --hash=sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c - # via gcp-releasetool -pyperclip==1.9.0 \ - --hash=sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310 - # via gcp-releasetool -python-dateutil==2.9.0.post0 \ - --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \ - --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 - # via gcp-releasetool -readme-renderer==44.0 \ - --hash=sha256:2fbca89b81a08526aadf1357a8c2ae889ec05fb03f5da67f9769c9a592166151 \ - --hash=sha256:8712034eabbfa6805cacf1402b4eeb2a73028f72d1166d6f5cb7f9c047c5d1e1 - # via twine -requests==2.32.3 \ - --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ - --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 - # via - # gcp-releasetool - # google-api-core - # google-cloud-storage - # requests-toolbelt - # twine -requests-toolbelt==1.0.0 \ - --hash=sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6 \ - --hash=sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06 - # via twine -rfc3986==2.0.0 \ - --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ - --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c - # via twine -rich==13.9.2 \ - --hash=sha256:51a2c62057461aaf7152b4d611168f93a9fc73068f8ded2790f29fe2b5366d0c \ - --hash=sha256:8c82a3d3f8dcfe9e734771313e606b39d8247bb6b826e196f4914b333b743cf1 - # via twine -rsa==4.9 \ - --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ - --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 - # via google-auth -secretstorage==3.3.3 \ - --hash=sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77 \ - --hash=sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99 - # via keyring -six==1.16.0 \ - --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ - --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 - # via - # gcp-docuploader - # python-dateutil -tomli==2.0.2 \ - --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ - --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed - # via nox -twine==5.1.1 \ - --hash=sha256:215dbe7b4b94c2c50a7315c0275d2258399280fbb7d04182c7e55e24b5f93997 \ - --hash=sha256:9aa0825139c02b3434d913545c7b847a21c835e11597f5255842d457da2322db - # via -r requirements.in -typing-extensions==4.12.2 \ - --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ - --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 - # via - # -r requirements.in - # rich -urllib3==2.2.3 \ - --hash=sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac \ - --hash=sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9 - # via - # requests - # twine -virtualenv==20.26.6 \ - --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ - --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 - # via nox -wheel==0.44.0 \ - --hash=sha256:2376a90c98cc337d18623527a97c31797bd02bad0033d41547043a1cbfbe448f \ - --hash=sha256:a29c3f2817e95ab89aa4660681ad547c0e9547f20e75b0562fe7723c9a2a9d49 - # via -r requirements.in -zipp==3.20.2 \ - --hash=sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350 \ - --hash=sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29 - # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -setuptools==75.1.0 \ - --hash=sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2 \ - --hash=sha256:d59a21b17a275fb872a9c3dae73963160ae079f1049ed956880cd7c09b120538 - # via -r requirements.in From d7f76853d598c354bfd2e65f5dde28dae97da0ec Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 14 Mar 2025 09:36:26 -0400 Subject: [PATCH 418/536] fix: remove setup.cfg configuration for creating universal wheels (#2146) `setup.cfg` contains a setting to create a `Universal Wheel` which is only needed if libraries support both Python 2 and Python 3. This library only supports Python 3 so this setting is no longer needed. See https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#wheels. See similar PR https://togithub.com/googleapis/google-cloud-python/pull/13659 which includes this stack trace ``` running bdist_wheel /tmp/pip-build-env-9o_3w17v/overlay/lib/python3.13/site-packages/setuptools/_distutils/cmd.py:135: SetuptoolsDeprecationWarning: bdist_wheel.universal is deprecated !! ******************************************************************************** With Python 2.7 end-of-life, support for building universal wheels (i.e., wheels that support both Python 2 and Python 3) is being obviated. Please discontinue using this option, or if you still need it, file an issue with pypa/setuptools describing your use case. By 2025-Aug-30, you need to update your project and remove deprecated calls or your builds will no longer be supported. ******************************************************************************** !! ``` --- setup.cfg | 4 ---- 1 file changed, 4 deletions(-) diff --git a/setup.cfg b/setup.cfg index 37b63aa49..d5e734f0f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,10 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Generated by synthtool. DO NOT EDIT! -[bdist_wheel] -universal = 1 - [pytype] python_version = 3.8 inputs = From 0842aa10967b1d8395cfb43e52c8ea091b381870 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Tue, 18 Mar 2025 15:30:46 -0400 Subject: [PATCH 419/536] fix: Allow protobuf 6.x (#2142) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: Allow protobuf 6.x * update prerelease_deps nox session so that prerelease versions of protobuf are installed * ensure python-bigquery is installed from source * feat: add support for Python 3.13 * restore replacement * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix typo --------- Co-authored-by: Owl Bot --- .github/sync-repo-settings.yaml | 5 +- ...deps-3.12.cfg => prerelease-deps-3.13.cfg} | 2 +- .../{snippets-3.12.cfg => snippets-3.13.cfg} | 2 +- .../{system-3.12.cfg => system-3.13.cfg} | 4 +- .kokoro/presubmit/system-3.9.cfg | 2 +- CONTRIBUTING.rst | 7 +- noxfile.py | 78 ++++++++++--------- owlbot.py | 35 ++------- pyproject.toml | 35 +++++---- testing/constraints-3.13.txt | 0 10 files changed, 76 insertions(+), 94 deletions(-) rename .kokoro/continuous/{prerelease-deps-3.12.cfg => prerelease-deps-3.13.cfg} (77%) rename .kokoro/presubmit/{snippets-3.12.cfg => snippets-3.13.cfg} (81%) rename .kokoro/presubmit/{system-3.12.cfg => system-3.13.cfg} (81%) create mode 100644 testing/constraints-3.13.txt diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 188c44bbd..1e61b4d65 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -10,14 +10,15 @@ branchProtectionRules: requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - - 'Kokoro system-3.12' - - 'Kokoro snippets-3.12' + - 'Kokoro system-3.13' + - 'Kokoro snippets-3.13' - 'cla/google' - 'Samples - Lint' - 'Samples - Python 3.9' - 'Samples - Python 3.10' - 'Samples - Python 3.11' - 'Samples - Python 3.12' + - 'Samples - Python 3.13' - pattern: v2 requiresLinearHistory: true requiresCodeOwnerReviews: true diff --git a/.kokoro/continuous/prerelease-deps-3.12.cfg b/.kokoro/continuous/prerelease-deps-3.13.cfg similarity index 77% rename from .kokoro/continuous/prerelease-deps-3.12.cfg rename to .kokoro/continuous/prerelease-deps-3.13.cfg index ece962a17..99a1e7150 100644 --- a/.kokoro/continuous/prerelease-deps-3.12.cfg +++ b/.kokoro/continuous/prerelease-deps-3.13.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "prerelease_deps-3.12" + value: "prerelease_deps-3.13" } diff --git a/.kokoro/presubmit/snippets-3.12.cfg b/.kokoro/presubmit/snippets-3.13.cfg similarity index 81% rename from .kokoro/presubmit/snippets-3.12.cfg rename to .kokoro/presubmit/snippets-3.13.cfg index 1381e8323..0b89f0863 100644 --- a/.kokoro/presubmit/snippets-3.12.cfg +++ b/.kokoro/presubmit/snippets-3.13.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "snippets-3.12" + value: "snippets-3.13" } diff --git a/.kokoro/presubmit/system-3.12.cfg b/.kokoro/presubmit/system-3.13.cfg similarity index 81% rename from .kokoro/presubmit/system-3.12.cfg rename to .kokoro/presubmit/system-3.13.cfg index 789455bd6..a0e9a0108 100644 --- a/.kokoro/presubmit/system-3.12.cfg +++ b/.kokoro/presubmit/system-3.13.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "system-3.12" -} + value: "system-3.13" +} \ No newline at end of file diff --git a/.kokoro/presubmit/system-3.9.cfg b/.kokoro/presubmit/system-3.9.cfg index bd1fb514b..b8ae66b37 100644 --- a/.kokoro/presubmit/system-3.9.cfg +++ b/.kokoro/presubmit/system-3.9.cfg @@ -4,4 +4,4 @@ env_vars: { key: "NOX_SESSION" value: "system-3.9" -} +} \ No newline at end of file diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 8f4d54bce..b2993768b 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -143,12 +143,13 @@ Running System Tests $ nox -s system # Run a single system test - $ nox -s system-3.9 -- -k + $ nox -s system-3.13 -- -k .. note:: - System tests are configured to run under Python 3.9, 3.11, 3.12. + System tests are only configured to run under Python 3.9 and 3.13. + For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local auth settings and change some configuration in your project to @@ -238,7 +239,7 @@ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-bigquery/blob/main/noxfile.py -We also explicitly decided to support Python 3 beginning with version 3.7. +We also explicitly decided to support Python 3 beginning with version 3.9. Reasons for this include: - Encouraging use of newest versions of Python 3 diff --git a/noxfile.py b/noxfile.py index f069f8d37..1b118836b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -38,8 +38,8 @@ ) DEFAULT_PYTHON_VERSION = "3.9" -SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12"] -UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12"] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12", "3.13"] +UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12", "3.13"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() @@ -362,6 +362,40 @@ def prerelease_deps(session): https://github.com/googleapis/python-bigquery/issues/95 """ + # Because we test minimum dependency versions on the minimum Python + # version, the first version we test with in the unit tests sessions has a + # constraints file containing all dependencies and extras. + with open( + CURRENT_DIRECTORY + / "testing" + / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt", + encoding="utf-8", + ) as constraints_file: + constraints_text = constraints_file.read() + + # Ignore leading whitespace and comment lines. + deps = [ + match.group(1) + for match in re.finditer( + r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE + ) + ] + + session.install(*deps) + + session.install( + "--pre", + "--upgrade", + "freezegun", + "google-cloud-datacatalog", + "google-cloud-resource-manager", + "google-cloud-storage", + "google-cloud-testutils", + "psutil", + "pytest", + "pytest-cov", + ) + # PyArrow prerelease packages are published to an alternative PyPI host. # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages session.install( @@ -386,48 +420,18 @@ def prerelease_deps(session): session.install( "--pre", "--upgrade", + "--no-deps", "google-api-core", "google-cloud-bigquery-storage", "google-cloud-core", "google-resumable-media", "db-dtypes", - # Exclude version 1.49.0rc1 which has a known issue. See https://github.com/grpc/grpc/pull/30642 - "grpcio!=1.49.0rc1", - ) - session.install( - "freezegun", - "google-cloud-datacatalog", - "google-cloud-resource-manager", - "google-cloud-storage", - "google-cloud-testutils", - "psutil", - "pytest", - "pytest-cov", + "grpcio", + "protobuf", ) - # Because we test minimum dependency versions on the minimum Python - # version, the first version we test with in the unit tests sessions has a - # constraints file containing all dependencies and extras. - with open( - CURRENT_DIRECTORY - / "testing" - / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt", - encoding="utf-8", - ) as constraints_file: - constraints_text = constraints_file.read() - - # Ignore leading whitespace and comment lines. - deps = [ - match.group(1) - for match in re.finditer( - r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE - ) - ] - - # We use --no-deps to ensure that pre-release versions aren't overwritten - # by the version ranges in setup.py. - session.install(*deps) - session.install("--no-deps", "-e", ".[all]") + # Ensure that this library is installed from source + session.install("-e", ".", "--no-deps") # Print out prerelease package versions. session.run("python", "-m", "pip", "freeze") diff --git a/owlbot.py b/owlbot.py index c8efaa98d..fceeaa1b6 100644 --- a/owlbot.py +++ b/owlbot.py @@ -55,6 +55,9 @@ "geopandas": "https://geopandas.org/", "pandas": "https://pandas.pydata.org/pandas-docs/stable/", }, + system_test_python_versions=["3.9", "3.13"], + unit_test_python_versions=["3.9", "3.10", "3.11", "3.12", "3.13"], + default_python_version="3.9", ) # BigQuery has a custom multiprocessing note @@ -86,41 +89,13 @@ python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6""", "python3.9 -m pip install --upgrade --quiet nox virtualenv", ) -s.replace( - "CONTRIBUTING.rst", - "3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows.", - "3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows.", -) -s.replace( - "CONTRIBUTING.rst", - r" \$ nox -s system-3.8 -- -k ", - r" $ nox -s system-3.9 -- -k ", -) -s.replace( - "CONTRIBUTING.rst", - r"""System tests are only configured to run under Python 3.8. - For expediency, we do not run them in older versions of Python 3.""", - r"System tests are configured to run under Python 3.9, 3.11, 3.12.", -) + s.replace( "CONTRIBUTING.rst", r"\$ nox -s py-3.8", r"$ nox -s py-3.9", ) -s.replace( - "CONTRIBUTING.rst", - r"""- `Python 3.7`_ -- `Python 3.8`_ -""", - r"", -) -s.replace( - "CONTRIBUTING.rst", - r""".. _Python 3.7: https://docs.python.org/3.7/ -.. _Python 3.8: https://docs.python.org/3.8/ -""", - r"", -) + s.replace( "scripts/readme-gen/templates/install_deps.tmpl.rst", r"Samples are compatible with Python 3.7", diff --git a/pyproject.toml b/pyproject.toml index 8822fc57d..17bf4fd20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,17 +37,18 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", "Topic :: Internet", ] dependencies = [ - "google-api-core[grpc] >= 2.11.1, < 3.0.0dev", - "google-auth >= 2.14.1, < 3.0.0dev", - "google-cloud-core >= 2.4.1, < 3.0.0dev", - "google-resumable-media >= 2.0.0, < 3.0dev", + "google-api-core[grpc] >= 2.11.1, < 3.0.0", + "google-auth >= 2.14.1, < 3.0.0", + "google-cloud-core >= 2.4.1, < 3.0.0", + "google-resumable-media >= 2.0.0, < 3.0.0", "packaging >= 24.2.0", - "python-dateutil >= 2.8.2, < 3.0dev", - "requests >= 2.21.0, < 3.0.0dev", + "python-dateutil >= 2.8.2, < 3.0.0", + "requests >= 2.21.0, < 3.0.0", ] dynamic = ["version"] @@ -59,37 +60,37 @@ Repository = "https://github.com/googleapis/python-bigquery" # moved back to optional due to bloat. See # https://github.com/googleapis/python-bigquery/issues/1196 for more background. bqstorage = [ - "google-cloud-bigquery-storage >= 2.18.0, < 3.0.0dev", + "google-cloud-bigquery-storage >= 2.18.0, < 3.0.0", # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. # See: https://github.com/googleapis/python-bigquery/issues/83 The # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.47.0, < 2.0dev", - "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", + "grpcio >= 1.47.0, < 2.0.0", + "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", "pyarrow >= 4.0.0", ] pandas = [ "pandas >= 1.1.4", - "pandas-gbq >= 0.26.1; python_version >= '3.8'", - "grpcio >= 1.47.0, < 2.0dev", - "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", + "pandas-gbq >= 0.26.1", + "grpcio >= 1.47.0, < 2.0.0", + "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", "pyarrow >= 3.0.0", - "db-dtypes >= 1.0.4, < 2.0.0dev", + "db-dtypes >= 1.0.4, < 2.0.0", ] ipywidgets = ["ipywidgets >= 7.7.1", "ipykernel >= 6.2.0"] -geopandas = ["geopandas >= 0.9.0, < 2.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] +geopandas = ["geopandas >= 0.9.0, < 2.0.0", "Shapely >= 1.8.4, < 3.0.0"] ipython = ["ipython >= 7.23.1", "bigquery-magics >= 0.6.0"] -tqdm = ["tqdm >= 4.7.4, < 5.0.0dev"] +tqdm = ["tqdm >= 4.7.4, < 5.0.0"] opentelemetry = [ "opentelemetry-api >= 1.1.0", "opentelemetry-sdk >= 1.1.0", "opentelemetry-instrumentation >= 0.20b0", ] bigquery_v2 = [ - "proto-plus >= 1.22.3, < 2.0.0dev", - "protobuf >= 3.20.2, < 6.0.0dev, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types. + "proto-plus >= 1.22.3, < 2.0.0", + "protobuf >= 3.20.2, < 7.0.0, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types. ] all = [ "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,tqdm,opentelemetry,bigquery_v2]", diff --git a/testing/constraints-3.13.txt b/testing/constraints-3.13.txt new file mode 100644 index 000000000..e69de29bb From 9acd9c15a18bb2c0ff9d12d306598a23a80a5b11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 19 Mar 2025 10:17:38 -0500 Subject: [PATCH 420/536] chore: refactor cell data parsing to use classes for easier overrides (#2144) * chore: refactor cell data parsing to use classes for easier overrides * improve error messages with urls for valueerrors * Update tests/unit/_helpers/test_cell_data_parser.py * remove unreachable code --- google/cloud/bigquery/_helpers.py | 499 ++++++++++-------- google/cloud/bigquery/query.py | 27 +- google/cloud/bigquery/table.py | 2 +- tests/unit/_helpers/test_cell_data_parser.py | 467 ++++++++++++++++ tests/unit/_helpers/test_from_json.py | 157 ------ .../test_scalar_query_param_parser.py | 93 ++++ tests/unit/test__helpers.py | 479 ----------------- 7 files changed, 858 insertions(+), 866 deletions(-) create mode 100644 tests/unit/_helpers/test_cell_data_parser.py delete mode 100644 tests/unit/_helpers/test_from_json.py create mode 100644 tests/unit/_helpers/test_scalar_query_param_parser.py diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index d40217c4d..4ba3ccf93 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -21,8 +21,9 @@ import math import re import os +import textwrap import warnings -from typing import Optional, Union, Any, Tuple, Type +from typing import Any, Optional, Tuple, Type, Union from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -133,243 +134,305 @@ def _not_null(value, field): return value is not None or (field is not None and field.mode != "NULLABLE") -def _int_from_json(value, field): - """Coerce 'value' to an int, if set or not nullable.""" - if _not_null(value, field): - return int(value) - - -def _interval_from_json( - value: Optional[str], field -) -> Optional[relativedelta.relativedelta]: - """Coerce 'value' to an interval, if set or not nullable.""" - if not _not_null(value, field): - return None - if value is None: - raise TypeError(f"got {value} for REQUIRED field: {repr(field)}") - - parsed = _INTERVAL_PATTERN.match(value) - if parsed is None: - raise ValueError(f"got interval: '{value}' with unexpected format") - - calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1 - years = calendar_sign * int(parsed.group("years")) - months = calendar_sign * int(parsed.group("months")) - days = int(parsed.group("days")) - time_sign = -1 if parsed.group("time_sign") == "-" else 1 - hours = time_sign * int(parsed.group("hours")) - minutes = time_sign * int(parsed.group("minutes")) - seconds = time_sign * int(parsed.group("seconds")) - fraction = parsed.group("fraction") - microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0 - - return relativedelta.relativedelta( - years=years, - months=months, - days=days, - hours=hours, - minutes=minutes, - seconds=seconds, - microseconds=microseconds, - ) - - -def _float_from_json(value, field): - """Coerce 'value' to a float, if set or not nullable.""" - if _not_null(value, field): - return float(value) - - -def _decimal_from_json(value, field): - """Coerce 'value' to a Decimal, if set or not nullable.""" - if _not_null(value, field): - return decimal.Decimal(value) - - -def _bool_from_json(value, field): - """Coerce 'value' to a bool, if set or not nullable.""" - if _not_null(value, field): - return value.lower() in ["t", "true", "1"] - - -def _string_from_json(value, _): - """NOOP string -> string coercion""" - return value - - -def _bytes_from_json(value, field): - """Base64-decode value""" - if _not_null(value, field): - return base64.standard_b64decode(_to_bytes(value)) - - -def _timestamp_from_json(value, field): - """Coerce 'value' to a datetime, if set or not nullable.""" - if _not_null(value, field): - # value will be a integer in seconds, to microsecond precision, in UTC. - return _datetime_from_microseconds(int(value)) +class CellDataParser: + """Converter from BigQuery REST resource to Python value for RowIterator and similar classes. + See: "rows" field of + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list and + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults. + """ -def _timestamp_query_param_from_json(value, field): - """Coerce 'value' to a datetime, if set or not nullable. - - Args: - value (str): The timestamp. - - field (google.cloud.bigquery.schema.SchemaField): - The field corresponding to the value. + def to_py(self, resource, field): + def default_converter(value, field): + _warn_unknown_field_type(field) + return value - Returns: - Optional[datetime.datetime]: - The parsed datetime object from - ``value`` if the ``field`` is not null (otherwise it is - :data:`None`). - """ - if _not_null(value, field): - # Canonical formats for timestamps in BigQuery are flexible. See: - # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type - # The separator between the date and time can be 'T' or ' '. - value = value.replace(" ", "T", 1) - # The UTC timezone may be formatted as Z or +00:00. - value = value.replace("Z", "") - value = value.replace("+00:00", "") - - if "." in value: - # YYYY-MM-DDTHH:MM:SS.ffffff - return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU).replace( - tzinfo=UTC - ) + converter = getattr( + self, f"{field.field_type.lower()}_to_py", default_converter + ) + if field.mode == "REPEATED": + return [converter(item["v"], field) for item in resource] else: - # YYYY-MM-DDTHH:MM:SS - return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION).replace( - tzinfo=UTC + return converter(resource, field) + + def bool_to_py(self, value, field): + """Coerce 'value' to a bool, if set or not nullable.""" + if _not_null(value, field): + # TODO(tswast): Why does _not_null care if the field is NULLABLE or + # REQUIRED? Do we actually need such client-side validation? + if value is None: + raise TypeError(f"got None for required boolean field {field}") + return value.lower() in ("t", "true", "1") + + def boolean_to_py(self, value, field): + """Coerce 'value' to a bool, if set or not nullable.""" + return self.bool_to_py(value, field) + + def integer_to_py(self, value, field): + """Coerce 'value' to an int, if set or not nullable.""" + if _not_null(value, field): + return int(value) + + def int64_to_py(self, value, field): + """Coerce 'value' to an int, if set or not nullable.""" + return self.integer_to_py(value, field) + + def interval_to_py( + self, value: Optional[str], field + ) -> Optional[relativedelta.relativedelta]: + """Coerce 'value' to an interval, if set or not nullable.""" + if not _not_null(value, field): + return None + if value is None: + raise TypeError(f"got {value} for REQUIRED field: {repr(field)}") + + parsed = _INTERVAL_PATTERN.match(value) + if parsed is None: + raise ValueError( + textwrap.dedent( + f""" + Got interval: '{value}' with unexpected format. + Expected interval in canonical format of "[sign]Y-M [sign]D [sign]H:M:S[.F]". + See: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type + for more information. + """ + ), ) - else: - return None + calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1 + years = calendar_sign * int(parsed.group("years")) + months = calendar_sign * int(parsed.group("months")) + days = int(parsed.group("days")) + time_sign = -1 if parsed.group("time_sign") == "-" else 1 + hours = time_sign * int(parsed.group("hours")) + minutes = time_sign * int(parsed.group("minutes")) + seconds = time_sign * int(parsed.group("seconds")) + fraction = parsed.group("fraction") + microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0 + + return relativedelta.relativedelta( + years=years, + months=months, + days=days, + hours=hours, + minutes=minutes, + seconds=seconds, + microseconds=microseconds, + ) -def _datetime_from_json(value, field): - """Coerce 'value' to a datetime, if set or not nullable. + def float_to_py(self, value, field): + """Coerce 'value' to a float, if set or not nullable.""" + if _not_null(value, field): + return float(value) - Args: - value (str): The timestamp. - field (google.cloud.bigquery.schema.SchemaField): - The field corresponding to the value. + def float64_to_py(self, value, field): + """Coerce 'value' to a float, if set or not nullable.""" + return self.float_to_py(value, field) - Returns: - Optional[datetime.datetime]: - The parsed datetime object from - ``value`` if the ``field`` is not null (otherwise it is - :data:`None`). - """ - if _not_null(value, field): - if "." in value: - # YYYY-MM-DDTHH:MM:SS.ffffff - return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU) - else: - # YYYY-MM-DDTHH:MM:SS - return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION) - else: - return None + def numeric_to_py(self, value, field): + """Coerce 'value' to a Decimal, if set or not nullable.""" + if _not_null(value, field): + return decimal.Decimal(value) + def bignumeric_to_py(self, value, field): + """Coerce 'value' to a Decimal, if set or not nullable.""" + return self.numeric_to_py(value, field) -def _date_from_json(value, field): - """Coerce 'value' to a datetime date, if set or not nullable""" - if _not_null(value, field): - # value will be a string, in YYYY-MM-DD form. - return _date_from_iso8601_date(value) + def string_to_py(self, value, _): + """NOOP string -> string coercion""" + return value + def geography_to_py(self, value, _): + """NOOP string -> string coercion""" + return value -def _time_from_json(value, field): - """Coerce 'value' to a datetime date, if set or not nullable""" - if _not_null(value, field): - if len(value) == 8: # HH:MM:SS - fmt = _TIMEONLY_WO_MICROS - elif len(value) == 15: # HH:MM:SS.micros - fmt = _TIMEONLY_W_MICROS + def bytes_to_py(self, value, field): + """Base64-decode value""" + if _not_null(value, field): + return base64.standard_b64decode(_to_bytes(value)) + + def timestamp_to_py(self, value, field): + """Coerce 'value' to a datetime, if set or not nullable.""" + if _not_null(value, field): + # value will be a integer in seconds, to microsecond precision, in UTC. + return _datetime_from_microseconds(int(value)) + + def datetime_to_py(self, value, field): + """Coerce 'value' to a datetime, if set or not nullable. + + Args: + value (str): The timestamp. + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. + + Returns: + Optional[datetime.datetime]: + The parsed datetime object from + ``value`` if the ``field`` is not null (otherwise it is + :data:`None`). + """ + if _not_null(value, field): + if "." in value: + # YYYY-MM-DDTHH:MM:SS.ffffff + return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU) + else: + # YYYY-MM-DDTHH:MM:SS + return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION) else: - raise ValueError("Unknown time format: {}".format(value)) - return datetime.datetime.strptime(value, fmt).time() + return None + def date_to_py(self, value, field): + """Coerce 'value' to a datetime date, if set or not nullable""" + if _not_null(value, field): + # value will be a string, in YYYY-MM-DD form. + return _date_from_iso8601_date(value) + + def time_to_py(self, value, field): + """Coerce 'value' to a datetime date, if set or not nullable""" + if _not_null(value, field): + if len(value) == 8: # HH:MM:SS + fmt = _TIMEONLY_WO_MICROS + elif len(value) == 15: # HH:MM:SS.micros + fmt = _TIMEONLY_W_MICROS + else: + raise ValueError( + textwrap.dedent( + f""" + Got {repr(value)} with unknown time format. + Expected HH:MM:SS or HH:MM:SS.micros. See + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type + for more information. + """ + ), + ) + return datetime.datetime.strptime(value, fmt).time() + + def record_to_py(self, value, field): + """Coerce 'value' to a mapping, if set or not nullable.""" + if _not_null(value, field): + record = {} + record_iter = zip(field.fields, value["f"]) + for subfield, cell in record_iter: + record[subfield.name] = self.to_py(cell["v"], subfield) + return record + + def struct_to_py(self, value, field): + """Coerce 'value' to a mapping, if set or not nullable.""" + return self.record_to_py(value, field) + + def json_to_py(self, value, field): + """Coerce 'value' to a Pythonic JSON representation.""" + if _not_null(value, field): + return json.loads(value) + else: + return None -def _record_from_json(value, field): - """Coerce 'value' to a mapping, if set or not nullable.""" - if _not_null(value, field): - record = {} - record_iter = zip(field.fields, value["f"]) - for subfield, cell in record_iter: - record[subfield.name] = _field_from_json(cell["v"], subfield) - return record + def _range_element_to_py(self, value, field_element_type): + """Coerce 'value' to a range element value.""" + # Avoid circular imports by importing here. + from google.cloud.bigquery import schema + if value == "UNBOUNDED": + return None + if field_element_type.element_type in _SUPPORTED_RANGE_ELEMENTS: + return self.to_py( + value, + schema.SchemaField("placeholder", field_element_type.element_type), + ) + else: + raise ValueError( + textwrap.dedent( + f""" + Got unsupported range element type: {field_element_type.element_type}. + Exptected one of {repr(_SUPPORTED_RANGE_ELEMENTS)}. See: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declare_a_range_type + for more information. + """ + ), + ) -def _json_from_json(value, field): - """Coerce 'value' to a Pythonic JSON representation.""" - if _not_null(value, field): - return json.loads(value) - else: - return None + def range_to_py(self, value, field): + """Coerce 'value' to a range, if set or not nullable. + + Args: + value (str): The literal representation of the range. + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. + + Returns: + Optional[dict]: + The parsed range object from ``value`` if the ``field`` is not + null (otherwise it is :data:`None`). + """ + if _not_null(value, field): + if _RANGE_PATTERN.match(value): + start, end = value[1:-1].split(", ") + start = self._range_element_to_py(start, field.range_element_type) + end = self._range_element_to_py(end, field.range_element_type) + return {"start": start, "end": end} + else: + raise ValueError( + textwrap.dedent( + f""" + Got unknown format for range value: {value}. + Expected format '[lower_bound, upper_bound)'. See: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_with_literal + for more information. + """ + ), + ) -def _range_element_from_json(value, field): - """Coerce 'value' to a range element value.""" - if value == "UNBOUNDED": - return None - if field.element_type in _SUPPORTED_RANGE_ELEMENTS: - return _CELLDATA_FROM_JSON[field.element_type](value, field.element_type) - else: - raise ValueError(f"Unsupported range element type: {field.element_type}") +CELL_DATA_PARSER = CellDataParser() -def _range_from_json(value, field): - """Coerce 'value' to a range, if set or not nullable. +class ScalarQueryParamParser(CellDataParser): + """Override of CellDataParser to handle the differences in the response from query params. - Args: - value (str): The literal representation of the range. - field (google.cloud.bigquery.schema.SchemaField): - The field corresponding to the value. - - Returns: - Optional[dict]: - The parsed range object from ``value`` if the ``field`` is not - null (otherwise it is :data:`None`). + See: "value" field of + https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter#QueryParameterValue """ - if _not_null(value, field): - if _RANGE_PATTERN.match(value): - start, end = value[1:-1].split(", ") - start = _range_element_from_json(start, field.range_element_type) - end = _range_element_from_json(end, field.range_element_type) - return {"start": start, "end": end} - else: - raise ValueError(f"Unknown format for range value: {value}") - else: - return None + def timestamp_to_py(self, value, field): + """Coerce 'value' to a datetime, if set or not nullable. + + Args: + value (str): The timestamp. + + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. + + Returns: + Optional[datetime.datetime]: + The parsed datetime object from + ``value`` if the ``field`` is not null (otherwise it is + :data:`None`). + """ + if _not_null(value, field): + # Canonical formats for timestamps in BigQuery are flexible. See: + # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type + # The separator between the date and time can be 'T' or ' '. + value = value.replace(" ", "T", 1) + # The UTC timezone may be formatted as Z or +00:00. + value = value.replace("Z", "") + value = value.replace("+00:00", "") + + if "." in value: + # YYYY-MM-DDTHH:MM:SS.ffffff + return datetime.datetime.strptime( + value, _RFC3339_MICROS_NO_ZULU + ).replace(tzinfo=UTC) + else: + # YYYY-MM-DDTHH:MM:SS + return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION).replace( + tzinfo=UTC + ) + else: + return None -# Parse BigQuery API response JSON into a Python representation. -_CELLDATA_FROM_JSON = { - "INTEGER": _int_from_json, - "INT64": _int_from_json, - "INTERVAL": _interval_from_json, - "FLOAT": _float_from_json, - "FLOAT64": _float_from_json, - "NUMERIC": _decimal_from_json, - "BIGNUMERIC": _decimal_from_json, - "BOOLEAN": _bool_from_json, - "BOOL": _bool_from_json, - "STRING": _string_from_json, - "GEOGRAPHY": _string_from_json, - "BYTES": _bytes_from_json, - "TIMESTAMP": _timestamp_from_json, - "DATETIME": _datetime_from_json, - "DATE": _date_from_json, - "TIME": _time_from_json, - "RECORD": _record_from_json, - "JSON": _json_from_json, - "RANGE": _range_from_json, -} -_QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON) -_QUERY_PARAMS_FROM_JSON["TIMESTAMP"] = _timestamp_query_param_from_json +SCALAR_QUERY_PARAM_PARSER = ScalarQueryParamParser() def _field_to_index_mapping(schema): @@ -377,18 +440,6 @@ def _field_to_index_mapping(schema): return {f.name: i for i, f in enumerate(schema)} -def _field_from_json(resource, field): - def default_converter(value, field): - _warn_unknown_field_type(field) - return value - - converter = _CELLDATA_FROM_JSON.get(field.field_type, default_converter) - if field.mode == "REPEATED": - return [converter(item["v"], field) for item in resource] - else: - return converter(resource, field) - - def _row_tuple_from_json(row, schema): """Convert JSON row data to row with appropriate types. @@ -410,7 +461,7 @@ def _row_tuple_from_json(row, schema): row_data = [] for field, cell in zip(schema, row["f"]): - row_data.append(_field_from_json(cell["v"], field)) + row_data.append(CELL_DATA_PARSER.to_py(cell["v"], field)) return tuple(row_data) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index f1090a7dc..8745c09f5 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -18,11 +18,11 @@ import copy import datetime import decimal -from typing import Any, Optional, Dict, Union +from typing import Any, cast, Optional, Dict, Union from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery import _helpers from google.cloud.bigquery._helpers import _rows_from_json -from google.cloud.bigquery._helpers import _QUERY_PARAMS_FROM_JSON from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM from google.cloud.bigquery._helpers import _SUPPORTED_RANGE_ELEMENTS @@ -571,6 +571,9 @@ def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": Returns: google.cloud.bigquery.query.ScalarQueryParameter: Instance """ + # Import here to avoid circular imports. + from google.cloud.bigquery import schema + name = resource.get("name") type_ = resource["parameterType"]["type"] @@ -578,7 +581,9 @@ def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": # from the back-end - the latter omits it for None values. value = resource.get("parameterValue", {}).get("value") if value is not None: - converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + converted = _helpers.SCALAR_QUERY_PARAM_PARSER.to_py( + value, schema.SchemaField(cast(str, name), type_) + ) else: converted = None @@ -693,13 +698,20 @@ def _from_api_repr_struct(cls, resource): @classmethod def _from_api_repr_scalar(cls, resource): + """Converts REST resource into a list of scalar values.""" + # Import here to avoid circular imports. + from google.cloud.bigquery import schema + name = resource.get("name") array_type = resource["parameterType"]["arrayType"]["type"] parameter_value = resource.get("parameterValue", {}) array_values = parameter_value.get("arrayValues", ()) values = [value["value"] for value in array_values] converted = [ - _QUERY_PARAMS_FROM_JSON[array_type](value, None) for value in values + _helpers.SCALAR_QUERY_PARAM_PARSER.to_py( + value, schema.SchemaField(name, array_type) + ) + for value in values ] return cls(name, array_type, converted) @@ -850,6 +862,9 @@ def from_api_repr(cls, resource: dict) -> "StructQueryParameter": Returns: google.cloud.bigquery.query.StructQueryParameter: Instance """ + # Import here to avoid circular imports. + from google.cloud.bigquery import schema + name = resource.get("name") instance = cls(name) type_resources = {} @@ -877,7 +892,9 @@ def from_api_repr(cls, resource: dict) -> "StructQueryParameter": converted = ArrayQueryParameter.from_api_repr(struct_resource) else: value = value["value"] - converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + converted = _helpers.SCALAR_QUERY_PARAM_PARSER.to_py( + value, schema.SchemaField(cast(str, name), type_) + ) instance.struct_values[key] = converted return instance diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 4d79d60da..f139e44ad 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -3533,7 +3533,7 @@ def _row_iterator_page_columns(schema, response): def get_column_data(field_index, field): for row in rows: - yield _helpers._field_from_json(row["f"][field_index]["v"], field) + yield _helpers.CELL_DATA_PARSER.to_py(row["f"][field_index]["v"], field) for field_index, field in enumerate(schema): columns.append(get_column_data(field_index, field)) diff --git a/tests/unit/_helpers/test_cell_data_parser.py b/tests/unit/_helpers/test_cell_data_parser.py new file mode 100644 index 000000000..14721a26c --- /dev/null +++ b/tests/unit/_helpers/test_cell_data_parser.py @@ -0,0 +1,467 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import datetime +import decimal +import json + +from dateutil.relativedelta import relativedelta +import pytest + +import google.cloud.bigquery.schema + + +def create_field(mode="NULLABLE", type_="IGNORED", name="test_field", **kwargs): + return google.cloud.bigquery.schema.SchemaField(name, type_, mode=mode, **kwargs) + + +@pytest.fixture +def mut(): + from google.cloud.bigquery import _helpers + + return _helpers + + +@pytest.fixture +def object_under_test(mut): + return mut.CELL_DATA_PARSER + + +ALL_TYPES = { + "BOOL", + "BOOLEAN", + "BYTES", + "INTEGER", + "INT64", + "INTERVAL", + "FLOAT", + "FLOAT64", + "NUMERIC", + "BIGNUMERIC", + "STRING", + "GEOGRAPHY", + "TIMESTAMP", + "DATETIME", + "DATE", + "TIME", + "RECORD", + "STRUCT", + "JSON", + "RANGE", +} + +TYPES_WITH_CLIENT_SIDE_NULL_VALIDATION = ALL_TYPES - { + "STRING", + "GEOGRAPHY", +} + + +@pytest.mark.parametrize( + "type_", + list(sorted(ALL_TYPES)), +) +def test_to_py_w_none_nullable(object_under_test, type_): + assert object_under_test.to_py(None, create_field("NULLABLE", type_)) is None + + +@pytest.mark.parametrize("type_", list(sorted(TYPES_WITH_CLIENT_SIDE_NULL_VALIDATION))) +def test_to_py_w_none_required(object_under_test, type_): + with pytest.raises(TypeError): + object_under_test.to_py(None, create_field("REQUIRED", type_)) + + +def test_interval_to_py_w_invalid_format(object_under_test): + with pytest.raises(ValueError, match="NOT_AN_INTERVAL"): + object_under_test.interval_to_py("NOT_AN_INTERVAL", create_field()) + + +@pytest.mark.parametrize( + ("value", "expected"), + ( + ("0-0 0 0:0:0", relativedelta()), + # SELECT INTERVAL X YEAR + ("-10000-0 0 0:0:0", relativedelta(years=-10000)), + ("-1-0 0 0:0:0", relativedelta(years=-1)), + ("1-0 0 0:0:0", relativedelta(years=1)), + ("10000-0 0 0:0:0", relativedelta(years=10000)), + # SELECT INTERVAL X MONTH + ("-0-11 0 0:0:0", relativedelta(months=-11)), + ("-0-1 0 0:0:0", relativedelta(months=-1)), + ("0-1 0 0:0:0", relativedelta(months=1)), + ("0-11 0 0:0:0", relativedelta(months=11)), + # SELECT INTERVAL X DAY + ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)), + ("0-0 -1 0:0:0", relativedelta(days=-1)), + ("0-0 1 0:0:0", relativedelta(days=1)), + ("0-0 3660000 0:0:0", relativedelta(days=3660000)), + # SELECT INTERVAL X HOUR + ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)), + ("0-0 0 -1:0:0", relativedelta(hours=-1)), + ("0-0 0 1:0:0", relativedelta(hours=1)), + ("0-0 0 87840000:0:0", relativedelta(hours=87840000)), + # SELECT INTERVAL X MINUTE + ("0-0 0 -0:59:0", relativedelta(minutes=-59)), + ("0-0 0 -0:1:0", relativedelta(minutes=-1)), + ("0-0 0 0:1:0", relativedelta(minutes=1)), + ("0-0 0 0:59:0", relativedelta(minutes=59)), + # SELECT INTERVAL X SECOND + ("0-0 0 -0:0:59", relativedelta(seconds=-59)), + ("0-0 0 -0:0:1", relativedelta(seconds=-1)), + ("0-0 0 0:0:1", relativedelta(seconds=1)), + ("0-0 0 0:0:59", relativedelta(seconds=59)), + # SELECT (INTERVAL -1 SECOND) / 1000000 + ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)), + ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)), + ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)), + ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)), + ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)), + # Test with multiple digits in each section. + ( + "32-11 45 67:16:23.987654", + relativedelta( + years=32, + months=11, + days=45, + hours=67, + minutes=16, + seconds=23, + microseconds=987654, + ), + ), + ( + "-32-11 -45 -67:16:23.987654", + relativedelta( + years=-32, + months=-11, + days=-45, + hours=-67, + minutes=-16, + seconds=-23, + microseconds=-987654, + ), + ), + # Test with mixed +/- sections. + ( + "9999-9 -999999 9999999:59:59.999999", + relativedelta( + years=9999, + months=9, + days=-999999, + hours=9999999, + minutes=59, + seconds=59, + microseconds=999999, + ), + ), + # Test with fraction that is not microseconds. + ("0-0 0 0:0:42.", relativedelta(seconds=42)), + ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)), + ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)), + ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)), + ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)), + # Fractional seconds can cause rounding problems if cast to float. See: + # https://github.com/googleapis/python-db-dtypes-pandas/issues/18 + ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)), + ( + "0-0 0 01:01:01.010101", + relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101), + ), + ( + "0-0 0 09:09:09.090909", + relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909), + ), + ( + "0-0 0 11:11:11.111111", + relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111), + ), + ( + "0-0 0 19:16:23.987654", + relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654), + ), + # Nanoseconds are not expected, but should not cause error. + ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)), + ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)), + ), +) +def test_interval_to_py_w_string_values(object_under_test, value, expected): + got = object_under_test.interval_to_py(value, create_field()) + assert got == expected + + +def test_integer_to_py_w_string_value(object_under_test): + coerced = object_under_test.integer_to_py("42", object()) + assert coerced == 42 + + +def test_integer_to_py_w_float_value(object_under_test): + coerced = object_under_test.integer_to_py(42.0, object()) + assert coerced == 42 + + +def test_json_to_py_w_json_field(object_under_test): + data_field = create_field("REQUIRED", "data", "JSON") + + value = json.dumps( + {"v": {"key": "value"}}, + ) + + expected_output = {"v": {"key": "value"}} + coerced_output = object_under_test.json_to_py(value, data_field) + assert coerced_output == expected_output + + +def test_json_to_py_w_string_value(object_under_test): + coerced = object_under_test.json_to_py('"foo"', create_field()) + assert coerced == "foo" + + +def test_float_to_py_w_string_value(object_under_test): + coerced = object_under_test.float_to_py("3.1415", object()) + assert coerced == 3.1415 + + +def test_float_to_py_w_float_value(object_under_test): + coerced = object_under_test.float_to_py(3.1415, object()) + assert coerced == 3.1415 + + +def test_numeric_to_py_w_string_value(object_under_test): + coerced = object_under_test.numeric_to_py("3.1415", object()) + assert coerced == decimal.Decimal("3.1415") + + +def test_numeric_to_py_w_float_value(object_under_test): + coerced = object_under_test.numeric_to_py(3.1415, object()) + # There is no exact float representation of 3.1415. + assert coerced == decimal.Decimal(3.1415) + + +def test_bool_to_py_w_value_t(object_under_test): + coerced = object_under_test.bool_to_py("T", object()) + assert coerced is True + + +def test_bool_to_py_w_value_true(object_under_test): + coerced = object_under_test.bool_to_py("True", object()) + assert coerced is True + + +def test_bool_to_py_w_value_1(object_under_test): + coerced = object_under_test.bool_to_py("1", object()) + assert coerced is True + + +def test_bool_to_py_w_value_other(object_under_test): + coerced = object_under_test.bool_to_py("f", object()) + assert coerced is False + + +def test_string_to_py_w_string_value(object_under_test): + coerced = object_under_test.string_to_py("Wonderful!", object()) + assert coerced == "Wonderful!" + + +def test_bytes_to_py_w_base64_encoded_bytes(object_under_test): + expected = b"Wonderful!" + encoded = base64.standard_b64encode(expected) + coerced = object_under_test.bytes_to_py(encoded, object()) + assert coerced == expected + + +def test_bytes_to_py_w_base64_encoded_text(object_under_test): + expected = b"Wonderful!" + encoded = base64.standard_b64encode(expected).decode("ascii") + coerced = object_under_test.bytes_to_py(encoded, object()) + assert coerced == expected + + +def test_timestamp_to_py_w_string_int_value(object_under_test): + from google.cloud._helpers import _EPOCH + + coerced = object_under_test.timestamp_to_py("1234567", object()) + assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) + + +def test_timestamp_to_py_w_int_value(object_under_test): + from google.cloud._helpers import _EPOCH + + coerced = object_under_test.timestamp_to_py(1234567, object()) + assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) + + +def test_datetime_to_py_w_string_value(object_under_test): + coerced = object_under_test.datetime_to_py("2016-12-02T18:51:33", object()) + assert coerced == datetime.datetime(2016, 12, 2, 18, 51, 33) + + +def test_datetime_to_py_w_microseconds(object_under_test): + coerced = object_under_test.datetime_to_py("2015-05-22T10:11:12.987654", object()) + assert coerced == datetime.datetime(2015, 5, 22, 10, 11, 12, 987654) + + +def test_date_to_py_w_string_value(object_under_test): + coerced = object_under_test.date_to_py("1987-09-22", object()) + assert coerced == datetime.date(1987, 9, 22) + + +def test_time_to_py_w_string_value(object_under_test): + coerced = object_under_test.time_to_py("12:12:27", object()) + assert coerced == datetime.time(12, 12, 27) + + +def test_time_to_py_w_subsecond_string_value(object_under_test): + coerced = object_under_test.time_to_py("12:12:27.123456", object()) + assert coerced == datetime.time(12, 12, 27, 123456) + + +def test_time_to_py_w_bogus_string_value(object_under_test): + with pytest.raises(ValueError): + object_under_test.time_to_py("12:12:27.123", object()) + + +def test_range_to_py_w_wrong_format(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type="DATE", + ) + with pytest.raises(ValueError): + object_under_test.range_to_py("[2009-06-172019-06-17)", range_field) + + +def test_range_to_py_w_wrong_element_type(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type=google.cloud.bigquery.schema.FieldElementType( + element_type="TIME" + ), + ) + with pytest.raises(ValueError): + object_under_test.range_to_py("[15:31:38, 15:50:38)", range_field) + + +def test_range_to_py_w_unbounded_value(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type="DATE", + ) + coerced = object_under_test.range_to_py("[UNBOUNDED, 2019-06-17)", range_field) + assert coerced == {"start": None, "end": datetime.date(2019, 6, 17)} + + +def test_range_to_py_w_date_value(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type="DATE", + ) + coerced = object_under_test.range_to_py("[2009-06-17, 2019-06-17)", range_field) + assert coerced == { + "start": datetime.date(2009, 6, 17), + "end": datetime.date(2019, 6, 17), + } + + +def test_range_to_py_w_datetime_value(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type=google.cloud.bigquery.schema.FieldElementType( + element_type="DATETIME" + ), + ) + coerced = object_under_test.range_to_py( + "[2009-06-17T13:45:30, 2019-06-17T13:45:30)", range_field + ) + assert coerced == { + "start": datetime.datetime(2009, 6, 17, 13, 45, 30), + "end": datetime.datetime(2019, 6, 17, 13, 45, 30), + } + + +def test_range_to_py_w_timestamp_value(object_under_test): + from google.cloud._helpers import _EPOCH + + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type=google.cloud.bigquery.schema.FieldElementType( + element_type="TIMESTAMP" + ), + ) + coerced = object_under_test.range_to_py("[1234567, 1234789)", range_field) + assert coerced == { + "start": _EPOCH + datetime.timedelta(seconds=1, microseconds=234567), + "end": _EPOCH + datetime.timedelta(seconds=1, microseconds=234789), + } + + +def test_record_to_py_w_nullable_subfield_none(object_under_test): + subfield = create_field("NULLABLE", "INTEGER", name="age") + field = create_field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": None}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"age": None} + + +def test_record_to_py_w_scalar_subfield(object_under_test): + subfield = create_field("REQUIRED", "INTEGER", name="age") + field = create_field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": 42}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"age": 42} + + +def test_record_to_py_w_scalar_subfield_geography(object_under_test): + subfield = create_field("REQUIRED", "GEOGRAPHY", name="geo") + field = create_field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": "POINT(1, 2)"}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"geo": "POINT(1, 2)"} + + +def test_record_to_py_w_repeated_subfield(object_under_test): + subfield = create_field("REPEATED", "STRING", name="color") + field = create_field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": [{"v": "red"}, {"v": "yellow"}, {"v": "blue"}]}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"color": ["red", "yellow", "blue"]} + + +def test_record_to_py_w_record_subfield(object_under_test): + full_name = create_field("REQUIRED", "STRING", name="full_name") + area_code = create_field("REQUIRED", "STRING", name="area_code") + local_number = create_field("REQUIRED", "STRING", name="local_number") + rank = create_field("REQUIRED", "INTEGER", name="rank") + phone = create_field( + "NULLABLE", "RECORD", name="phone", fields=[area_code, local_number, rank] + ) + person = create_field( + "REQUIRED", "RECORD", name="person", fields=[full_name, phone] + ) + value = { + "f": [ + {"v": "Phred Phlyntstone"}, + {"v": {"f": [{"v": "800"}, {"v": "555-1212"}, {"v": 1}]}}, + ] + } + expected = { + "full_name": "Phred Phlyntstone", + "phone": {"area_code": "800", "local_number": "555-1212", "rank": 1}, + } + coerced = object_under_test.record_to_py(value, person) + assert coerced == expected diff --git a/tests/unit/_helpers/test_from_json.py b/tests/unit/_helpers/test_from_json.py deleted file mode 100644 index 65b054f44..000000000 --- a/tests/unit/_helpers/test_from_json.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dateutil.relativedelta import relativedelta -import pytest - -from google.cloud.bigquery.schema import SchemaField - - -def create_field(mode="NULLABLE", type_="IGNORED"): - return SchemaField("test_field", type_, mode=mode) - - -@pytest.fixture -def mut(): - from google.cloud.bigquery import _helpers - - return _helpers - - -def test_interval_from_json_w_none_nullable(mut): - got = mut._interval_from_json(None, create_field()) - assert got is None - - -def test_interval_from_json_w_none_required(mut): - with pytest.raises(TypeError): - mut._interval_from_json(None, create_field(mode="REQUIRED")) - - -def test_interval_from_json_w_invalid_format(mut): - with pytest.raises(ValueError, match="NOT_AN_INTERVAL"): - mut._interval_from_json("NOT_AN_INTERVAL", create_field()) - - -@pytest.mark.parametrize( - ("value", "expected"), - ( - ("0-0 0 0:0:0", relativedelta()), - # SELECT INTERVAL X YEAR - ("-10000-0 0 0:0:0", relativedelta(years=-10000)), - ("-1-0 0 0:0:0", relativedelta(years=-1)), - ("1-0 0 0:0:0", relativedelta(years=1)), - ("10000-0 0 0:0:0", relativedelta(years=10000)), - # SELECT INTERVAL X MONTH - ("-0-11 0 0:0:0", relativedelta(months=-11)), - ("-0-1 0 0:0:0", relativedelta(months=-1)), - ("0-1 0 0:0:0", relativedelta(months=1)), - ("0-11 0 0:0:0", relativedelta(months=11)), - # SELECT INTERVAL X DAY - ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)), - ("0-0 -1 0:0:0", relativedelta(days=-1)), - ("0-0 1 0:0:0", relativedelta(days=1)), - ("0-0 3660000 0:0:0", relativedelta(days=3660000)), - # SELECT INTERVAL X HOUR - ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)), - ("0-0 0 -1:0:0", relativedelta(hours=-1)), - ("0-0 0 1:0:0", relativedelta(hours=1)), - ("0-0 0 87840000:0:0", relativedelta(hours=87840000)), - # SELECT INTERVAL X MINUTE - ("0-0 0 -0:59:0", relativedelta(minutes=-59)), - ("0-0 0 -0:1:0", relativedelta(minutes=-1)), - ("0-0 0 0:1:0", relativedelta(minutes=1)), - ("0-0 0 0:59:0", relativedelta(minutes=59)), - # SELECT INTERVAL X SECOND - ("0-0 0 -0:0:59", relativedelta(seconds=-59)), - ("0-0 0 -0:0:1", relativedelta(seconds=-1)), - ("0-0 0 0:0:1", relativedelta(seconds=1)), - ("0-0 0 0:0:59", relativedelta(seconds=59)), - # SELECT (INTERVAL -1 SECOND) / 1000000 - ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)), - ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)), - ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)), - ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)), - ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)), - # Test with multiple digits in each section. - ( - "32-11 45 67:16:23.987654", - relativedelta( - years=32, - months=11, - days=45, - hours=67, - minutes=16, - seconds=23, - microseconds=987654, - ), - ), - ( - "-32-11 -45 -67:16:23.987654", - relativedelta( - years=-32, - months=-11, - days=-45, - hours=-67, - minutes=-16, - seconds=-23, - microseconds=-987654, - ), - ), - # Test with mixed +/- sections. - ( - "9999-9 -999999 9999999:59:59.999999", - relativedelta( - years=9999, - months=9, - days=-999999, - hours=9999999, - minutes=59, - seconds=59, - microseconds=999999, - ), - ), - # Test with fraction that is not microseconds. - ("0-0 0 0:0:42.", relativedelta(seconds=42)), - ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)), - ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)), - ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)), - ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)), - # Fractional seconds can cause rounding problems if cast to float. See: - # https://github.com/googleapis/python-db-dtypes-pandas/issues/18 - ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)), - ( - "0-0 0 01:01:01.010101", - relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101), - ), - ( - "0-0 0 09:09:09.090909", - relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909), - ), - ( - "0-0 0 11:11:11.111111", - relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111), - ), - ( - "0-0 0 19:16:23.987654", - relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654), - ), - # Nanoseconds are not expected, but should not cause error. - ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)), - ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)), - ), -) -def test_w_string_values(mut, value, expected): - got = mut._interval_from_json(value, create_field()) - assert got == expected diff --git a/tests/unit/_helpers/test_scalar_query_param_parser.py b/tests/unit/_helpers/test_scalar_query_param_parser.py new file mode 100644 index 000000000..8e0d2a34e --- /dev/null +++ b/tests/unit/_helpers/test_scalar_query_param_parser.py @@ -0,0 +1,93 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import pytest + +import google.cloud.bigquery.schema + + +def create_field(mode="NULLABLE", type_="IGNORED"): + return google.cloud.bigquery.schema.SchemaField("test_field", type_, mode=mode) + + +@pytest.fixture +def mut(): + from google.cloud.bigquery import _helpers + + return _helpers + + +@pytest.fixture +def object_under_test(mut): + return mut.SCALAR_QUERY_PARAM_PARSER + + +def test_timestamp_to_py_w_none_nullable(object_under_test): + assert object_under_test.timestamp_to_py(None, create_field()) is None + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ( + "2016-12-20 15:58:27.339328+00:00", + datetime.datetime( + 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc + ), + ), + ( + "2016-12-20 15:58:27+00:00", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc), + ), + ( + "2016-12-20T15:58:27.339328+00:00", + datetime.datetime( + 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc + ), + ), + ( + "2016-12-20T15:58:27+00:00", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc), + ), + ( + "2016-12-20 15:58:27.339328Z", + datetime.datetime( + 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc + ), + ), + ( + "2016-12-20 15:58:27Z", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc), + ), + ( + "2016-12-20T15:58:27.339328Z", + datetime.datetime( + 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc + ), + ), + ( + "2016-12-20T15:58:27Z", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc), + ), + ], +) +def test_timestamp_to_py_w_timestamp_valid(object_under_test, value, expected): + assert object_under_test.timestamp_to_py(value, create_field()) == expected + + +def test_timestamp_to_py_w_timestamp_invalid(object_under_test): + with pytest.raises(ValueError): + object_under_test.timestamp_to_py("definitely-not-a-timestamp", create_field()) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index adba6327c..4e53236e3 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import base64 import datetime import decimal import json @@ -133,484 +132,6 @@ def test_w_value(self): self.assertTrue(self._call_fut(object(), object())) -class Test_int_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _int_from_json - - return _int_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("42", object()) - self.assertEqual(coerced, 42) - - def test_w_float_value(self): - coerced = self._call_fut(42, object()) - self.assertEqual(coerced, 42) - - -class Test_json_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _json_from_json - - return _json_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_json_field(self): - data_field = _Field("REQUIRED", "data", "JSON") - - value = json.dumps( - {"v": {"key": "value"}}, - ) - - expected_output = {"v": {"key": "value"}} - coerced_output = self._call_fut(value, data_field) - self.assertEqual(coerced_output, expected_output) - - def test_w_string_value(self): - coerced = self._call_fut('"foo"', object()) - self.assertEqual(coerced, "foo") - - -class Test_float_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _float_from_json - - return _float_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("3.1415", object()) - self.assertEqual(coerced, 3.1415) - - def test_w_float_value(self): - coerced = self._call_fut(3.1415, object()) - self.assertEqual(coerced, 3.1415) - - -class Test_decimal_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _decimal_from_json - - return _decimal_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("3.1415", object()) - self.assertEqual(coerced, decimal.Decimal("3.1415")) - - def test_w_float_value(self): - coerced = self._call_fut(3.1415, object()) - # There is no exact float representation of 3.1415. - self.assertEqual(coerced, decimal.Decimal(3.1415)) - - -class Test_bool_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _bool_from_json - - return _bool_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(AttributeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_value_t(self): - coerced = self._call_fut("T", object()) - self.assertTrue(coerced) - - def test_w_value_true(self): - coerced = self._call_fut("True", object()) - self.assertTrue(coerced) - - def test_w_value_1(self): - coerced = self._call_fut("1", object()) - self.assertTrue(coerced) - - def test_w_value_other(self): - coerced = self._call_fut("f", object()) - self.assertFalse(coerced) - - -class Test_string_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _string_from_json - - return _string_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - self.assertIsNone(self._call_fut(None, _Field("REQUIRED"))) - - def test_w_string_value(self): - coerced = self._call_fut("Wonderful!", object()) - self.assertEqual(coerced, "Wonderful!") - - -class Test_bytes_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _bytes_from_json - - return _bytes_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_base64_encoded_bytes(self): - expected = b"Wonderful!" - encoded = base64.standard_b64encode(expected) - coerced = self._call_fut(encoded, object()) - self.assertEqual(coerced, expected) - - def test_w_base64_encoded_text(self): - expected = b"Wonderful!" - encoded = base64.standard_b64encode(expected).decode("ascii") - coerced = self._call_fut(encoded, object()) - self.assertEqual(coerced, expected) - - -class Test_timestamp_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _timestamp_from_json - - return _timestamp_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_int_value(self): - from google.cloud._helpers import _EPOCH - - coerced = self._call_fut("1234567", object()) - self.assertEqual( - coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) - ) - - def test_w_int_value(self): - from google.cloud._helpers import _EPOCH - - coerced = self._call_fut(1234567, object()) - self.assertEqual( - coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) - ) - - -class Test_timestamp_query_param_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery import _helpers - - return _helpers._timestamp_query_param_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_timestamp_valid(self): - from google.cloud._helpers import UTC - - samples = [ - ( - "2016-12-20 15:58:27.339328+00:00", - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), - ), - ( - "2016-12-20 15:58:27+00:00", - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), - ), - ( - "2016-12-20T15:58:27.339328+00:00", - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), - ), - ( - "2016-12-20T15:58:27+00:00", - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), - ), - ( - "2016-12-20 15:58:27.339328Z", - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), - ), - ( - "2016-12-20 15:58:27Z", - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), - ), - ( - "2016-12-20T15:58:27.339328Z", - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), - ), - ( - "2016-12-20T15:58:27Z", - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), - ), - ] - for timestamp_str, expected_result in samples: - self.assertEqual( - self._call_fut(timestamp_str, _Field("NULLABLE")), expected_result - ) - - def test_w_timestamp_invalid(self): - with self.assertRaises(ValueError): - self._call_fut("definitely-not-a-timestamp", _Field("NULLABLE")) - - -class Test_datetime_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _datetime_from_json - - return _datetime_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("2016-12-02T18:51:33", object()) - self.assertEqual(coerced, datetime.datetime(2016, 12, 2, 18, 51, 33)) - - def test_w_microseconds(self): - coerced = self._call_fut("2015-05-22T10:11:12.987654", object()) - self.assertEqual(coerced, datetime.datetime(2015, 5, 22, 10, 11, 12, 987654)) - - -class Test_date_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _date_from_json - - return _date_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("1987-09-22", object()) - self.assertEqual(coerced, datetime.date(1987, 9, 22)) - - -class Test_time_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _time_from_json - - return _time_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("12:12:27", object()) - self.assertEqual(coerced, datetime.time(12, 12, 27)) - - def test_w_subsecond_string_value(self): - coerced = self._call_fut("12:12:27.123456", object()) - self.assertEqual(coerced, datetime.time(12, 12, 27, 123456)) - - def test_w_bogus_string_value(self): - with self.assertRaises(ValueError): - self._call_fut("12:12:27.123", object()) - - -class Test_range_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _range_from_json - - return _range_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_wrong_format(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="DATE"), - ) - with self.assertRaises(ValueError): - self._call_fut("[2009-06-172019-06-17)", range_field) - - def test_w_wrong_element_type(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="TIME"), - ) - with self.assertRaises(ValueError): - self._call_fut("[15:31:38, 15:50:38)", range_field) - - def test_w_unbounded_value(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="DATE"), - ) - coerced = self._call_fut("[UNBOUNDED, 2019-06-17)", range_field) - self.assertEqual( - coerced, - {"start": None, "end": datetime.date(2019, 6, 17)}, - ) - - def test_w_date_value(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="DATE"), - ) - coerced = self._call_fut("[2009-06-17, 2019-06-17)", range_field) - self.assertEqual( - coerced, - { - "start": datetime.date(2009, 6, 17), - "end": datetime.date(2019, 6, 17), - }, - ) - - def test_w_datetime_value(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="DATETIME"), - ) - coerced = self._call_fut( - "[2009-06-17T13:45:30, 2019-06-17T13:45:30)", range_field - ) - self.assertEqual( - coerced, - { - "start": datetime.datetime(2009, 6, 17, 13, 45, 30), - "end": datetime.datetime(2019, 6, 17, 13, 45, 30), - }, - ) - - def test_w_timestamp_value(self): - from google.cloud._helpers import _EPOCH - - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="TIMESTAMP"), - ) - coerced = self._call_fut("[1234567, 1234789)", range_field) - self.assertEqual( - coerced, - { - "start": _EPOCH + datetime.timedelta(seconds=1, microseconds=234567), - "end": _EPOCH + datetime.timedelta(seconds=1, microseconds=234789), - }, - ) - - -class Test_record_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _record_from_json - - return _record_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_nullable_subfield_none(self): - subfield = _Field("NULLABLE", "age", "INTEGER") - field = _Field("REQUIRED", fields=[subfield]) - value = {"f": [{"v": None}]} - coerced = self._call_fut(value, field) - self.assertEqual(coerced, {"age": None}) - - def test_w_scalar_subfield(self): - subfield = _Field("REQUIRED", "age", "INTEGER") - field = _Field("REQUIRED", fields=[subfield]) - value = {"f": [{"v": 42}]} - coerced = self._call_fut(value, field) - self.assertEqual(coerced, {"age": 42}) - - def test_w_scalar_subfield_geography(self): - subfield = _Field("REQUIRED", "geo", "GEOGRAPHY") - field = _Field("REQUIRED", fields=[subfield]) - value = {"f": [{"v": "POINT(1, 2)"}]} - coerced = self._call_fut(value, field) - self.assertEqual(coerced, {"geo": "POINT(1, 2)"}) - - def test_w_repeated_subfield(self): - subfield = _Field("REPEATED", "color", "STRING") - field = _Field("REQUIRED", fields=[subfield]) - value = {"f": [{"v": [{"v": "red"}, {"v": "yellow"}, {"v": "blue"}]}]} - coerced = self._call_fut(value, field) - self.assertEqual(coerced, {"color": ["red", "yellow", "blue"]}) - - def test_w_record_subfield(self): - full_name = _Field("REQUIRED", "full_name", "STRING") - area_code = _Field("REQUIRED", "area_code", "STRING") - local_number = _Field("REQUIRED", "local_number", "STRING") - rank = _Field("REQUIRED", "rank", "INTEGER") - phone = _Field( - "NULLABLE", "phone", "RECORD", fields=[area_code, local_number, rank] - ) - person = _Field("REQUIRED", "person", "RECORD", fields=[full_name, phone]) - value = { - "f": [ - {"v": "Phred Phlyntstone"}, - {"v": {"f": [{"v": "800"}, {"v": "555-1212"}, {"v": 1}]}}, - ] - } - expected = { - "full_name": "Phred Phlyntstone", - "phone": {"area_code": "800", "local_number": "555-1212", "rank": 1}, - } - coerced = self._call_fut(value, person) - self.assertEqual(coerced, expected) - - class Test_field_to_index_mapping(unittest.TestCase): def _call_fut(self, schema): from google.cloud.bigquery._helpers import _field_to_index_mapping From 968020d5be9d2a30b90d046eaf52f91bb2c70911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 20 Mar 2025 11:08:48 -0500 Subject: [PATCH 421/536] fix: avoid "Unable to determine type" warning with JSON columns in `to_dataframe` (#1876) * add regression tests for empty dataframe * fix arrow test to be compatible with old pyarrow --- google/cloud/bigquery/_helpers.py | 15 ++++ google/cloud/bigquery/_pandas_helpers.py | 1 + google/cloud/bigquery/_pyarrow_helpers.py | 26 ++++++- google/cloud/bigquery/table.py | 4 +- tests/system/test_arrow.py | 29 ++++++++ tests/system/test_pandas.py | 26 +++++++ .../test_data_frame_cell_data_parser.py | 71 +++++++++++++++++++ tests/unit/test__pyarrow_helpers.py | 12 +++- tests/unit/test_table_arrow.py | 66 +++++++++++------ tests/unit/test_table_pandas.py | 4 ++ 10 files changed, 230 insertions(+), 24 deletions(-) create mode 100644 tests/unit/_helpers/test_data_frame_cell_data_parser.py diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 4ba3ccf93..76c4f1fbd 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -387,6 +387,21 @@ def range_to_py(self, value, field): CELL_DATA_PARSER = CellDataParser() +class DataFrameCellDataParser(CellDataParser): + """Override of CellDataParser to handle differences in expection of values in DataFrame-like outputs. + + This is used to turn the output of the REST API into a pyarrow Table, + emulating the serialized arrow from the BigQuery Storage Read API. + """ + + def json_to_py(self, value, _): + """No-op because DataFrame expects string for JSON output.""" + return value + + +DATA_FRAME_CELL_DATA_PARSER = DataFrameCellDataParser() + + class ScalarQueryParamParser(CellDataParser): """Override of CellDataParser to handle the differences in the response from query params. diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index feb6b3adb..457eb9078 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -158,6 +158,7 @@ def finish(self): b"ARROW:extension:metadata": b'{"encoding": "WKT"}', }, "DATETIME": {b"ARROW:extension:name": b"google:sqlType:datetime"}, + "JSON": {b"ARROW:extension:name": b"google:sqlType:json"}, } diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py index 034e020ee..03c70bf63 100644 --- a/google/cloud/bigquery/_pyarrow_helpers.py +++ b/google/cloud/bigquery/_pyarrow_helpers.py @@ -15,7 +15,9 @@ """Shared helper functions for connecting BigQuery and pyarrow. NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, -instead. See: go/pandas-gbq-and-bigframes-redundancy and +instead. See: go/pandas-gbq-and-bigframes-redundancy, +https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/bigquery_to_pyarrow.py +and https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py """ @@ -26,6 +28,14 @@ except ImportError: pyarrow = None +try: + import db_dtypes # type: ignore + + db_dtypes_import_exception = None +except ImportError as exc: + db_dtypes = None + db_dtypes_import_exception = exc + def pyarrow_datetime(): return pyarrow.timestamp("us", tz=None) @@ -67,12 +77,18 @@ def pyarrow_timestamp(): "GEOGRAPHY": pyarrow.string, "INT64": pyarrow.int64, "INTEGER": pyarrow.int64, + # Normally, we'd prefer JSON type built-in to pyarrow (added in 19.0.0), + # but we'd like this to map as closely to the BQ Storage API as + # possible, which uses the string() dtype, as JSON support in Arrow + # predates JSON support in BigQuery by several years. + "JSON": pyarrow.string, "NUMERIC": pyarrow_numeric, "STRING": pyarrow.string, "TIME": pyarrow_time, "TIMESTAMP": pyarrow_timestamp, } + # DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead. _ARROW_SCALAR_IDS_TO_BQ = { # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes pyarrow.bool_().id: "BOOL", @@ -97,6 +113,9 @@ def pyarrow_timestamp(): pyarrow.large_string().id: "STRING", # The exact scale and precision don't matter, see below. pyarrow.decimal128(38, scale=9).id: "NUMERIC", + # NOTE: all extension types (e.g. json_, uuid, db_dtypes.JSONArrowType) + # have the same id (31 as of version 19.0.1), so these should not be + # matched by id. } _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric @@ -107,6 +126,9 @@ def pyarrow_timestamp(): def bq_to_arrow_scalars(bq_scalar: str): """ + DEPRECATED: update pandas_gbq.schema.bigquery_to_pyarrow, instead, which is + to be added in https://github.com/googleapis/python-bigquery-pandas/pull/893. + Returns: The Arrow scalar type that the input BigQuery scalar type maps to. If it cannot find the BigQuery scalar, return None. @@ -116,6 +138,8 @@ def bq_to_arrow_scalars(bq_scalar: str): def arrow_scalar_ids_to_bq(arrow_scalar: Any): """ + DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead. + Returns: The BigQuery scalar type that the input arrow scalar type maps to. If it cannot find the arrow scalar, return None. diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index f139e44ad..238ff6beb 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -3533,7 +3533,9 @@ def _row_iterator_page_columns(schema, response): def get_column_data(field_index, field): for row in rows: - yield _helpers.CELL_DATA_PARSER.to_py(row["f"][field_index]["v"], field) + yield _helpers.DATA_FRAME_CELL_DATA_PARSER.to_py( + row["f"][field_index]["v"], field + ) for field_index, field in enumerate(schema): columns.append(get_column_data(field_index, field)) diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index 82cf11f85..f2aed656c 100644 --- a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -194,3 +194,32 @@ def test_list_rows_range_csv( range_type = schema.field("range_date").type assert range_type == expected_type + + +def test_to_arrow_query_with_empty_results(bigquery_client): + """ + JSON regression test for https://github.com/googleapis/python-bigquery/issues/1580. + """ + job = bigquery_client.query( + """ + select + 123 as int_col, + '' as string_col, + to_json('{}') as json_col, + struct(to_json('[]') as json_field, -1 as int_field) as struct_col, + [to_json('null')] as json_array_col, + from unnest([]) + """ + ) + table = job.to_arrow() + assert list(table.column_names) == [ + "int_col", + "string_col", + "json_col", + "struct_col", + "json_array_col", + ] + assert table.shape == (0, 5) + struct_type = table.field("struct_col").type + assert struct_type.get_field_index("json_field") == 0 + assert struct_type.get_field_index("int_field") == 1 diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index e65fca27e..01f552435 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -1304,6 +1304,32 @@ def test_upload_time_and_datetime_56(bigquery_client, dataset_id): ] +def test_to_dataframe_query_with_empty_results(bigquery_client): + """ + JSON regression test for https://github.com/googleapis/python-bigquery/issues/1580. + """ + job = bigquery_client.query( + """ + select + 123 as int_col, + '' as string_col, + to_json('{}') as json_col, + struct(to_json('[]') as json_field, -1 as int_field) as struct_col, + [to_json('null')] as json_array_col, + from unnest([]) + """ + ) + df = job.to_dataframe() + assert list(df.columns) == [ + "int_col", + "string_col", + "json_col", + "struct_col", + "json_array_col", + ] + assert len(df.index) == 0 + + def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): wkt = pytest.importorskip("shapely.wkt") bigquery_client.query( diff --git a/tests/unit/_helpers/test_data_frame_cell_data_parser.py b/tests/unit/_helpers/test_data_frame_cell_data_parser.py new file mode 100644 index 000000000..c3332dc89 --- /dev/null +++ b/tests/unit/_helpers/test_data_frame_cell_data_parser.py @@ -0,0 +1,71 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import google.cloud.bigquery.schema + + +def create_field(mode="NULLABLE", type_="IGNORED", name="test_field", **kwargs): + return google.cloud.bigquery.schema.SchemaField(name, type_, mode=mode, **kwargs) + + +@pytest.fixture +def mut(): + from google.cloud.bigquery import _helpers + + return _helpers + + +@pytest.fixture +def object_under_test(mut): + return mut.DATA_FRAME_CELL_DATA_PARSER + + +def test_json_to_py_doesnt_parse_json(object_under_test): + coerced = object_under_test.json_to_py('{"key":"value"}', create_field()) + assert coerced == '{"key":"value"}' + + +def test_json_to_py_repeated_doesnt_parse_json(object_under_test): + coerced = object_under_test.json_to_py('{"key":"value"}', create_field("REPEATED")) + assert coerced == '{"key":"value"}' + + +def test_record_to_py_doesnt_parse_json(object_under_test): + subfield = create_field(type_="JSON", name="json") + field = create_field(fields=[subfield]) + value = {"f": [{"v": '{"key":"value"}'}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"json": '{"key":"value"}'} + + +def test_record_to_py_doesnt_parse_repeated_json(object_under_test): + subfield = create_field("REPEATED", "JSON", name="json") + field = create_field("REQUIRED", fields=[subfield]) + value = { + "f": [ + { + "v": [ + {"v": '{"key":"value0"}'}, + {"v": '{"key":"value1"}'}, + {"v": '{"key":"value2"}'}, + ] + } + ] + } + coerced = object_under_test.record_to_py(value, field) + assert coerced == { + "json": ['{"key":"value0"}', '{"key":"value1"}', '{"key":"value2"}'] + } diff --git a/tests/unit/test__pyarrow_helpers.py b/tests/unit/test__pyarrow_helpers.py index f0a872c88..06fc2eb85 100644 --- a/tests/unit/test__pyarrow_helpers.py +++ b/tests/unit/test__pyarrow_helpers.py @@ -27,8 +27,16 @@ def module_under_test(): def test_bq_to_arrow_scalars(module_under_test): assert ( - module_under_test.bq_to_arrow_scalars("BIGNUMERIC") - == module_under_test.pyarrow_bignumeric + module_under_test.bq_to_arrow_scalars("BIGNUMERIC")() + == module_under_test.pyarrow_bignumeric() + ) + assert ( + # Normally, we'd prefer JSON type built-in to pyarrow (added in 19.0.0), + # but we'd like this to map as closely to the BQ Storage API as + # possible, which uses the string() dtype, as JSON support in Arrow + # predates JSON support in BigQuery by several years. + module_under_test.bq_to_arrow_scalars("JSON")() + == pyarrow.string() ) assert module_under_test.bq_to_arrow_scalars("UNKNOWN_TYPE") is None diff --git a/tests/unit/test_table_arrow.py b/tests/unit/test_table_arrow.py index 6f1e6f76a..830c4ceb7 100644 --- a/tests/unit/test_table_arrow.py +++ b/tests/unit/test_table_arrow.py @@ -28,6 +28,7 @@ def test_to_arrow_with_jobs_query_response(): "fields": [ {"name": "name", "type": "STRING", "mode": "NULLABLE"}, {"name": "number", "type": "INTEGER", "mode": "NULLABLE"}, + {"name": "json", "type": "JSON", "mode": "NULLABLE"}, ] }, "jobReference": { @@ -37,15 +38,21 @@ def test_to_arrow_with_jobs_query_response(): }, "totalRows": "9", "rows": [ - {"f": [{"v": "Tiarra"}, {"v": "6"}]}, - {"f": [{"v": "Timothy"}, {"v": "325"}]}, - {"f": [{"v": "Tina"}, {"v": "26"}]}, - {"f": [{"v": "Tierra"}, {"v": "10"}]}, - {"f": [{"v": "Tia"}, {"v": "17"}]}, - {"f": [{"v": "Tiara"}, {"v": "22"}]}, - {"f": [{"v": "Tiana"}, {"v": "6"}]}, - {"f": [{"v": "Tiffany"}, {"v": "229"}]}, - {"f": [{"v": "Tiffani"}, {"v": "8"}]}, + {"f": [{"v": "Tiarra"}, {"v": "6"}, {"v": "123"}]}, + {"f": [{"v": "Timothy"}, {"v": "325"}, {"v": '{"key":"value"}'}]}, + {"f": [{"v": "Tina"}, {"v": "26"}, {"v": "[1,2,3]"}]}, + { + "f": [ + {"v": "Tierra"}, + {"v": "10"}, + {"v": '{"aKey": {"bKey": {"cKey": -123}}}'}, + ] + }, + {"f": [{"v": "Tia"}, {"v": "17"}, {"v": None}]}, + {"f": [{"v": "Tiara"}, {"v": "22"}, {"v": '"some-json-string"'}]}, + {"f": [{"v": "Tiana"}, {"v": "6"}, {"v": '{"nullKey":null}'}]}, + {"f": [{"v": "Tiffany"}, {"v": "229"}, {"v": '""'}]}, + {"f": [{"v": "Tiffani"}, {"v": "8"}, {"v": "[]"}]}, ], "totalBytesProcessed": "154775150", "jobComplete": True, @@ -65,7 +72,7 @@ def test_to_arrow_with_jobs_query_response(): ) records = rows.to_arrow() - assert records.column_names == ["name", "number"] + assert records.column_names == ["name", "number", "json"] assert records["name"].to_pylist() == [ "Tiarra", "Timothy", @@ -78,6 +85,17 @@ def test_to_arrow_with_jobs_query_response(): "Tiffani", ] assert records["number"].to_pylist() == [6, 325, 26, 10, 17, 22, 6, 229, 8] + assert records["json"].to_pylist() == [ + "123", + '{"key":"value"}', + "[1,2,3]", + '{"aKey": {"bKey": {"cKey": -123}}}', + None, + '"some-json-string"', + '{"nullKey":null}', + '""', + "[]", + ] def test_to_arrow_with_jobs_query_response_and_max_results(): @@ -87,6 +105,7 @@ def test_to_arrow_with_jobs_query_response_and_max_results(): "fields": [ {"name": "name", "type": "STRING", "mode": "NULLABLE"}, {"name": "number", "type": "INTEGER", "mode": "NULLABLE"}, + {"name": "json", "type": "JSON", "mode": "NULLABLE"}, ] }, "jobReference": { @@ -96,15 +115,21 @@ def test_to_arrow_with_jobs_query_response_and_max_results(): }, "totalRows": "9", "rows": [ - {"f": [{"v": "Tiarra"}, {"v": "6"}]}, - {"f": [{"v": "Timothy"}, {"v": "325"}]}, - {"f": [{"v": "Tina"}, {"v": "26"}]}, - {"f": [{"v": "Tierra"}, {"v": "10"}]}, - {"f": [{"v": "Tia"}, {"v": "17"}]}, - {"f": [{"v": "Tiara"}, {"v": "22"}]}, - {"f": [{"v": "Tiana"}, {"v": "6"}]}, - {"f": [{"v": "Tiffany"}, {"v": "229"}]}, - {"f": [{"v": "Tiffani"}, {"v": "8"}]}, + {"f": [{"v": "Tiarra"}, {"v": "6"}, {"v": "123"}]}, + {"f": [{"v": "Timothy"}, {"v": "325"}, {"v": '{"key":"value"}'}]}, + {"f": [{"v": "Tina"}, {"v": "26"}, {"v": "[1,2,3]"}]}, + { + "f": [ + {"v": "Tierra"}, + {"v": "10"}, + {"v": '{"aKey": {"bKey": {"cKey": -123}}}'}, + ] + }, + {"f": [{"v": "Tia"}, {"v": "17"}, {"v": None}]}, + {"f": [{"v": "Tiara"}, {"v": "22"}, {"v": '"some-json-string"'}]}, + {"f": [{"v": "Tiana"}, {"v": "6"}, {"v": '{"nullKey":null}'}]}, + {"f": [{"v": "Tiffany"}, {"v": "229"}, {"v": '""'}]}, + {"f": [{"v": "Tiffani"}, {"v": "8"}, {"v": "[]"}]}, ], "totalBytesProcessed": "154775150", "jobComplete": True, @@ -125,10 +150,11 @@ def test_to_arrow_with_jobs_query_response_and_max_results(): ) records = rows.to_arrow() - assert records.column_names == ["name", "number"] + assert records.column_names == ["name", "number", "json"] assert records["name"].to_pylist() == [ "Tiarra", "Timothy", "Tina", ] assert records["number"].to_pylist() == [6, 325, 26] + assert records["json"].to_pylist() == ["123", '{"key":"value"}', "[1,2,3]"] diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 9e42fb737..94737732b 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -59,6 +59,7 @@ def test_to_dataframe_nullable_scalars( pyarrow.field( "timestamp_col", pyarrow.timestamp("us", tz=datetime.timezone.utc) ), + pyarrow.field("json_col", pyarrow.string()), ] ) arrow_table = pyarrow.Table.from_pydict( @@ -78,6 +79,7 @@ def test_to_dataframe_nullable_scalars( 2021, 8, 9, 13, 30, 44, 123456, tzinfo=datetime.timezone.utc ) ], + "json_col": ["{}"], }, schema=arrow_schema, ) @@ -94,6 +96,7 @@ def test_to_dataframe_nullable_scalars( bigquery.SchemaField("string_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("timestamp_col", "TIMESTAMP"), + bigquery.SchemaField("json_col", "JSON"), ] mock_client = mock.create_autospec(bigquery.Client) mock_client.project = "test-proj" @@ -117,6 +120,7 @@ def test_to_dataframe_nullable_scalars( assert df.dtypes["string_col"].name == "object" assert df.dtypes["time_col"].name == "dbtime" assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" + assert df.dtypes["json_col"].name == "object" # Check for expected values. assert df["bignumeric_col"][0] == decimal.Decimal("123.456789101112131415") From 85de1a361d8bbda7ff8a20b34ffd5a0e619a1f38 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 25 Mar 2025 13:45:20 -0500 Subject: [PATCH 422/536] chore(main): release 3.31.0 (#2139) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 21 +++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 91d0a362d..4b115464c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,27 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.31.0](https://github.com/googleapis/python-bigquery/compare/v3.30.0...v3.31.0) (2025-03-20) + + +### Features + +* Add query text and total bytes processed to RowIterator ([#2140](https://github.com/googleapis/python-bigquery/issues/2140)) ([2d5f932](https://github.com/googleapis/python-bigquery/commit/2d5f9320d7103bc64c7ba496ba54bb0ef52b5605)) +* Add support for Python 3.13 ([0842aa1](https://github.com/googleapis/python-bigquery/commit/0842aa10967b1d8395cfb43e52c8ea091b381870)) + + +### Bug Fixes + +* Adding property setter for table constraints, [#1990](https://github.com/googleapis/python-bigquery/issues/1990) ([#2092](https://github.com/googleapis/python-bigquery/issues/2092)) ([f8572dd](https://github.com/googleapis/python-bigquery/commit/f8572dd86595361bae82c3232b2c0d159690a7b7)) +* Allow protobuf 6.x ([0842aa1](https://github.com/googleapis/python-bigquery/commit/0842aa10967b1d8395cfb43e52c8ea091b381870)) +* Avoid "Unable to determine type" warning with JSON columns in `to_dataframe` ([#1876](https://github.com/googleapis/python-bigquery/issues/1876)) ([968020d](https://github.com/googleapis/python-bigquery/commit/968020d5be9d2a30b90d046eaf52f91bb2c70911)) +* Remove setup.cfg configuration for creating universal wheels ([#2146](https://github.com/googleapis/python-bigquery/issues/2146)) ([d7f7685](https://github.com/googleapis/python-bigquery/commit/d7f76853d598c354bfd2e65f5dde28dae97da0ec)) + + +### Dependencies + +* Remove Python 3.7 and 3.8 as supported runtimes ([#2133](https://github.com/googleapis/python-bigquery/issues/2133)) ([fb7de39](https://github.com/googleapis/python-bigquery/commit/fb7de398cb2ad000b80a8a702d1f6539dc03d8e0)) + ## [3.30.0](https://github.com/googleapis/python-bigquery/compare/v3.29.0...v3.30.0) (2025-02-26) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 01c4c51ca..c0f7a96d6 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.30.0" +__version__ = "3.31.0" From 4e618e560bfea1ca3b7f17b4fc1f8f438fa6c77e Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 27 Mar 2025 19:00:31 +0100 Subject: [PATCH 423/536] chore(deps): update all dependencies (#2143) * chore(deps): update all dependencies * pin ipython===8.18.1 for python 3.9 --------- Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements-test.txt | 6 ++--- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements-test.txt | 4 +-- samples/geography/requirements.txt | 34 ++++++++++++------------ samples/magics/requirements-test.txt | 6 ++--- samples/magics/requirements.txt | 8 +++--- samples/notebooks/requirements-test.txt | 6 ++--- samples/notebooks/requirements.txt | 13 ++++----- samples/snippets/requirements-test.txt | 6 ++--- samples/snippets/requirements.txt | 2 +- 10 files changed, 44 insertions(+), 43 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index cf215e2fd..c8290d33f 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 165800741..fa349e0d3 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.27.0 +google-cloud-bigquery==3.31.0 google-auth-oauthlib==1.2.1 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 4ad1bd028..5d20a4554 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==8.3.4 -mock==5.1.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 0ad2154a4..3fa11ce7c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,41 +1,41 @@ -attrs==24.3.0 -certifi==2024.12.14 +attrs==25.3.0 +certifi==2025.1.31 cffi==1.17.1 charset-normalizer==3.4.1 click==8.1.8 click-plugins==1.1.1 cligj==0.7.2 -db-dtypes==1.3.1 +db-dtypes==1.4.2 Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 -google-api-core==2.24.0 -google-auth==2.37.0 -google-cloud-bigquery==3.27.0 -google-cloud-bigquery-storage==2.27.0 -google-cloud-core==2.4.1 -google-crc32c==1.6.0 +google-api-core==2.24.2 +google-auth==2.38.0 +google-cloud-bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 +google-cloud-core==2.4.3 +google-crc32c==1.7.1 google-resumable-media==2.7.2 -googleapis-common-protos==1.66.0 -grpcio==1.69.0 +googleapis-common-protos==1.69.2 +grpcio==1.71.0 idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 packaging==24.2 pandas==2.2.3 -proto-plus==1.25.0 -pyarrow==18.1.0 +proto-plus==1.26.1 +pyarrow==19.0.1 pyasn1==0.6.1 pyasn1-modules==0.4.1 pycparser==2.22 -pyparsing==3.2.1 +pyparsing==3.2.3 python-dateutil==2.9.0.post0 -pytz==2024.2 +pytz==2025.2 PyYAML==6.0.2 requests==2.32.3 rsa==4.9 -Shapely==2.0.6 +Shapely==2.0.7 six==1.17.0 -typing-extensions==4.12.2 +typing-extensions==4.13.0 typing-inspect==0.9.0 urllib3==2.3.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index cf215e2fd..c8290d33f 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 4b81fe0ad..3ab215951 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.5.0 -db-dtypes==1.3.1 -google.cloud.bigquery==3.27.0 -google-cloud-bigquery-storage==2.27.0 +bigquery_magics==0.9.0 +db-dtypes==1.4.2 +google.cloud.bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 ipython===8.18.1 pandas==2.2.3 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index cf215e2fd..c8290d33f 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index e92d084a4..ca5505a2e 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,8 +1,9 @@ -bigquery-magics==0.5.0 -db-dtypes==1.3.1 -google-cloud-bigquery==3.27.0 -google-cloud-bigquery-storage==2.27.0 -ipython==8.18.1 +bigquery-magics==0.9.0 +db-dtypes==1.4.2 +google-cloud-bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 +ipython===8.18.1; python_version == '3.9' +ipython==9.0.2; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.0; python_version >= '3.10' +matplotlib==3.10.1; python_version >= '3.10' pandas==2.2.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 52ccc8ab2..197b89187 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,4 +1,4 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 307ebac24..4b88c6b70 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.27.0 +google-cloud-bigquery==3.31.0 From c526822ce781d5c24e37703507d74fd785a5fe29 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 1 Apr 2025 00:40:19 +0200 Subject: [PATCH 424/536] chore(deps): update dependency pyasn1-modules to v0.4.2 (#2150) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3fa11ce7c..514e19d2c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -26,7 +26,7 @@ pandas==2.2.3 proto-plus==1.26.1 pyarrow==19.0.1 pyasn1==0.6.1 -pyasn1-modules==0.4.1 +pyasn1-modules==0.4.2 pycparser==2.22 pyparsing==3.2.3 python-dateutil==2.9.0.post0 From 77d71736fcc006d3ab8f8ba17955ad5f06e21876 Mon Sep 17 00:00:00 2001 From: yokomotod Date: Wed, 2 Apr 2025 05:16:41 +0900 Subject: [PATCH 425/536] fix: empty record dtypes (#2147) * fix: empty record dtypes * update pandas minimum version * fix coverage * fix test_pandas --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/table.py | 42 +++++++++++++++------------------- pyproject.toml | 2 +- testing/constraints-3.9.txt | 2 +- tests/system/test_pandas.py | 7 +----- tests/unit/test_table.py | 10 ++------ 5 files changed, 23 insertions(+), 40 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 238ff6beb..099f7fd69 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2648,31 +2648,25 @@ def to_dataframe( if pyarrow.types.is_timestamp(col.type) ) - if len(record_batch) > 0: - df = record_batch.to_pandas( + df = record_batch.to_pandas( + date_as_object=date_as_object, + timestamp_as_object=timestamp_as_object, + integer_object_nulls=True, + types_mapper=_pandas_helpers.default_types_mapper( date_as_object=date_as_object, - timestamp_as_object=timestamp_as_object, - integer_object_nulls=True, - types_mapper=_pandas_helpers.default_types_mapper( - date_as_object=date_as_object, - bool_dtype=bool_dtype, - int_dtype=int_dtype, - float_dtype=float_dtype, - string_dtype=string_dtype, - date_dtype=date_dtype, - datetime_dtype=datetime_dtype, - time_dtype=time_dtype, - timestamp_dtype=timestamp_dtype, - range_date_dtype=range_date_dtype, - range_datetime_dtype=range_datetime_dtype, - range_timestamp_dtype=range_timestamp_dtype, - ), - ) - else: - # Avoid "ValueError: need at least one array to concatenate" on - # older versions of pandas when converting empty RecordBatch to - # DataFrame. See: https://github.com/pandas-dev/pandas/issues/41241 - df = pandas.DataFrame([], columns=record_batch.schema.names) + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, + date_dtype=date_dtype, + datetime_dtype=datetime_dtype, + time_dtype=time_dtype, + timestamp_dtype=timestamp_dtype, + range_date_dtype=range_date_dtype, + range_datetime_dtype=range_datetime_dtype, + range_timestamp_dtype=range_timestamp_dtype, + ), + ) for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column], copy=False) diff --git a/pyproject.toml b/pyproject.toml index 17bf4fd20..38d74cdd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ bqstorage = [ "pyarrow >= 4.0.0", ] pandas = [ - "pandas >= 1.1.4", + "pandas >= 1.3.0", "pandas-gbq >= 0.26.1", "grpcio >= 1.47.0, < 2.0.0", "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index 63b5d8bf6..cb6c29f3b 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -21,7 +21,7 @@ opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 packaging==24.2.0 -pandas==1.1.4 +pandas==1.3.0 pandas-gbq==0.26.1 proto-plus==1.22.3 protobuf==3.20.2 diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 01f552435..1fe7ff2cd 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -1222,12 +1222,7 @@ def test_list_rows_nullable_scalars_extreme_dtypes_w_custom_dtype( # These pandas dtypes are handled by the custom dtypes. assert df.dtypes["bool_col"].name == "boolean" - # Result is dependent upon which version of pandas is being used. - # Float64 was not introduced until pandas version 1.4. - if PANDAS_INSTALLED_VERSION >= "1.4": - assert df.dtypes["float64_col"].name == "Float64" - else: - assert df.dtypes["float64_col"].name == "string" + assert df.dtypes["float64_col"].name == "Float64" assert df.dtypes["int64_col"].name == "Int64" assert df.dtypes["string_col"].name == "string" diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index b846036ab..3588cfba6 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -4143,14 +4143,8 @@ def test_to_dataframe_w_dtypes_mapper(self): ) self.assertEqual(df.name.dtype.name, "string") - # While pyproject.toml lists pandas 1.1 as the lowest supported version of - # pandas, the pip resolver is not able to resolve pandas 1.1 and numpy - if hasattr(pandas, "Float64Dtype"): - self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) - self.assertEqual(df.miles.dtype.name, "Float64") - else: - self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"]) - self.assertEqual(df.miles.dtype.name, "string") + self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) + self.assertEqual(df.miles.dtype.name, "Float64") if hasattr(pandas, "ArrowDtype"): self.assertEqual( From c2343dd4a55cfe90bf450547eba45945e6d2ede6 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 3 Apr 2025 19:50:49 +0200 Subject: [PATCH 426/536] chore(deps): update dependency shapely to v2.1.0 (#2155) * chore(deps): update dependency shapely to v2.1.0 * pin Shapely===2.0.7 for python 3.9 --------- Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 514e19d2c..5fe9005cc 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -34,7 +34,8 @@ pytz==2025.2 PyYAML==6.0.2 requests==2.32.3 rsa==4.9 -Shapely==2.0.7 +Shapely===2.0.7; python_version == '3.9' +Shapely==2.1.0; python_version >= '3.10' six==1.17.0 typing-extensions==4.13.0 typing-inspect==0.9.0 From e89a707b162182ededbf94cc9a0f7594bc2be475 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Thu, 3 Apr 2025 11:45:08 -0700 Subject: [PATCH 427/536] fix: table iterator should not use bqstorage when page_size is not None (#2154) * fix: table iterator should not use bqstorage when page_size is not None * fix dbapi cursor tests --- google/cloud/bigquery/table.py | 11 +++++++++-- tests/unit/test_dbapi_cursor.py | 1 + tests/unit/test_table.py | 7 +++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 099f7fd69..8a3b6151a 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1873,6 +1873,11 @@ def total_bytes_processed(self) -> Optional[int]: """total bytes processed from job statistics, if present.""" return self._total_bytes_processed + @property + def page_size(self) -> Optional[int]: + """The maximum number of rows in each page of results from this request, if present.""" + return self._page_size + def _is_almost_completely_cached(self): """Check if all results are completely cached. @@ -1924,7 +1929,7 @@ def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client): if self._is_almost_completely_cached(): return False - if self.max_results is not None: + if self.max_results is not None or self.page_size is not None: return False try: @@ -1994,7 +1999,9 @@ def _maybe_warn_max_results( bqstorage_client: The BigQuery Storage client intended to use for downloading result rows. """ - if bqstorage_client is not None and self.max_results is not None: + if bqstorage_client is not None and ( + self.max_results is not None or self.page_size is not None + ): warnings.warn( "Cannot use bqstorage_client if max_results is set, " "reverting to fetching data with the REST endpoint.", diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 6fca4cec0..cba9030de 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -161,6 +161,7 @@ def _mock_rows( mock_rows, ) mock_rows.max_results = None + mock_rows.page_size = None type(mock_rows).job_id = mock.PropertyMock(return_value="test-job-id") type(mock_rows).location = mock.PropertyMock(return_value="test-location") type(mock_rows).num_dml_affected_rows = mock.PropertyMock( diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 3588cfba6..a9966f1ce 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2693,6 +2693,13 @@ def test__should_use_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) + def test__should_use_bqstorage_returns_false_if_page_size_set(self): + iterator = self._make_one(page_size=10, first_page_response=None) # not cached + result = iterator._should_use_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + self.assertFalse(result) + def test__should_use_bqstorage_returns_false_w_warning_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached From ae632c5a88546d7c60c7780af7baa4f4c5e4e5a4 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 3 Apr 2025 23:20:01 +0200 Subject: [PATCH 428/536] chore(deps): update dependency typing-extensions to v4.13.1 (#2156) Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5fe9005cc..37bcdf687 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -37,6 +37,6 @@ rsa==4.9 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.0; python_version >= '3.10' six==1.17.0 -typing-extensions==4.13.0 +typing-extensions==4.13.1 typing-inspect==0.9.0 urllib3==2.3.0 From 22b80bba9d0bed319fd3102e567906c9b458dd02 Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 10 Apr 2025 10:13:17 -0700 Subject: [PATCH 429/536] feat: add preview support for incremental results (#2145) * feat: add preview support for incremental results Plumbs support to enable incremental results. * fastpath allow * add fastquery test * lint * lint * blacken --- google/cloud/bigquery/_job_helpers.py | 1 + google/cloud/bigquery/job/query.py | 15 +++++++++++++++ tests/unit/job/test_query_config.py | 5 +++++ tests/unit/test__job_helpers.py | 12 ++++++++++++ 4 files changed, 33 insertions(+) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index a8373c356..9193f8184 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -564,6 +564,7 @@ def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool: "maximumBytesBilled", "requestId", "createSession", + "writeIncrementalResults", } unsupported_keys = request_keys - keys_allowlist diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index a27c10530..f14039bc0 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -674,6 +674,21 @@ def write_disposition(self): def write_disposition(self, value): self._set_sub_prop("writeDisposition", value) + @property + def write_incremental_results(self) -> Optional[bool]: + """This is only supported for a SELECT query using a temporary table. + + If set, the query is allowed to write results incrementally to the temporary result + table. This may incur a performance penalty. This option cannot be used with Legacy SQL. + + This feature is not generally available. + """ + return self._get_sub_prop("writeIncrementalResults") + + @write_incremental_results.setter + def write_incremental_results(self, value): + self._set_sub_prop("writeIncrementalResults", value) + @property def table_definitions(self): """Dict[str, google.cloud.bigquery.external_config.ExternalConfig]: diff --git a/tests/unit/job/test_query_config.py b/tests/unit/job/test_query_config.py index 7818236f4..e0878d067 100644 --- a/tests/unit/job/test_query_config.py +++ b/tests/unit/job/test_query_config.py @@ -167,6 +167,11 @@ def test_connection_properties(self): self.assertEqual(config.connection_properties[1].key, "time_zone") self.assertEqual(config.connection_properties[1].value, "America/Chicago") + def test_incremental_results(self): + config = self._get_target_class()() + config.write_incremental_results = True + self.assertEqual(config.write_incremental_results, True) + def test_create_session(self): config = self._get_target_class()() self.assertIsNone(config.create_session) diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 96914d9f9..4fa093c69 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -194,6 +194,13 @@ def make_query_response( make_query_request({"maximumBytesBilled": "987654"}), id="job_config-with-maximum_bytes_billed", ), + pytest.param( + job_query.QueryJobConfig( + write_incremental_results=True, + ), + make_query_request({"writeIncrementalResults": True}), + id="job_config-with-incremental-results", + ), ), ) def test__to_query_request(job_config, expected): @@ -1141,6 +1148,11 @@ def test_make_job_id_w_job_id_overrides_prefix(): False, id="priority=BATCH", ), + pytest.param( + job_query.QueryJobConfig(write_incremental_results=True), + True, + id="write_incremental_results", + ), ), ) def test_supported_by_jobs_query_from_queryjobconfig( From b162288eb3be5a8bd23b05070eae52fe6c813b1b Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 10 Apr 2025 10:37:54 -0700 Subject: [PATCH 430/536] chore(python): remove .gitignore from templates (#2160) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(python): remove .gitignore from templates Source-Link: https://github.com/googleapis/synthtool/commit/419d94cdddd0d859ac6743ffebd177693c8a027f Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:a7aef70df5f13313ddc027409fc8f3151422ec2a57ac8730fce8fa75c060d5bb * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove replacement in owlbot.py --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 4 ++-- owlbot.py | 8 -------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 9d743afe8..51b21a62b 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5581906b957284864632cde4e9c51d1cc66b0094990b27e689132fe5cd036046 -# created: 2025-03-07 + digest: sha256:a7aef70df5f13313ddc027409fc8f3151422ec2a57ac8730fce8fa75c060d5bb +# created: 2025-04-10T17:00:10.042601326Z diff --git a/owlbot.py b/owlbot.py index fceeaa1b6..8cfa2b097 100644 --- a/owlbot.py +++ b/owlbot.py @@ -130,14 +130,6 @@ 'ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"]', ) - -# ---------------------------------------------------------------------------- -# pytype-related changes -# ---------------------------------------------------------------------------- - -# Add .pytype to .gitignore -s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype") - s.shell.run(["nox", "-s", "blacken"], hide_output=False) for noxfile in REPO_ROOT.glob("samples/**/noxfile.py"): s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) From 1cabacbcec17a14d80e62627129cdf26696acabe Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 11 Apr 2025 15:23:01 -0400 Subject: [PATCH 431/536] test: adds pytest-xdist to speed up processing of CI/CD checks (#2153) * experimentation using pytest-xdist * adds pytest-xdist to nox system session for experimentation * adds pytest-xdist install AND -n=auto argument * updates sample noxfiles * updates pytest version in requirements-test.txt files * Update samples/notebooks/requirements-test.txt * Update samples/notebooks/requirements-test.txt --- noxfile.py | 23 ++++++++++++++++++++--- samples/desktopapp/requirements-test.txt | 1 + samples/geography/requirements-test.txt | 1 + samples/magics/requirements-test.txt | 1 + samples/notebooks/requirements-test.txt | 1 + samples/snippets/requirements-test.txt | 1 + 6 files changed, 25 insertions(+), 3 deletions(-) diff --git a/noxfile.py b/noxfile.py index 1b118836b..c2b4bbb50 100644 --- a/noxfile.py +++ b/noxfile.py @@ -98,6 +98,7 @@ def default(session, install_extras=True): "pytest", "google-cloud-testutils", "pytest-cov", + "pytest-xdist", "freezegun", "-c", constraints_path, @@ -129,6 +130,7 @@ def default(session, install_extras=True): # Run py.test against the unit tests. session.run( "py.test", + "-n=auto", "--quiet", "-W default::PendingDeprecationWarning", "--cov=google/cloud/bigquery", @@ -224,7 +226,12 @@ def system(session): # Install all test dependencies, then install local packages in place. session.install( - "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path + "pytest", + "psutil", + "pytest-xdist", + "google-cloud-testutils", + "-c", + constraints_path, ) if os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE", "") == "true": # mTLS test requires pyopenssl and latest google-cloud-storage @@ -257,6 +264,7 @@ def system(session): # Run py.test against the system tests. session.run( "py.test", + "-n=auto", "--quiet", "-W default::PendingDeprecationWarning", os.path.join("tests", "system"), @@ -310,7 +318,9 @@ def snippets(session): ) # Install all test dependencies, then install local packages in place. - session.install("pytest", "google-cloud-testutils", "-c", constraints_path) + session.install( + "pytest", "pytest-xdist", "google-cloud-testutils", "-c", constraints_path + ) session.install("google-cloud-storage", "-c", constraints_path) session.install("grpcio", "-c", constraints_path) @@ -326,9 +336,12 @@ def snippets(session): # Run py.test against the snippets tests. # Skip tests in samples/snippets, as those are run in a different session # using the nox config from that directory. - session.run("py.test", os.path.join("docs", "snippets.py"), *session.posargs) + session.run( + "py.test", "-n=auto", os.path.join("docs", "snippets.py"), *session.posargs + ) session.run( "py.test", + "-n=auto", "samples", "-W default::PendingDeprecationWarning", "--ignore=samples/desktopapp", @@ -393,6 +406,7 @@ def prerelease_deps(session): "google-cloud-testutils", "psutil", "pytest", + "pytest-xdist", "pytest-cov", ) @@ -439,18 +453,21 @@ def prerelease_deps(session): # Run all tests, except a few samples tests which require extra dependencies. session.run( "py.test", + "-n=auto", "tests/unit", "-W default::PendingDeprecationWarning", ) session.run( "py.test", + "-n=auto", "tests/system", "-W default::PendingDeprecationWarning", ) session.run( "py.test", + "-n=auto", "samples/tests", "-W default::PendingDeprecationWarning", ) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index c8290d33f..183230cf4 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 5d20a4554..7b01ce8ac 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,3 @@ pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index c8290d33f..183230cf4 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index c8290d33f..183230cf4 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 197b89187..0cf0bb6b4 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -2,3 +2,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 From a69d6b796d2edb6ba453980c9553bc9b206c5a6e Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 16 Apr 2025 05:20:30 -0400 Subject: [PATCH 432/536] feat: adds condition class and assoc. unit tests (#2159) * feat: adds condition class and assoc. unit tests * Updates two test cases for empty string --- google/cloud/bigquery/dataset.py | 93 ++++++++++++++++++- tests/unit/test_dataset.py | 155 +++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 15a11fb40..cc14598fe 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -19,6 +19,7 @@ import copy import typing +from typing import Optional, List, Dict, Any, Union import google.cloud._helpers # type: ignore @@ -29,8 +30,6 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery import external_config -from typing import Optional, List, Dict, Any, Union - def _get_table_reference(self, table_id: str) -> TableReference: """Constructs a TableReference. @@ -1074,3 +1073,93 @@ def reference(self): model = _get_model_reference routine = _get_routine_reference + + +class Condition(object): + """Represents a textual expression in the Common Expression Language (CEL) syntax. + + Typically used for filtering or policy rules, such as in IAM Conditions + or BigQuery row/column access policies. + + See: + https://cloud.google.com/iam/docs/reference/rest/Shared.Types/Expr + https://github.com/google/cel-spec + + Args: + expression (str): + The condition expression string using CEL syntax. This is required. + Example: ``resource.type == "compute.googleapis.com/Instance"`` + title (Optional[str]): + An optional title for the condition, providing a short summary. + Example: ``"Request is for a GCE instance"`` + description (Optional[str]): + An optional description of the condition, providing a detailed explanation. + Example: ``"This condition checks whether the resource is a GCE instance."`` + """ + + def __init__( + self, + expression: str, + title: Optional[str] = None, + description: Optional[str] = None, + ): + self._properties: Dict[str, Any] = {} + # Use setters to initialize properties, which also handle validation + self.expression = expression + self.title = title + self.description = description + + @property + def title(self) -> Optional[str]: + """Optional[str]: The title for the condition.""" + return self._properties.get("title") + + @title.setter + def title(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("Pass a string for title, or None") + self._properties["title"] = value + + @property + def description(self) -> Optional[str]: + """Optional[str]: The description for the condition.""" + return self._properties.get("description") + + @description.setter + def description(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("Pass a string for description, or None") + self._properties["description"] = value + + @property + def expression(self) -> str: + """str: The expression string for the condition.""" + + # Cast assumes expression is always set due to __init__ validation + return typing.cast(str, self._properties.get("expression")) + + @expression.setter + def expression(self, value: str): + if not isinstance(value, str): + raise ValueError("Pass a non-empty string for expression") + if not value: + raise ValueError("expression cannot be an empty string") + self._properties["expression"] = value + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this Condition.""" + return self._properties + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "Condition": + """Factory: construct a Condition instance given its API representation.""" + + # Ensure required fields are present in the resource if necessary + if "expression" not in resource: + raise ValueError("API representation missing required 'expression' field.") + + return cls( + expression=resource["expression"], + title=resource.get("title"), + description=resource.get("description"), + ) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 8ab8dffec..036e22458 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -19,6 +19,7 @@ import pytest from google.cloud.bigquery.dataset import ( AccessEntry, + Condition, Dataset, DatasetReference, Table, @@ -1228,3 +1229,157 @@ def test_table(self): self.assertEqual(table.table_id, "table_id") self.assertEqual(table.dataset_id, dataset_id) self.assertEqual(table.project, project) + + +class TestCondition: + EXPRESSION = 'resource.name.startsWith("projects/my-project/instances/")' + TITLE = "Instance Access" + DESCRIPTION = "Access to instances in my-project" + + @pytest.fixture + def condition_instance(self): + """Provides a Condition instance for tests.""" + return Condition( + expression=self.EXPRESSION, + title=self.TITLE, + description=self.DESCRIPTION, + ) + + @pytest.fixture + def condition_api_repr(self): + """Provides the API representation for the test Condition.""" + return { + "expression": self.EXPRESSION, + "title": self.TITLE, + "description": self.DESCRIPTION, + } + + # --- Basic Functionality Tests --- + + def test_constructor_and_getters_full(self, condition_instance): + """Test initialization with all arguments and subsequent attribute access.""" + assert condition_instance.expression == self.EXPRESSION + assert condition_instance.title == self.TITLE + assert condition_instance.description == self.DESCRIPTION + + def test_constructor_and_getters_minimal(self): + """Test initialization with only the required expression.""" + condition = Condition(expression=self.EXPRESSION) + assert condition.expression == self.EXPRESSION + assert condition.title is None + assert condition.description is None + + def test_setters(self, condition_instance): + """Test setting attributes after initialization.""" + new_title = "New Title" + new_desc = "New Description" + new_expr = "request.time < timestamp('2024-01-01T00:00:00Z')" + + condition_instance.title = new_title + assert condition_instance.title == new_title + + condition_instance.description = new_desc + assert condition_instance.description == new_desc + + condition_instance.expression = new_expr + assert condition_instance.expression == new_expr + + # Test setting title and description to empty strings + condition_instance.title = "" + assert condition_instance.title == "" + + condition_instance.description = "" + assert condition_instance.description == "" + + # Test setting optional fields back to None + condition_instance.title = None + assert condition_instance.title is None + condition_instance.description = None + assert condition_instance.description is None + + # --- API Representation Tests --- + + def test_to_api_repr_full(self, condition_instance, condition_api_repr): + """Test converting a fully populated Condition to API representation.""" + api_repr = condition_instance.to_api_repr() + assert api_repr == condition_api_repr + + def test_to_api_repr_minimal(self): + """Test converting a minimally populated Condition to API representation.""" + condition = Condition(expression=self.EXPRESSION) + expected_api_repr = { + "expression": self.EXPRESSION, + "title": None, + "description": None, + } + api_repr = condition.to_api_repr() + assert api_repr == expected_api_repr + + def test_from_api_repr_full(self, condition_api_repr): + """Test creating a Condition from a full API representation.""" + condition = Condition.from_api_repr(condition_api_repr) + assert condition.expression == self.EXPRESSION + assert condition.title == self.TITLE + assert condition.description == self.DESCRIPTION + + def test_from_api_repr_minimal(self): + """Test creating a Condition from a minimal API representation.""" + minimal_repr = {"expression": self.EXPRESSION} + condition = Condition.from_api_repr(minimal_repr) + assert condition.expression == self.EXPRESSION + assert condition.title is None + assert condition.description is None + + def test_from_api_repr_with_extra_fields(self): + """Test creating a Condition from an API repr with unexpected fields.""" + api_repr = { + "expression": self.EXPRESSION, + "title": self.TITLE, + "unexpected_field": "some_value", + } + condition = Condition.from_api_repr(api_repr) + assert condition.expression == self.EXPRESSION + assert condition.title == self.TITLE + assert condition.description is None + # Check that the extra field didn't get added to internal properties + assert "unexpected_field" not in condition._properties + + # # --- Validation Tests --- + + @pytest.mark.parametrize( + "kwargs, error_msg", + [ + ({"expression": None}, "Pass a non-empty string for expression"), # type: ignore + ({"expression": ""}, "expression cannot be an empty string"), + ({"expression": 123}, "Pass a non-empty string for expression"), # type: ignore + ({"expression": EXPRESSION, "title": 123}, "Pass a string for title, or None"), # type: ignore + ({"expression": EXPRESSION, "description": False}, "Pass a string for description, or None"), # type: ignore + ], + ) + def test_validation_init(self, kwargs, error_msg): + """Test validation during __init__.""" + with pytest.raises(ValueError, match=error_msg): + Condition(**kwargs) + + @pytest.mark.parametrize( + "attribute, value, error_msg", + [ + ("expression", None, "Pass a non-empty string for expression"), # type: ignore + ("expression", "", "expression cannot be an empty string"), + ("expression", 123, "Pass a non-empty string for expression"), # type: ignore + ("title", 123, "Pass a string for title, or None"), # type: ignore + ("description", [], "Pass a string for description, or None"), # type: ignore + ], + ) + def test_validation_setters(self, condition_instance, attribute, value, error_msg): + """Test validation via setters.""" + with pytest.raises(ValueError, match=error_msg): + setattr(condition_instance, attribute, value) + + def test_validation_expression_required_from_api(self): + """Test ValueError is raised if expression is missing in from_api_repr.""" + api_repr = {"title": self.TITLE} + with pytest.raises( + ValueError, match="API representation missing required 'expression' field." + ): + Condition.from_api_repr(api_repr) From ca1798aaee2d5905fe688d3097f8ee5c989da333 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 24 Apr 2025 15:46:59 -0500 Subject: [PATCH 433/536] fix: `query()` now warns when `job_id` is set and the default `job_retry` is ignored (#2167) * fix: `query()` now warns when `job_id` is set and the default `job_retry` is ignored * Update google/cloud/bigquery/client.py * allow None for job_retry in code path that calls jobs.query from client.query * allow None for job_retry in code path that calls jobs.query from client.query * Update tests/unit/test_job_retry.py --- google/cloud/bigquery/_job_helpers.py | 42 ++++++++++++++++++++++++++- google/cloud/bigquery/client.py | 15 ++-------- tests/unit/test_job_retry.py | 18 ++++++++---- 3 files changed, 57 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 9193f8184..4a884ada5 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -39,7 +39,9 @@ import functools import os import uuid +import textwrap from typing import Any, Dict, Optional, TYPE_CHECKING, Union +import warnings import google.api_core.exceptions as core_exceptions from google.api_core import retry as retries @@ -198,6 +200,44 @@ def _validate_job_config(request_body: Dict[str, Any], invalid_key: str): raise ValueError(f"got unexpected key {repr(invalid_key)} in job_config") +def validate_job_retry(job_id: Optional[str], job_retry: Optional[retries.Retry]): + """Catch common mistakes, such as setting a job_id and job_retry at the same + time. + """ + if job_id is not None and job_retry is not None: + # TODO(tswast): To avoid breaking changes but still allow a default + # query job retry, we currently only raise if they explicitly set a + # job_retry other than the default. In a future version, we may want to + # avoid this check for DEFAULT_JOB_RETRY and always raise. + if job_retry is not google.cloud.bigquery.retry.DEFAULT_JOB_RETRY: + raise TypeError( + textwrap.dedent( + """ + `job_retry` was provided, but the returned job is + not retryable, because a custom `job_id` was + provided. To customize the job ID and allow for job + retries, set job_id_prefix, instead. + """ + ).strip() + ) + else: + warnings.warn( + textwrap.dedent( + """ + job_retry must be explicitly set to None if job_id is set. + BigQuery cannot retry a failed job by using the exact + same ID. Setting job_id without explicitly disabling + job_retry will raise an error in the future. To avoid this + warning, either use job_id_prefix instead (preferred) or + set job_retry=None. + """ + ).strip(), + category=FutureWarning, + # user code -> client.query / client.query_and_wait -> validate_job_retry + stacklevel=3, + ) + + def _to_query_request( job_config: Optional[job.QueryJobConfig] = None, *, @@ -308,7 +348,7 @@ def query_jobs_query( project: str, retry: retries.Retry, timeout: Optional[float], - job_retry: retries.Retry, + job_retry: Optional[retries.Retry], ) -> job.QueryJob: """Initiate a query using jobs.query with jobCreationMode=JOB_CREATION_REQUIRED. diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8bbdd6c32..e7cafc47e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3388,7 +3388,7 @@ def query( project: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - job_retry: retries.Retry = DEFAULT_JOB_RETRY, + job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY, api_method: Union[str, enums.QueryApiMethod] = enums.QueryApiMethod.INSERT, ) -> job.QueryJob: """Run a SQL query. @@ -3455,18 +3455,9 @@ def query( class, or if both ``job_id`` and non-``None`` non-default ``job_retry`` are provided. """ - job_id_given = job_id is not None - if ( - job_id_given - and job_retry is not None - and job_retry is not DEFAULT_JOB_RETRY - ): - raise TypeError( - "`job_retry` was provided, but the returned job is" - " not retryable, because a custom `job_id` was" - " provided." - ) + _job_helpers.validate_job_retry(job_id, job_retry) + job_id_given = job_id is not None if job_id_given and api_method == enums.QueryApiMethod.QUERY: raise TypeError( "`job_id` was provided, but the 'QUERY' `api_method` was requested." diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index 958986052..7144c640b 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -511,26 +511,34 @@ def api_request(method, path, query_params=None, data=None, **kw): def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): with pytest.raises( TypeError, - match=re.escape( + match=( "`job_retry` was provided, but the returned job is" " not retryable, because a custom `job_id` was" " provided." - ), + ).replace(" ", r"\s"), ): client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry()) def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): client._connection = make_connection({}) - job = client.query("select 42", job_id=42) + + with pytest.warns( + FutureWarning, + match=re.escape("job_retry must be explicitly set to None if job_id is set."), + ): + # Implicitly providing a job_retry is a warning and will be an error in the future. + job = client.query("select 42", job_id=42) + with pytest.raises( TypeError, - match=re.escape( + match=( "`job_retry` was provided, but this job is" " not retryable, because a custom `job_id` was" " provided to the query that created this job." - ), + ).replace(" ", r"\s"), ): + # Explicitly providing a job_retry is an error. job.result(job_retry=google.api_core.retry.Retry()) From a1c8e9aaf60986924868d54a0ab0334e77002a39 Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 25 Apr 2025 10:29:54 -0700 Subject: [PATCH 434/536] feat: support BigLakeConfiguration (managed Iceberg tables) (#2162) * feat: support BigLakeConfiguration (managed Iceberg tables) This PR adds the BigLakeConfiguration class to tables, and the necessary property mappings from Table. It also adds some utility enums (BigLakeFileFormat, BigLakeTableFormat) to more easily communicate available values for configuraiton. --- google/cloud/bigquery/enums.py | 16 ++++ google/cloud/bigquery/table.py | 150 +++++++++++++++++++++++++++++++ tests/unit/test_table.py | 160 +++++++++++++++++++++++++++++++++ 3 files changed, 326 insertions(+) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 5519bc989..b32fc8200 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -387,3 +387,19 @@ def _generate_next_value_(name, start, count, last_values): ROUNDING_MODE_UNSPECIFIED = enum.auto() ROUND_HALF_AWAY_FROM_ZERO = enum.auto() ROUND_HALF_EVEN = enum.auto() + + +class BigLakeFileFormat(object): + FILE_FORMAT_UNSPECIFIED = "FILE_FORMAT_UNSPECIFIED" + """The default unspecified value.""" + + PARQUET = "PARQUET" + """Apache Parquet format.""" + + +class BigLakeTableFormat(object): + TABLE_FORMAT_UNSPECIFIED = "TABLE_FORMAT_UNSPECIFIED" + """The default unspecified value.""" + + ICEBERG = "ICEBERG" + """Apache Iceberg format.""" diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 8a3b6151a..503ca4e71 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -380,6 +380,7 @@ class Table(_TableBase): _PROPERTY_TO_API_FIELD: Dict[str, Any] = { **_TableBase._PROPERTY_TO_API_FIELD, + "biglake_configuration": "biglakeConfiguration", "clustering_fields": "clustering", "created": "creationTime", "description": "description", @@ -431,6 +432,29 @@ def __init__(self, table_ref, schema=None) -> None: reference = property(_reference_getter) + @property + def biglake_configuration(self): + """google.cloud.bigquery.table.BigLakeConfiguration: Configuration + for managed tables for Apache Iceberg. + + See https://cloud.google.com/bigquery/docs/iceberg-tables for more information. + """ + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["biglake_configuration"] + ) + if prop is not None: + prop = BigLakeConfiguration.from_api_repr(prop) + return prop + + @biglake_configuration.setter + def biglake_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._properties[ + self._PROPERTY_TO_API_FIELD["biglake_configuration"] + ] = api_repr + @property def require_partition_filter(self): """bool: If set to true, queries over the partitioned table require a @@ -3501,6 +3525,132 @@ def to_api_repr(self) -> Dict[str, Any]: return resource +class BigLakeConfiguration(object): + """Configuration for managed tables for Apache Iceberg, formerly + known as BigLake. + + Args: + connection_id (Optional[str]): + The connection specifying the credentials to be used to read and write to external + storage, such as Cloud Storage. The connection_id can have the form + ``{project}.{location}.{connection_id}`` or + ``projects/{project}/locations/{location}/connections/{connection_id}``. + storage_uri (Optional[str]): + The fully qualified location prefix of the external folder where table data is + stored. The '*' wildcard character is not allowed. The URI should be in the + format ``gs://bucket/path_to_table/``. + file_format (Optional[str]): + The file format the table data is stored in. See BigLakeFileFormat for available + values. + table_format (Optional[str]): + The table format the metadata only snapshots are stored in. See BigLakeTableFormat + for available values. + _properties (Optional[dict]): + Private. Used to construct object from API resource. + """ + + def __init__( + self, + connection_id: Optional[str] = None, + storage_uri: Optional[str] = None, + file_format: Optional[str] = None, + table_format: Optional[str] = None, + _properties: Optional[dict] = None, + ) -> None: + if _properties is None: + _properties = {} + self._properties = _properties + if connection_id is not None: + self.connection_id = connection_id + if storage_uri is not None: + self.storage_uri = storage_uri + if file_format is not None: + self.file_format = file_format + if table_format is not None: + self.table_format = table_format + + @property + def connection_id(self) -> Optional[str]: + """str: The connection specifying the credentials to be used to read and write to external + storage, such as Cloud Storage.""" + return self._properties.get("connectionId") + + @connection_id.setter + def connection_id(self, value: Optional[str]): + self._properties["connectionId"] = value + + @property + def storage_uri(self) -> Optional[str]: + """str: The fully qualified location prefix of the external folder where table data is + stored.""" + return self._properties.get("storageUri") + + @storage_uri.setter + def storage_uri(self, value: Optional[str]): + self._properties["storageUri"] = value + + @property + def file_format(self) -> Optional[str]: + """str: The file format the table data is stored in. See BigLakeFileFormat for available + values.""" + return self._properties.get("fileFormat") + + @file_format.setter + def file_format(self, value: Optional[str]): + self._properties["fileFormat"] = value + + @property + def table_format(self) -> Optional[str]: + """str: The table format the metadata only snapshots are stored in. See BigLakeTableFormat + for available values.""" + return self._properties.get("tableFormat") + + @table_format.setter + def table_format(self, value: Optional[str]): + self._properties["tableFormat"] = value + + def _key(self): + return tuple(sorted(self._properties.items())) + + def __eq__(self, other): + if not isinstance(other, BigLakeConfiguration): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + key_vals = ["{}={}".format(key, val) for key, val in self._key()] + return "BigLakeConfiguration({})".format(",".join(key_vals)) + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "BigLakeConfiguration": + """Factory: construct a BigLakeConfiguration given its API representation. + + Args: + resource: + BigLakeConfiguration representation returned from the API + + Returns: + BigLakeConfiguration parsed from ``resource``. + """ + ref = cls() + ref._properties = resource + return ref + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this BigLakeConfiguration. + + Returns: + BigLakeConfiguration represented as an API resource. + """ + return copy.deepcopy(self._properties) + + def _item_to_row(iterator, resource): """Convert a JSON row to the native object. diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index a9966f1ce..253006547 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -435,6 +435,12 @@ def _make_resource(self): "sourceFormat": "CSV", "csvOptions": {"allowJaggedRows": True, "encoding": "encoding"}, }, + "biglakeConfiguration": { + "connectionId": "connection", + "storageUri": "uri", + "fileFormat": "PARQUET", + "tableFormat": "ICEBERG", + }, "labels": {"x": "y"}, } @@ -521,6 +527,15 @@ def _verifyResourceProperties(self, table, resource): else: self.assertIsNone(table.encryption_configuration) + if "biglakeConfiguration" in resource: + self.assertIsNotNone(table.biglake_configuration) + self.assertEqual(table.biglake_configuration.connection_id, "connection") + self.assertEqual(table.biglake_configuration.storage_uri, "uri") + self.assertEqual(table.biglake_configuration.file_format, "PARQUET") + self.assertEqual(table.biglake_configuration.table_format, "ICEBERG") + else: + self.assertIsNone(table.biglake_configuration) + def test_ctor(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -893,6 +908,60 @@ def test_table_constraints_property_getter(self): assert isinstance(table_constraints, TableConstraints) assert table_constraints.primary_key == PrimaryKey(columns=["id"]) + def test_biglake_configuration_not_set(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + assert table.biglake_configuration is None + + def test_biglake_configuration_set(self): + from google.cloud.bigquery.table import BigLakeConfiguration + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table._properties["biglakeConfiguration"] = { + "connectionId": "connection", + "storageUri": "uri", + "fileFormat": "PARQUET", + "tableFormat": "ICEBERG", + } + + config = table.biglake_configuration + + assert isinstance(config, BigLakeConfiguration) + assert config.connection_id == "connection" + assert config.storage_uri == "uri" + assert config.file_format == "PARQUET" + assert config.table_format == "ICEBERG" + + def test_biglake_configuration_property_setter(self): + from google.cloud.bigquery.table import BigLakeConfiguration + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + config = BigLakeConfiguration( + connection_id="connection", + storage_uri="uri", + file_format="PARQUET", + table_format="ICEBERG", + ) + table.biglake_configuration = config + + assert table._properties["biglakeConfiguration"] == { + "connectionId": "connection", + "storageUri": "uri", + "fileFormat": "PARQUET", + "tableFormat": "ICEBERG", + } + + table.biglake_configuration = None + assert table.biglake_configuration is None + def test_table_constraints_property_setter(self): from google.cloud.bigquery.table import ( ColumnReference, @@ -2166,6 +2235,97 @@ def test_ctor_full_resource(self): assert instance.snapshot_time == expected_time +class TestBigLakeConfiguration(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import BigLakeConfiguration + + return BigLakeConfiguration + + @classmethod + def _make_one(cls, *args, **kwargs): + klass = cls._get_target_class() + return klass(*args, **kwargs) + + def test_ctor_empty_resource(self): + instance = self._make_one() + self.assertIsNone(instance.connection_id) + self.assertIsNone(instance.storage_uri) + self.assertIsNone(instance.file_format) + self.assertIsNone(instance.table_format) + + def test_ctor_kwargs(self): + instance = self._make_one( + connection_id="conn", + storage_uri="uri", + file_format="FILE", + table_format="TABLE", + ) + self.assertEqual(instance.connection_id, "conn") + self.assertEqual(instance.storage_uri, "uri") + self.assertEqual(instance.file_format, "FILE") + self.assertEqual(instance.table_format, "TABLE") + + def test_ctor_full_resource(self): + resource = { + "connectionId": "conn", + "storageUri": "uri", + "fileFormat": "FILE", + "tableFormat": "TABLE", + } + instance = self._make_one(_properties=resource) + self.assertEqual(instance.connection_id, "conn") + self.assertEqual(instance.storage_uri, "uri") + self.assertEqual(instance.file_format, "FILE") + self.assertEqual(instance.table_format, "TABLE") + + def test_to_api_repr(self): + resource = { + "connectionId": "conn", + "storageUri": "uri", + "fileFormat": "FILE", + "tableFormat": "TABLE", + } + instance = self._make_one(_properties=resource) + self.assertEqual(instance.to_api_repr(), resource) + + def test_from_api_repr_partial(self): + klass = self._get_target_class() + api_repr = {"fileFormat": "FILE"} + instance = klass.from_api_repr(api_repr) + + self.assertIsNone(instance.connection_id) + self.assertIsNone(instance.storage_uri) + self.assertEqual(instance.file_format, "FILE") + self.assertIsNone(instance.table_format) + + def test_comparisons(self): + resource = { + "connectionId": "conn", + "storageUri": "uri", + "fileFormat": "FILE", + "tableFormat": "TABLE", + } + + first = self._make_one(_properties=resource) + second = self._make_one(_properties=copy.deepcopy(resource)) + # Exercise comparator overloads. + # first and second should be equivalent. + self.assertNotEqual(first, resource) + self.assertEqual(first, second) + self.assertEqual(hash(first), hash(second)) + + # Update second to ensure that first and second are no longer equivalent. + second.connection_id = "foo" + self.assertNotEqual(first, second) + self.assertNotEqual(hash(first), hash(second)) + + # Update first with the same change, restoring equivalence. + first.connection_id = "foo" + self.assertEqual(first, second) + self.assertEqual(hash(first), hash(second)) + + class TestCloneDefinition: @staticmethod def _get_target_class(): From 7301667272dfbdd04b1a831418a9ad2d037171fb Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 29 Apr 2025 09:16:36 -0400 Subject: [PATCH 435/536] feat: Update the AccessEntry class with a new condition attribute and unit tests (#2163) * feat: adds condition class and assoc. unit tests * Updates AccessEntry with condition setter/getter * Adds condition attr to AccessEntry and unit tests * adds tests for Condition dunder methods to ensure coverage * moves the entity_type logic out of _from_api_repr to entity_type setter * Updates logic in entity_type getter * updates several AccessEntry related tests * Updates AccessEntry condition setter test to use a dict * udpates entity_id handling * Updates _entity_type access * tweaks type hinting * Update tests/unit/test_dataset.py * Update tests/unit/test_dataset.py * Updates DatasetReference in test and __eq__ check * remove debug print statement --- google/cloud/bigquery/dataset.py | 126 ++++++++++-- tests/unit/test_dataset.py | 336 +++++++++++++++++++++++++++++-- 2 files changed, 432 insertions(+), 30 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index cc14598fe..670fe127c 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -298,12 +298,15 @@ def __init__( role: Optional[str] = None, entity_type: Optional[str] = None, entity_id: Optional[Union[Dict[str, Any], str]] = None, + **kwargs, ): - self._properties = {} + self._properties: Dict[str, Any] = {} if entity_type is not None: self._properties[entity_type] = entity_id self._properties["role"] = role - self._entity_type = entity_type + self._entity_type: Optional[str] = entity_type + for prop, val in kwargs.items(): + setattr(self, prop, val) @property def role(self) -> Optional[str]: @@ -330,6 +333,9 @@ def dataset(self, value): if isinstance(value, str): value = DatasetReference.from_string(value).to_api_repr() + if isinstance(value, DatasetReference): + value = value.to_api_repr() + if isinstance(value, (Dataset, DatasetListItem)): value = value.reference.to_api_repr() @@ -437,15 +443,65 @@ def special_group(self) -> Optional[str]: def special_group(self, value): self._properties["specialGroup"] = value + @property + def condition(self) -> Optional["Condition"]: + """Optional[Condition]: The IAM condition associated with this entry.""" + value = typing.cast(Dict[str, Any], self._properties.get("condition")) + return Condition.from_api_repr(value) if value else None + + @condition.setter + def condition(self, value: Union["Condition", dict, None]): + """Set the IAM condition for this entry.""" + if value is None: + self._properties["condition"] = None + elif isinstance(value, Condition): + self._properties["condition"] = value.to_api_repr() + elif isinstance(value, dict): + self._properties["condition"] = value + else: + raise TypeError("condition must be a Condition object, dict, or None") + @property def entity_type(self) -> Optional[str]: """The entity_type of the entry.""" + + # The api_repr for an AccessEntry object is expected to be a dict with + # only a few keys. Two keys that may be present are role and condition. + # Any additional key is going to have one of ~eight different names: + # userByEmail, groupByEmail, domain, dataset, specialGroup, view, + # routine, iamMember + + # if self._entity_type is None, see if it needs setting + # i.e. is there a key: value pair that should be associated with + # entity_type and entity_id? + if self._entity_type is None: + resource = self._properties.copy() + # we are empyting the dict to get to the last `key: value`` pair + # so we don't keep these first entries + _ = resource.pop("role", None) + _ = resource.pop("condition", None) + + try: + # we only need entity_type, because entity_id gets set elsewhere. + entity_type, _ = resource.popitem() + except KeyError: + entity_type = None + + self._entity_type = entity_type + return self._entity_type @property def entity_id(self) -> Optional[Union[Dict[str, Any], str]]: """The entity_id of the entry.""" - return self._properties.get(self._entity_type) if self._entity_type else None + if self.entity_type: + entity_type = self.entity_type + else: + return None + return typing.cast( + Optional[Union[Dict[str, Any], str]], + self._properties.get(entity_type, None), + ) def __eq__(self, other): if not isinstance(other, AccessEntry): @@ -464,7 +520,16 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.AccessEntry`. """ + properties = self._properties.copy() + + # Dicts are not hashable. + # Convert condition to a hashable datatype(s) + condition = properties.get("condition") + if isinstance(condition, dict): + condition_key = tuple(sorted(condition.items())) + properties["condition"] = condition_key + prop_tup = tuple(sorted(properties.items())) return (self.role, self._entity_type, self.entity_id, prop_tup) @@ -491,19 +556,11 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry": Returns: google.cloud.bigquery.dataset.AccessEntry: Access entry parsed from ``resource``. - - Raises: - ValueError: - If the resource has more keys than ``role`` and one additional - key. """ - entry = resource.copy() - role = entry.pop("role", None) - entity_type, entity_id = entry.popitem() - if len(entry) != 0: - raise ValueError("Entry has unexpected keys remaining.", entry) - return cls(role, entity_type, entity_id) + access_entry = cls() + access_entry._properties = resource.copy() + return access_entry class Dataset(object): @@ -1160,6 +1217,43 @@ def from_api_repr(cls, resource: Dict[str, Any]) -> "Condition": return cls( expression=resource["expression"], - title=resource.get("title"), - description=resource.get("description"), + title=resource.get("title", None), + description=resource.get("description", None), ) + + def __eq__(self, other: object) -> bool: + """Check for equality based on expression, title, and description.""" + if not isinstance(other, Condition): + return NotImplemented + return self._key() == other._key() + + def _key(self): + """A tuple key that uniquely describes this field. + Used to compute this instance's hashcode and evaluate equality. + Returns: + Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.AccessEntry`. + """ + + properties = self._properties.copy() + + # Dicts are not hashable. + # Convert object to a hashable datatype(s) + prop_tup = tuple(sorted(properties.items())) + return prop_tup + + def __ne__(self, other: object) -> bool: + """Check for inequality.""" + return not self == other + + def __hash__(self) -> int: + """Generate a hash based on expression, title, and description.""" + return hash(self._key()) + + def __repr__(self) -> str: + """Return a string representation of the Condition object.""" + parts = [f"expression={self.expression!r}"] + if self.title is not None: + parts.append(f"title={self.title!r}") + if self.description is not None: + parts.append(f"description={self.description!r}") + return f"Condition({', '.join(parts)})" diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 036e22458..51f1809bf 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -167,7 +167,10 @@ def test_from_api_repr_wo_role(self): entity_type="view", entity_id=resource["view"], ) - self.assertEqual(entry, exp_entry) + + assert entry.entity_type == exp_entry.entity_type + assert entry.entity_id == exp_entry.entity_id + assert entry.role is None def test_to_api_repr_w_extra_properties(self): resource = { @@ -179,15 +182,6 @@ def test_to_api_repr_w_extra_properties(self): exp_resource = entry.to_api_repr() self.assertEqual(resource, exp_resource) - def test_from_api_repr_entries_w_extra_keys(self): - resource = { - "role": "READER", - "specialGroup": "projectReaders", - "userByEmail": "salmon@example.com", - } - with self.assertRaises(ValueError): - self._get_target_class().from_api_repr(resource) - def test_view_getter_setter(self): view = { "projectId": "my_project", @@ -307,7 +301,10 @@ def test_dataset_getter_setter_dataset_ref(self): entry.dataset = dataset_ref resource = entry.to_api_repr() exp_resource = { - "dataset": {"dataset": dataset_ref, "targetTypes": None}, + "dataset": { + "dataset": {"datasetId": "my_dataset", "projectId": "my-project"}, + "targetTypes": None, + }, "role": None, } self.assertEqual(resource, exp_resource) @@ -494,6 +491,262 @@ def test_dataset_target_types_getter_setter_w_dataset(self): self.assertEqual(entry.dataset_target_types, target_types) +# --- Tests for AccessEntry when using Condition --- + +EXPRESSION = "request.time < timestamp('2026-01-01T00:00:00Z')" +TITLE = "Expires end 2025" +DESCRIPTION = "Access expires at the start of 2026." + + +@pytest.fixture +def condition_1(): + """Provides a sample Condition object.""" + return Condition( + expression=EXPRESSION, + title=TITLE, + description=DESCRIPTION, + ) + + +@pytest.fixture +def condition_1_api_repr(): + """Provides the API representation for condition_1.""" + # Use the actual to_api_repr method + return Condition( + expression=EXPRESSION, + title=TITLE, + description=DESCRIPTION, + ).to_api_repr() + + +@pytest.fixture +def condition_2(): + """Provides a second, different Condition object.""" + return Condition( + expression="resource.name.startsWith('projects/_/buckets/restricted/')", + title="Restricted Buckets", + ) + + +@pytest.fixture +def condition_2_api_repr(): + """Provides the API representation for condition2.""" + # Use the actual to_api_repr method + return Condition( + expression="resource.name.startsWith('projects/_/buckets/restricted/')", + title="Restricted Buckets", + ).to_api_repr() + + +class TestAccessEntryAndCondition: + @staticmethod + def _get_target_class(): + return AccessEntry + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + # Test __init__ without condition + def test_init_without_condition(self): + entry = AccessEntry("READER", "userByEmail", "test@example.com") + assert entry.role == "READER" + assert entry.entity_type == "userByEmail" + assert entry.entity_id == "test@example.com" + assert entry.condition is None + # Accessing _properties is for internal verification in tests + assert "condition" not in entry._properties + + # Test __init__ with condition object + def test_init_with_condition_object(self, condition_1, condition_1_api_repr): + entry = AccessEntry( + "READER", "userByEmail", "test@example.com", condition=condition_1 + ) + assert entry.condition == condition_1 + assert entry._properties.get("condition") == condition_1_api_repr + + # Test __init__ with condition=None + def test_init_with_condition_none(self): + entry = AccessEntry("READER", "userByEmail", "test@example.com", condition=None) + assert entry.condition is None + + # Test condition getter/setter + def test_condition_getter_setter( + self, condition_1, condition_1_api_repr, condition_2, condition_2_api_repr + ): + entry = AccessEntry("WRITER", "group", "admins@example.com") + assert entry.condition is None + + # Set condition 1 + entry.condition = condition_1 + assert entry.condition.to_api_repr() == condition_1_api_repr + assert entry._properties.get("condition") == condition_1_api_repr + + # Set condition 2 + entry.condition = condition_2 + assert entry.condition.to_api_repr() == condition_2_api_repr + assert entry._properties.get("condition") != condition_1_api_repr + assert entry._properties.get("condition") == condition_2.to_api_repr() + + # Set back to None + entry.condition = None + assert entry.condition is None + + # Set condition using a dict + entry.condition = condition_1_api_repr + assert entry._properties.get("condition") == condition_1_api_repr + + # Test setter validation + def test_condition_setter_invalid_type(self): + entry = AccessEntry("READER", "domain", "example.com") + with pytest.raises( + TypeError, match="condition must be a Condition object, dict, or None" + ): + entry.condition = 123 # type: ignore + + # Test equality/hash without condition + def test_equality_and_hash_without_condition(self): + entry1 = AccessEntry("OWNER", "specialGroup", "projectOwners") + entry2 = AccessEntry("OWNER", "specialGroup", "projectOwners") + entry3 = AccessEntry("WRITER", "specialGroup", "projectOwners") + assert entry1 == entry2 + assert entry1 != entry3 + assert hash(entry1) == hash(entry2) + assert hash(entry1) != hash(entry3) # Usually true + + def test_equality_and_hash_with_condition(self, condition_1, condition_2): + cond1a = Condition( + condition_1.expression, condition_1.title, condition_1.description + ) + cond1b = Condition( + condition_1.expression, condition_1.title, condition_1.description + ) # Same values, different object + + entry1a = AccessEntry( + "READER", "userByEmail", "a@example.com", condition=cond1a + ) + entry1b = AccessEntry( + "READER", "userByEmail", "a@example.com", condition=cond1b + ) # Different Condition instance + entry2 = AccessEntry( + "READER", "userByEmail", "a@example.com", condition=condition_2 + ) + entry3 = AccessEntry("READER", "userByEmail", "a@example.com") # No condition + entry4 = AccessEntry( + "WRITER", "userByEmail", "a@example.com", condition=cond1a + ) # Different role + + assert entry1a == entry1b + assert entry1a != entry2 + assert entry1a != entry3 + assert entry1a != entry4 + assert entry2 != entry3 + + assert hash(entry1a) == hash(entry1b) + assert hash(entry1a) != hash(entry2) # Usually true + assert hash(entry1a) != hash(entry3) # Usually true + assert hash(entry1a) != hash(entry4) # Usually true + + # Test to_api_repr with condition + def test_to_api_repr_with_condition(self, condition_1, condition_1_api_repr): + entry = AccessEntry( + "WRITER", "groupByEmail", "editors@example.com", condition=condition_1 + ) + expected_repr = { + "role": "WRITER", + "groupByEmail": "editors@example.com", + "condition": condition_1_api_repr, + } + assert entry.to_api_repr() == expected_repr + + def test_view_property_with_condition(self, condition_1): + """Test setting/getting view property when condition is present.""" + entry = AccessEntry(role=None, entity_type="view", condition=condition_1) + view_ref = TableReference(DatasetReference("proj", "dset"), "view_tbl") + entry.view = view_ref # Use the setter + assert entry.view == view_ref + assert entry.condition == condition_1 # Condition should persist + assert entry.role is None + assert entry.entity_type == "view" + + # Check internal representation + assert "view" in entry._properties + assert "condition" in entry._properties + + def test_user_by_email_property_with_condition(self, condition_1): + """Test setting/getting user_by_email property when condition is present.""" + entry = AccessEntry( + role="READER", entity_type="userByEmail", condition=condition_1 + ) + email = "test@example.com" + entry.user_by_email = email # Use the setter + assert entry.user_by_email == email + assert entry.condition == condition_1 # Condition should persist + assert entry.role == "READER" + assert entry.entity_type == "userByEmail" + + # Check internal representation + assert "userByEmail" in entry._properties + assert "condition" in entry._properties + + # Test from_api_repr without condition + def test_from_api_repr_without_condition(self): + api_repr = {"role": "OWNER", "userByEmail": "owner@example.com"} + entry = AccessEntry.from_api_repr(api_repr) + assert entry.role == "OWNER" + assert entry.entity_type == "userByEmail" + assert entry.entity_id == "owner@example.com" + assert entry.condition is None + + # Test from_api_repr with condition + def test_from_api_repr_with_condition(self, condition_1, condition_1_api_repr): + api_repr = { + "role": "READER", + "view": {"projectId": "p", "datasetId": "d", "tableId": "v"}, + "condition": condition_1_api_repr, + } + entry = AccessEntry.from_api_repr(api_repr) + assert entry.role == "READER" + assert entry.entity_type == "view" + # The entity_id for view/routine/dataset is the dict itself + assert entry.entity_id == {"projectId": "p", "datasetId": "d", "tableId": "v"} + assert entry.condition == condition_1 + + # Test from_api_repr edge case + def test_from_api_repr_no_entity(self, condition_1, condition_1_api_repr): + api_repr = {"role": "READER", "condition": condition_1_api_repr} + entry = AccessEntry.from_api_repr(api_repr) + assert entry.role == "READER" + assert entry.entity_type is None + assert entry.entity_id is None + assert entry.condition == condition_1 + + def test_dataset_property_with_condition(self, condition_1): + project = "my-project" + dataset_id = "my_dataset" + dataset_ref = DatasetReference(project, dataset_id) + entry = self._make_one(None) + entry.dataset = dataset_ref + entry.condition = condition_1 + + resource = entry.to_api_repr() + exp_resource = { + "role": None, + "dataset": { + "dataset": {"datasetId": "my_dataset", "projectId": "my-project"}, + "targetTypes": None, + }, + "condition": { + "expression": "request.time < timestamp('2026-01-01T00:00:00Z')", + "title": "Expires end 2025", + "description": "Access expires at the start of 2026.", + }, + } + assert resource == exp_resource + # Check internal representation + assert "dataset" in entry._properties + assert "condition" in entry._properties + + class TestDatasetReference(unittest.TestCase): @staticmethod def _get_target_class(): @@ -821,7 +1074,15 @@ def test_ctor_explicit(self): self.assertEqual( dataset.path, "/projects/%s/datasets/%s" % (OTHER_PROJECT, self.DS_ID) ) - self.assertEqual(dataset.access_entries, entries) + # creating a list of entries relies on AccessEntry.from_api_repr + # which does not create an object in exactly the same way as calling the + # class directly. We rely on calls to .entity_type and .entity_id to + # finalize the settings on each class. + entry_pairs = zip(dataset.access_entries, entries) + for pair in entry_pairs: + assert pair[0].role == pair[1].role + assert pair[0].entity_type == pair[1].entity_type + assert pair[0].entity_id == pair[1].entity_id self.assertIsNone(dataset.created) self.assertIsNone(dataset.full_dataset_id) @@ -854,8 +1115,18 @@ def test_access_entries_setter(self): dataset = self._make_one(self.DS_REF) phred = AccessEntry("OWNER", "userByEmail", "phred@example.com") bharney = AccessEntry("OWNER", "userByEmail", "bharney@example.com") - dataset.access_entries = [phred, bharney] - self.assertEqual(dataset.access_entries, [phred, bharney]) + entries = [phred, bharney] + dataset.access_entries = entries + + # creating a list of entries relies on AccessEntry.from_api_repr + # which does not create an object in exactly the same way as calling the + # class directly. We rely on calls to .entity_type and .entity_id to + # finalize the settings on each class. + entry_pairs = zip(dataset.access_entries, entries) + for pair in entry_pairs: + assert pair[0].role == pair[1].role + assert pair[0].entity_type == pair[1].entity_type + assert pair[0].entity_id == pair[1].entity_id def test_default_partition_expiration_ms(self): dataset = self._make_one("proj.dset") @@ -1383,3 +1654,40 @@ def test_validation_expression_required_from_api(self): ValueError, match="API representation missing required 'expression' field." ): Condition.from_api_repr(api_repr) + + def test___eq___equality(self, condition_1): + result = condition_1 + expected = condition_1 + assert result == expected + + def test___eq___equality_not_condition(self, condition_1): + result = condition_1 + other = "not a condition" + expected = result.__eq__(other) + assert expected is NotImplemented + + def test__ne__not_equality(self): + result = condition_1 + expected = condition_2 + assert result != expected + + def test__hash__function(self, condition_2): + cond1 = Condition( + expression=self.EXPRESSION, title=self.TITLE, description=self.DESCRIPTION + ) + cond2 = cond1 + cond_not_equal = condition_2 + assert cond1 == cond2 + assert cond1 is cond2 + assert hash(cond1) == hash(cond2) + assert hash(cond1) is not None + assert cond_not_equal != cond1 + assert hash(cond_not_equal) != hash(cond1) + + def test__hash__with_minimal_inputs(self): + cond1 = Condition( + expression="example", + title=None, + description=None, + ) + assert hash(cond1) is not None From b7656b97c1bd6c204d0508b1851d114719686655 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 1 May 2025 17:20:21 -0400 Subject: [PATCH 436/536] feat: add dataset access policy version attribute (#2169) * feat: adds condition class and assoc. unit tests * Updates two test cases for empty string * Updates tests for clarity * Updates access_policy_version setter and unittest --- google/cloud/bigquery/dataset.py | 15 +++++++++++++-- tests/unit/test_dataset.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 670fe127c..d225b7106 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -589,6 +589,7 @@ class Dataset(object): "default_rounding_mode": "defaultRoundingMode", "resource_tags": "resourceTags", "external_catalog_dataset_options": "externalCatalogDatasetOptions", + "access_policy_version": "accessPolicyVersion", } def __init__(self, dataset_ref) -> None: @@ -979,6 +980,16 @@ def external_catalog_dataset_options(self, value): self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"] ] = (value.to_api_repr() if value is not None else None) + @property + def access_policy_version(self): + return self._properties.get("accessPolicyVersion") + + @access_policy_version.setter + def access_policy_version(self, value): + if not isinstance(value, int) and value is not None: + raise ValueError("Pass an integer, or None") + self._properties["accessPolicyVersion"] = value + @classmethod def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. @@ -1217,8 +1228,8 @@ def from_api_repr(cls, resource: Dict[str, Any]) -> "Condition": return cls( expression=resource["expression"], - title=resource.get("title", None), - description=resource.get("description", None), + title=resource.get("title"), + description=resource.get("description"), ) def __eq__(self, other: object) -> bool: diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 51f1809bf..941430827 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1049,6 +1049,7 @@ def test_ctor_defaults(self): self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.location) self.assertEqual(dataset.is_case_insensitive, False) + self.assertIsNone(dataset.access_policy_version) def test_ctor_string(self): dataset = self._make_one("some-project.some_dset") @@ -1423,6 +1424,35 @@ def test_external_catalog_dataset_options_to_api_repr(self): expected = api_repr["externalCatalogDatasetOptions"] assert result == expected + def test_access_policy_version_valid_input(self): + dataset = self._make_one(self.DS_REF) + # Valid inputs for access_policy_version are currently + # ints 1, 2, 3, and None + # We rely upon the BQ backend to validate acceptable integer + # values, rather than perform that validation in the client. + for expected in [1, 2, 3, None]: + # set property using setter and integer + dataset.access_policy_version = expected + + # check getter and _properties dict + assert ( + dataset.access_policy_version == expected + ), f"Expected {expected} but got {dataset.access_policy_version}" + assert dataset._properties["accessPolicyVersion"] == expected + + def test_access_policy_version_invalid_input(self): + dataset = self._make_one(self.DS_REF) + # Valid inputs for access_policy_version are currently + # ints 1, 2, 3, and None + + with pytest.raises(ValueError): + invalid_value = "a string" + dataset.access_policy_version = invalid_value + + with pytest.raises(ValueError): + invalid_value = 42.0 + dataset.access_policy_version = invalid_value + class TestDatasetListItem(unittest.TestCase): @staticmethod From 46927479085f13fd326e3f2388f60dfdd37f7f69 Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 1 May 2025 14:52:26 -0700 Subject: [PATCH 437/536] feat: add WRITE_TRUNCATE_DATA enum (#2166) This PR documents the new WRITE_TRUNCATE_DATA write disposition by adding the enum value. internal issue: b/406848221 --- google/cloud/bigquery/enums.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index b32fc8200..203ea3c7b 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -338,6 +338,10 @@ class WriteDisposition(object): WRITE_TRUNCATE = "WRITE_TRUNCATE" """If the table already exists, BigQuery overwrites the table data.""" + WRITE_TRUNCATE_DATA = "WRITE_TRUNCATE_DATA" + """For existing tables, truncate data but preserve existing schema + and constraints.""" + WRITE_EMPTY = "WRITE_EMPTY" """If the table already exists and contains data, a 'duplicate' error is returned in the job result.""" From 5c8e9179923d914745eaa98fc52a9d8577fe2484 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 12 May 2025 18:34:55 +0200 Subject: [PATCH 438/536] chore(deps): update all dependencies (#2158) * chore(deps): update all dependencies * Update samples/geography/requirements.txt --------- Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements-test.txt | 2 +- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 25 ++++++++++++------------ samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 6 +++--- samples/snippets/requirements-test.txt | 2 +- 8 files changed, 22 insertions(+), 21 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 183230cf4..6abea3b4d 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index fa349e0d3..b98f4ace9 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ google-cloud-bigquery==3.31.0 -google-auth-oauthlib==1.2.1 +google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 37bcdf687..2b5a71c8c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,8 +1,9 @@ attrs==25.3.0 -certifi==2025.1.31 +certifi==2025.4.26 cffi==1.17.1 -charset-normalizer==3.4.1 -click==8.1.8 +charset-normalizer==3.4.2 +click===8.1.8; python_version == '3.9' +click==8.2.0; python_version >= '3.10' click-plugins==1.1.1 cligj==0.7.2 db-dtypes==1.4.2 @@ -10,21 +11,21 @@ Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 -google-auth==2.38.0 +google-auth==2.40.1 google-cloud-bigquery==3.31.0 -google-cloud-bigquery-storage==2.30.0 +google-cloud-bigquery-storage==2.31.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 -googleapis-common-protos==1.69.2 +googleapis-common-protos==1.70.0 grpcio==1.71.0 idna==3.10 munch==4.0.0 -mypy-extensions==1.0.0 -packaging==24.2 +mypy-extensions==1.1.0 +packaging==25.0 pandas==2.2.3 proto-plus==1.26.1 -pyarrow==19.0.1 +pyarrow==20.0.0 pyasn1==0.6.1 pyasn1-modules==0.4.2 pycparser==2.22 @@ -33,10 +34,10 @@ python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 requests==2.32.3 -rsa==4.9 +rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.0; python_version >= '3.10' six==1.17.0 -typing-extensions==4.13.1 +typing-extensions==4.13.2 typing-inspect==0.9.0 -urllib3==2.3.0 +urllib3==2.4.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 183230cf4..6abea3b4d 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 3ab215951..2c9e158c0 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.9.0 db-dtypes==1.4.2 google.cloud.bigquery==3.31.0 -google-cloud-bigquery-storage==2.30.0 +google-cloud-bigquery-storage==2.31.0 ipython===8.18.1 pandas==2.2.3 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 183230cf4..6abea3b4d 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index ca5505a2e..d1e2f39fb 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ bigquery-magics==0.9.0 db-dtypes==1.4.2 google-cloud-bigquery==3.31.0 -google-cloud-bigquery-storage==2.30.0 +google-cloud-bigquery-storage==2.31.0 ipython===8.18.1; python_version == '3.9' -ipython==9.0.2; python_version >= '3.10' +ipython==9.2.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.1; python_version >= '3.10' +matplotlib==3.10.3; python_version >= '3.10' pandas==2.2.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 0cf0bb6b4..6760e1228 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 From 185116ead5f68b959feb339566e964572fe12692 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 13:04:01 -0400 Subject: [PATCH 439/536] chore(main): release 3.32.0 (#2152) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 19 +++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b115464c..ff1bd7acc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.32.0](https://github.com/googleapis/python-bigquery/compare/v3.31.0...v3.32.0) (2025-05-12) + + +### Features + +* Add dataset access policy version attribute ([#2169](https://github.com/googleapis/python-bigquery/issues/2169)) ([b7656b9](https://github.com/googleapis/python-bigquery/commit/b7656b97c1bd6c204d0508b1851d114719686655)) +* Add preview support for incremental results ([#2145](https://github.com/googleapis/python-bigquery/issues/2145)) ([22b80bb](https://github.com/googleapis/python-bigquery/commit/22b80bba9d0bed319fd3102e567906c9b458dd02)) +* Add WRITE_TRUNCATE_DATA enum ([#2166](https://github.com/googleapis/python-bigquery/issues/2166)) ([4692747](https://github.com/googleapis/python-bigquery/commit/46927479085f13fd326e3f2388f60dfdd37f7f69)) +* Adds condition class and assoc. unit tests ([#2159](https://github.com/googleapis/python-bigquery/issues/2159)) ([a69d6b7](https://github.com/googleapis/python-bigquery/commit/a69d6b796d2edb6ba453980c9553bc9b206c5a6e)) +* Support BigLakeConfiguration (managed Iceberg tables) ([#2162](https://github.com/googleapis/python-bigquery/issues/2162)) ([a1c8e9a](https://github.com/googleapis/python-bigquery/commit/a1c8e9aaf60986924868d54a0ab0334e77002a39)) +* Update the AccessEntry class with a new condition attribute and unit tests ([#2163](https://github.com/googleapis/python-bigquery/issues/2163)) ([7301667](https://github.com/googleapis/python-bigquery/commit/7301667272dfbdd04b1a831418a9ad2d037171fb)) + + +### Bug Fixes + +* `query()` now warns when `job_id` is set and the default `job_retry` is ignored ([#2167](https://github.com/googleapis/python-bigquery/issues/2167)) ([ca1798a](https://github.com/googleapis/python-bigquery/commit/ca1798aaee2d5905fe688d3097f8ee5c989da333)) +* Empty record dtypes ([#2147](https://github.com/googleapis/python-bigquery/issues/2147)) ([77d7173](https://github.com/googleapis/python-bigquery/commit/77d71736fcc006d3ab8f8ba17955ad5f06e21876)) +* Table iterator should not use bqstorage when page_size is not None ([#2154](https://github.com/googleapis/python-bigquery/issues/2154)) ([e89a707](https://github.com/googleapis/python-bigquery/commit/e89a707b162182ededbf94cc9a0f7594bc2be475)) + ## [3.31.0](https://github.com/googleapis/python-bigquery/compare/v3.30.0...v3.31.0) (2025-03-20) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index c0f7a96d6..fe13d2477 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.31.0" +__version__ = "3.32.0" From 156e518c46b5efc7bcfc674c9cccbd2492bcacbe Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 12 May 2025 20:15:23 +0200 Subject: [PATCH 440/536] chore(deps): update dependency db-dtypes to v1.4.3 (#2178) Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 2b5a71c8c..3ff1b2944 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -6,7 +6,7 @@ click===8.1.8; python_version == '3.9' click==8.2.0; python_version >= '3.10' click-plugins==1.1.1 cligj==0.7.2 -db-dtypes==1.4.2 +db-dtypes==1.4.3 Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 2c9e158c0..b000aa50c 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ bigquery_magics==0.9.0 -db-dtypes==1.4.2 +db-dtypes==1.4.3 google.cloud.bigquery==3.31.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index d1e2f39fb..d80ffcd09 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ bigquery-magics==0.9.0 -db-dtypes==1.4.2 +db-dtypes==1.4.3 google-cloud-bigquery==3.31.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1; python_version == '3.9' From 57f940d957613b4d80fb81ea40a1177b73856189 Mon Sep 17 00:00:00 2001 From: Brian Hulette Date: Mon, 12 May 2025 13:10:11 -0700 Subject: [PATCH 441/536] feat: add ability to set autodetect_schema query param in update_table (#2171) * Add ability to set autodetect_schema query_param * fixup! Add ability to set autodetect_schema query_param * fixup! Add ability to set autodetect_schema query_param * fixup! Add ability to set autodetect_schema query_param --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/client.py | 11 ++++++++ tests/system/test_client.py | 47 +++++++++++++++++++++++++++++++++ tests/unit/test_client.py | 12 ++++++--- 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index e7cafc47e..8ad1586f4 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1389,6 +1389,7 @@ def update_table( self, table: Table, fields: Sequence[str], + autodetect_schema: bool = False, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Table: @@ -1419,6 +1420,10 @@ def update_table( fields (Sequence[str]): The fields of ``table`` to change, spelled as the :class:`~google.cloud.bigquery.table.Table` properties. + autodetect_schema (bool): + Specifies if the schema of the table should be autodetected when + updating the table from the underlying source. Only applicable + for external tables. retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -1438,12 +1443,18 @@ def update_table( path = table.path span_attributes = {"path": path, "fields": fields} + if autodetect_schema: + query_params = {"autodetect_schema": True} + else: + query_params = {} + api_response = self._call_api( retry, span_name="BigQuery.updateTable", span_attributes=span_attributes, method="PATCH", path=path, + query_params=query_params, data=partial, headers=headers, timeout=timeout, diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 9df572b14..6584ca03c 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -978,6 +978,53 @@ def test_update_table_constraints(self): ) self.assertIsNone(reference_table3.table_constraints, None) + def test_update_table_autodetect_schema(self): + dataset = self.temp_dataset(_make_dataset_id("bq_update_table_test")) + + # Create an external table, restrict schema to one field + TABLE_NAME = "test_table" + set_schema = [bigquery.SchemaField("username", "STRING", mode="NULLABLE")] + table_arg = Table(dataset.table(TABLE_NAME)) + + # Create an external_config and include it in the table arguments + external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO) + external_config.source_uris = SOURCE_URIS_AVRO + external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO + external_config.schema = set_schema + table_arg.external_data_configuration = external_config + + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + self.assertTrue(_table_exists(table)) + + self.assertEqual(table.schema, set_schema) + + # Update table with schema autodetection + updated_table_arg = Table(dataset.table(TABLE_NAME)) + + # Update the external_config and include it in the table arguments + updated_external_config = copy.deepcopy(external_config) + updated_external_config.autodetect = True + updated_external_config.schema = None + updated_table_arg.external_data_configuration = updated_external_config + + # PATCH call with autodetect_schema=True to trigger schema inference + updated_table = Config.CLIENT.update_table( + updated_table_arg, ["external_data_configuration"], autodetect_schema=True + ) + + # The updated table should have a schema inferred from the reference + # file, which has all four fields. + expected_schema = [ + bigquery.SchemaField("username", "STRING", mode="NULLABLE"), + bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"), + bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"), + bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"), + ] + self.assertEqual(updated_table.schema, expected_schema) + @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 34ef680dd..b8140df66 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2385,7 +2385,7 @@ def test_update_table(self): "resourceTags": {"123456789012/key": "value"}, } conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, timeout=7.5 + method="PATCH", data=sent, path="/" + path, timeout=7.5, query_params={} ) self.assertEqual(updated_table.description, table.description) self.assertEqual(updated_table.friendly_name, table.friendly_name) @@ -2439,6 +2439,7 @@ def test_update_table_w_custom_property(self): path="/%s" % path, data={"newAlphaProperty": "unreleased property"}, timeout=DEFAULT_TIMEOUT, + query_params={}, ) self.assertEqual( updated_table._properties["newAlphaProperty"], "unreleased property" @@ -2475,6 +2476,7 @@ def test_update_table_only_use_legacy_sql(self): path="/%s" % path, data={"view": {"useLegacySql": True}}, timeout=DEFAULT_TIMEOUT, + query_params={}, ) self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) @@ -2567,9 +2569,10 @@ def test_update_table_w_query(self): "schema": schema_resource, }, timeout=DEFAULT_TIMEOUT, + query_params={}, ) - def test_update_table_w_schema_None(self): + def test_update_table_w_schema_None_autodetect_schema(self): # Simulate deleting schema: not sure if back-end will actually # allow this operation, but the spec says it is optional. path = "projects/%s/datasets/%s/tables/%s" % ( @@ -2611,7 +2614,9 @@ def test_update_table_w_schema_None(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, ["schema"]) + updated_table = client.update_table( + table, ["schema"], autodetect_schema=True + ) final_attributes.assert_called_once_with( {"path": "/%s" % path, "fields": ["schema"]}, client, None @@ -2623,6 +2628,7 @@ def test_update_table_w_schema_None(self): sent = {"schema": {"fields": None}} self.assertEqual(req[1]["data"], sent) self.assertEqual(req[1]["path"], "/%s" % path) + self.assertEqual(req[1]["query_params"], {"autodetect_schema": True}) self.assertEqual(len(updated_table.schema), 0) def test_update_table_delete_property(self): From 2d173a5bd8a29dfebe492bc3a79469da7f0fcfbd Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 07:53:27 -0400 Subject: [PATCH 442/536] chore(python): remove docs from templates (#2164) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(python): remove docs from templates Source-Link: https://github.com/googleapis/synthtool/commit/3fca64a4bb1772258f8cc939a9192b17dbbbf335 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:3b3a31be60853477bc39ed8d9bac162cac3ba083724cecaad54eb81d4e4dae9c * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove replacement in owlbot.py --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou Co-authored-by: Chalmer Lowe --- .github/.OwlBot.lock.yaml | 4 ++-- owlbot.py | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 51b21a62b..cea9eb68f 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:a7aef70df5f13313ddc027409fc8f3151422ec2a57ac8730fce8fa75c060d5bb -# created: 2025-04-10T17:00:10.042601326Z + digest: sha256:3b3a31be60853477bc39ed8d9bac162cac3ba083724cecaad54eb81d4e4dae9c +# created: 2025-04-16T22:40:03.123475241Z diff --git a/owlbot.py b/owlbot.py index 8cfa2b097..60759adbe 100644 --- a/owlbot.py +++ b/owlbot.py @@ -109,16 +109,6 @@ python.py_samples() -s.replace( - "docs/conf.py", - r'\{"members": True\}', - '{"members": True, "inherited-members": True}', -) -s.replace( - "docs/conf.py", - r"exclude_patterns = \[", - '\\g<0>\n "google/cloud/bigquery_v2/**", # Legacy proto-based types.', -) s.replace( "samples/**/noxfile.py", 'BLACK_VERSION = "black==22.3.0"', From 02176377d5e2fc25b5cd4f46aa6ebfb1b6a960a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 14 May 2025 04:36:37 -0500 Subject: [PATCH 443/536] fix: ensure AccessEntry equality and repr uses the correct `entity_type` (#2182) * fix: ensure AccessEntry equality and repr uses the correct `entity_type` * add a test for access_entries --- google/cloud/bigquery/dataset.py | 4 +-- tests/unit/test_dataset.py | 44 ++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index d225b7106..f788275cd 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -512,7 +512,7 @@ def __ne__(self, other): return not self == other def __repr__(self): - return f"" + return f"" def _key(self): """A tuple key that uniquely describes this field. @@ -531,7 +531,7 @@ def _key(self): properties["condition"] = condition_key prop_tup = tuple(sorted(properties.items())) - return (self.role, self._entity_type, self.entity_id, prop_tup) + return (self.role, self.entity_type, self.entity_id, prop_tup) def __hash__(self): return hash(self._key()) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 941430827..5cce2a9a7 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -613,6 +613,15 @@ def test_equality_and_hash_without_condition(self): assert hash(entry1) == hash(entry2) assert hash(entry1) != hash(entry3) # Usually true + def test_equality_and_hash_from_api_repr(self): + """Compare equal entries where one was created via from_api_repr.""" + entry1 = AccessEntry("OWNER", "specialGroup", "projectOwners") + entry2 = AccessEntry.from_api_repr( + {"role": "OWNER", "specialGroup": "projectOwners"} + ) + assert entry1 == entry2 + assert hash(entry1) == hash(entry2) + def test_equality_and_hash_with_condition(self, condition_1, condition_2): cond1a = Condition( condition_1.expression, condition_1.title, condition_1.description @@ -746,6 +755,13 @@ def test_dataset_property_with_condition(self, condition_1): assert "dataset" in entry._properties assert "condition" in entry._properties + def test_repr_from_api_repr(self): + """Check that repr() includes the correct entity_type when the object is initialized from a dictionary.""" + api_repr = {"role": "OWNER", "userByEmail": "owner@example.com"} + entry = AccessEntry.from_api_repr(api_repr) + entry_str = repr(entry) + assert entry_str == "" + class TestDatasetReference(unittest.TestCase): @staticmethod @@ -1097,6 +1113,34 @@ def test_ctor_explicit(self): self.assertIsNone(dataset.location) self.assertEqual(dataset.is_case_insensitive, False) + def test_access_entries_getter_from_api_repr(self): + """Check that `in` works correctly when Dataset is made via from_api_repr().""" + from google.cloud.bigquery.dataset import AccessEntry + + dataset = self._get_target_class().from_api_repr( + { + "datasetReference": {"projectId": "my-proj", "datasetId": "my_dset"}, + "access": [ + { + "role": "OWNER", + "userByEmail": "uilma@example.com", + }, + { + "role": "READER", + "groupByEmail": "rhubbles@example.com", + }, + ], + } + ) + assert ( + AccessEntry("OWNER", "userByEmail", "uilma@example.com") + in dataset.access_entries + ) + assert ( + AccessEntry("READER", "groupByEmail", "rhubbles@example.com") + in dataset.access_entries + ) + def test_access_entries_setter_non_list(self): dataset = self._make_one(self.DS_REF) with self.assertRaises(TypeError): From ebfd0a83d43bcb96f65f5669437220aa6138b766 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 14 May 2025 13:34:26 -0400 Subject: [PATCH 444/536] feat: Add dtype parameters to to_geodataframe functions (#2176) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: Add dtype parameters to to_geodataframe This change adds support for `bool_dtype`, `int_dtype`, `float_dtype`, and `string_dtype` parameters to the `to_geodataframe` method in `RowIterator` and `QueryJob`. These parameters allow you to specify the desired pandas dtypes for boolean, integer, float, and string columns when converting BigQuery results to GeoDataFrames. The changes include: - Updating `RowIterator.to_geodataframe` to accept and pass these dtype parameters to the underlying `to_dataframe` method. - Updating `QueryJob.to_geodataframe` to accept and pass these dtype parameters to the underlying `RowIterator.to_geodataframe` method. - Adding unit tests to verify the correct handling of these parameters. * updates to several tests re geopandas as well as imports * updates to enum import * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update pyproject.toml Co-authored-by: Tim Sweña (Swast) * Update testing/constraints-3.9.txt Co-authored-by: Tim Sweña (Swast) --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: Owl Bot Co-authored-by: Tim Sweña (Swast) --- docs/conf.py | 3 +- google/cloud/bigquery/job/query.py | 36 ++++++++++ google/cloud/bigquery/table.py | 44 ++++++++++++ noxfile.py | 4 +- pyproject.toml | 8 ++- testing/constraints-3.9.txt | 2 +- tests/unit/job/test_query_pandas.py | 6 ++ tests/unit/test_table.py | 9 ++- tests/unit/test_table_pandas.py | 103 ++++++++++++++++++++++++++++ 9 files changed, 205 insertions(+), 10 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 826298090..df1c18b68 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -61,7 +61,7 @@ # autodoc/autosummary flags autoclass_content = "both" -autodoc_default_options = {"members": True, "inherited-members": True} +autodoc_default_options = {"members": True} autosummary_generate = True @@ -109,7 +109,6 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [ - "google/cloud/bigquery_v2/**", # Legacy proto-based types. "_build", "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index f14039bc0..f9b99b7fb 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -2102,6 +2102,10 @@ def to_geodataframe( create_bqstorage_client: bool = True, max_results: Optional[int] = None, geography_column: Optional[str] = None, + bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, + int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, ) -> "geopandas.GeoDataFrame": """Return a GeoPandas GeoDataFrame from a QueryJob @@ -2152,6 +2156,34 @@ def to_geodataframe( identifies which one to use to construct a GeoPandas GeoDataFrame. This option can be ommitted if there's only one GEOGRAPHY column. + bool_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``) + to convert BigQuery Boolean type, instead of relying on the default + ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type + int_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) + to convert BigQuery Integer types, instead of relying on the default + ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("int64")``. A list of BigQuery + Integer types can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + float_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) + to convert BigQuery Float type, instead of relying on the default + ``numpy.dtype("float64")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("float64")``. BigQuery Float + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + string_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to + convert BigQuery String type, instead of relying on the default + ``numpy.dtype("object")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("object")``. BigQuery String + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type Returns: geopandas.GeoDataFrame: @@ -2175,6 +2207,10 @@ def to_geodataframe( progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, geography_column=geography_column, + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, ) def __iter__(self): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 503ca4e71..e084468f6 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2727,6 +2727,10 @@ def to_geodataframe( progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_column: Optional[str] = None, + bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, + int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, ) -> "geopandas.GeoDataFrame": """Create a GeoPandas GeoDataFrame by loading all pages of a query. @@ -2778,6 +2782,34 @@ def to_geodataframe( identifies which one to use to construct a geopandas GeoDataFrame. This option can be ommitted if there's only one GEOGRAPHY column. + bool_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``) + to convert BigQuery Boolean type, instead of relying on the default + ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type + int_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) + to convert BigQuery Integer types, instead of relying on the default + ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("int64")``. A list of BigQuery + Integer types can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + float_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) + to convert BigQuery Float type, instead of relying on the default + ``numpy.dtype("float64")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("float64")``. BigQuery Float + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + string_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to + convert BigQuery String type, instead of relying on the default + ``numpy.dtype("object")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("object")``. BigQuery String + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type Returns: geopandas.GeoDataFrame: @@ -2829,6 +2861,10 @@ def to_geodataframe( progress_bar_type, create_bqstorage_client, geography_as_object=True, + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, ) return geopandas.GeoDataFrame( @@ -2932,6 +2968,10 @@ def to_geodataframe( progress_bar_type=None, create_bqstorage_client=True, geography_column: Optional[str] = None, + bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, + int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2941,6 +2981,10 @@ def to_geodataframe( progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. geography_column (str): Ignored. Added for compatibility with RowIterator. + bool_dtype (Any): Ignored. Added for compatibility with RowIterator. + int_dtype (Any): Ignored. Added for compatibility with RowIterator. + float_dtype (Any): Ignored. Added for compatibility with RowIterator. + string_dtype (Any): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/noxfile.py b/noxfile.py index c2b4bbb50..1922a68a5 100644 --- a/noxfile.py +++ b/noxfile.py @@ -109,9 +109,7 @@ def default(session, install_extras=True): # that logic (and the associated tests) we avoid installing the [ipython] extra # which has a downstream effect of then avoiding installing bigquery_magics. if install_extras and session.python == UNIT_TEST_PYTHON_VERSIONS[0]: - install_target = ( - ".[bqstorage,pandas,ipywidgets,geopandas,tqdm,opentelemetry,bigquery_v2]" - ) + install_target = ".[bqstorage,pandas,ipywidgets,geopandas,matplotlib,tqdm,opentelemetry,bigquery_v2]" elif install_extras: # run against all other UNIT_TEST_PYTHON_VERSIONS install_target = ".[all]" else: diff --git a/pyproject.toml b/pyproject.toml index 38d74cdd0..9c91a2fc8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,11 @@ pandas = [ ipywidgets = ["ipywidgets >= 7.7.1", "ipykernel >= 6.2.0"] geopandas = ["geopandas >= 0.9.0, < 2.0.0", "Shapely >= 1.8.4, < 3.0.0"] ipython = ["ipython >= 7.23.1", "bigquery-magics >= 0.6.0"] -tqdm = ["tqdm >= 4.7.4, < 5.0.0"] +matplotlib = [ + "matplotlib >= 3.7.1, <= 3.9.2; python_version == '3.9'", + "matplotlib >= 3.10.3; python_version >= '3.10'", +] +tqdm = ["tqdm >= 4.23.4, < 5.0.0"] opentelemetry = [ "opentelemetry-api >= 1.1.0", "opentelemetry-sdk >= 1.1.0", @@ -93,7 +97,7 @@ bigquery_v2 = [ "protobuf >= 3.20.2, < 7.0.0, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types. ] all = [ - "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,tqdm,opentelemetry,bigquery_v2]", + "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,matplotlib,tqdm,opentelemetry,bigquery_v2]", ] [tool.setuptools.dynamic] diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index cb6c29f3b..60a155f0d 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -29,4 +29,4 @@ pyarrow==4.0.0 python-dateutil==2.8.2 requests==2.21.0 Shapely==1.8.4 -tqdm==4.7.4 +matplotlib==3.7.1 diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 2cda59bd1..d82f0dfe3 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -22,6 +22,7 @@ from ..helpers import make_connection from .helpers import _make_client from .helpers import _make_job_resource +from google.cloud.bigquery.enums import DefaultPandasDTypes try: from google.cloud import bigquery_storage @@ -30,6 +31,7 @@ except (ImportError, AttributeError): bigquery_storage = None + try: import shapely except (ImportError, AttributeError): @@ -1019,5 +1021,9 @@ def test_query_job_to_geodataframe_delegation(wait_for_query): progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, geography_column=geography_column, + bool_dtype=DefaultPandasDTypes.BOOL_DTYPE, + int_dtype=DefaultPandasDTypes.INT_DTYPE, + float_dtype=None, + string_dtype=None, ) assert df is row_iterator.to_geodataframe.return_value diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 253006547..8daa4ce43 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -31,6 +31,7 @@ from google.cloud.bigquery import exceptions from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +from google.cloud.bigquery.enums import DefaultPandasDTypes from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference @@ -4065,7 +4066,7 @@ def test_to_dataframe_no_tqdm(self): def test_to_dataframe_tqdm_error(self): pytest.importorskip("pandas") - pytest.importorskip("tqdm") + tqdm = pytest.importorskip("tqdm") mock.patch("tqdm.tqdm_gui", new=None) mock.patch("tqdm.notebook.tqdm", new=None) mock.patch("tqdm.tqdm", new=None) @@ -4100,7 +4101,7 @@ def test_to_dataframe_tqdm_error(self): for warning in warned: # pragma: NO COVER self.assertIn( warning.category, - [UserWarning, DeprecationWarning], + [UserWarning, DeprecationWarning, tqdm.TqdmExperimentalWarning], ) def test_to_dataframe_w_empty_results(self): @@ -5639,6 +5640,10 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): progress_bar_type, create_bqstorage_client, geography_as_object=True, + bool_dtype=DefaultPandasDTypes.BOOL_DTYPE, + int_dtype=DefaultPandasDTypes.INT_DTYPE, + float_dtype=None, + string_dtype=None, ) self.assertIsInstance(df, geopandas.GeoDataFrame) diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 94737732b..43d64d77d 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -261,3 +261,106 @@ def test_to_dataframe_with_jobs_query_response(class_under_test): "Tiffani", ] assert list(df["number"]) == [6, 325, 26, 10, 17, 22, 6, 229, 8] + + +@mock.patch("google.cloud.bigquery.table.geopandas") +def test_rowiterator_to_geodataframe_with_default_dtypes( + mock_geopandas, monkeypatch, class_under_test +): + mock_geopandas.GeoDataFrame = mock.Mock(spec=True) + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + schema = [ + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("string_col", "STRING"), + ] + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, schema) + + mock_df = pandas.DataFrame( + { + "geo_col": ["POINT (1 2)"], + "bool_col": [True], + "int_col": [123], + "float_col": [1.23], + "string_col": ["abc"], + } + ) + rows.to_dataframe = mock.Mock(return_value=mock_df) + + rows.to_geodataframe(geography_column="geo_col") + + rows.to_dataframe.assert_called_once_with( + None, # bqstorage_client + None, # dtypes + None, # progress_bar_type + True, # create_bqstorage_client + geography_as_object=True, + bool_dtype=bigquery.enums.DefaultPandasDTypes.BOOL_DTYPE, + int_dtype=bigquery.enums.DefaultPandasDTypes.INT_DTYPE, + float_dtype=None, + string_dtype=None, + ) + mock_geopandas.GeoDataFrame.assert_called_once_with( + mock_df, crs="EPSG:4326", geometry="geo_col" + ) + + +@mock.patch("google.cloud.bigquery.table.geopandas") +def test_rowiterator_to_geodataframe_with_custom_dtypes( + mock_geopandas, monkeypatch, class_under_test +): + mock_geopandas.GeoDataFrame = mock.Mock(spec=True) + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + schema = [ + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("string_col", "STRING"), + ] + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, schema) + + mock_df = pandas.DataFrame( + { + "geo_col": ["POINT (3 4)"], + "bool_col": [False], + "int_col": [456], + "float_col": [4.56], + "string_col": ["def"], + } + ) + rows.to_dataframe = mock.Mock(return_value=mock_df) + + custom_bool_dtype = "bool" + custom_int_dtype = "int32" + custom_float_dtype = "float32" + custom_string_dtype = "string" + + rows.to_geodataframe( + geography_column="geo_col", + bool_dtype=custom_bool_dtype, + int_dtype=custom_int_dtype, + float_dtype=custom_float_dtype, + string_dtype=custom_string_dtype, + ) + + rows.to_dataframe.assert_called_once_with( + None, # bqstorage_client + None, # dtypes + None, # progress_bar_type + True, # create_bqstorage_client + geography_as_object=True, + bool_dtype=custom_bool_dtype, + int_dtype=custom_int_dtype, + float_dtype=custom_float_dtype, + string_dtype=custom_string_dtype, + ) + mock_geopandas.GeoDataFrame.assert_called_once_with( + mock_df, crs="EPSG:4326", geometry="geo_col" + ) From 2140a51bac95ab600759bdee576cf3a41c7dc834 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 14 May 2025 13:59:44 -0400 Subject: [PATCH 445/536] chore: Fix two types of warnings in unit tests (#2183) * Fix two types of warnings in unit tests This commit addresses two warnings that appear when running unit tests: 1. `PytestRemovedIn9Warning` in `tests/unit/test_opentelemetry_tracing.py`: Removed a `@pytest.mark.skipif` decorator from a fixture. The skip condition is already present on the test methods using the fixture. 2. `FutureWarning` in `tests/unit/test_client.py`: Updated calls to `client.query()` to include `job_retry=None` when `job_id` is also specified. This is to avoid ambiguity as BigQuery cannot retry a failed job with the exact same ID. * Update tests/unit/test_client.py * Update tests/unit/test_client.py * Update linting * adds more examples of functions where job_retry is needed --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- tests/unit/test_client.py | 52 ++++++++++++++++++------ tests/unit/test_opentelemetry_tracing.py | 1 - 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index b8140df66..a35338698 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -4719,7 +4719,7 @@ def test_query_w_api_method_query_and_job_id_fails(self): client._connection = make_connection({}) with self.assertRaises(TypeError) as exc: - client.query(query, job_id="abcd", api_method="QUERY") + client.query(query, job_id="abcd", api_method="QUERY", job_retry=None) self.assertIn( "`job_id` was provided, but the 'QUERY' `api_method` was requested", exc.exception.args[0], @@ -4774,7 +4774,11 @@ def test_query_w_explicit_project(self): conn = client._connection = make_connection(resource) client.query( - query, job_id=job_id, project="other-project", location=self.LOCATION + query, + job_id=job_id, + project="other-project", + location=self.LOCATION, + job_retry=None, ) # Check that query actually starts the job. @@ -4833,7 +4837,11 @@ def test_query_w_explicit_job_config(self): original_config_copy = copy.deepcopy(job_config) client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config + query, + job_id=job_id, + location=self.LOCATION, + job_config=job_config, + job_retry=None, ) # Check that query actually starts the job. @@ -4884,7 +4892,11 @@ def test_query_preserving_explicit_job_config(self): original_config_copy = copy.deepcopy(job_config) client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config + query, + job_id=job_id, + location=self.LOCATION, + job_config=job_config, + job_retry=None, ) # Check that query actually starts the job. @@ -4940,7 +4952,13 @@ def test_query_preserving_explicit_default_job_config(self): ) conn = client._connection = make_connection(resource) - client.query(query, job_id=job_id, location=self.LOCATION, job_config=None) + client.query( + query, + job_id=job_id, + location=self.LOCATION, + job_config=None, + job_retry=None, + ) # Check that query actually starts the job. conn.api_request.assert_called_once_with( @@ -4978,7 +4996,11 @@ def test_query_w_invalid_job_config(self): with self.assertRaises(TypeError) as exc: client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config + query, + job_id=job_id, + location=self.LOCATION, + job_config=job_config, + job_retry=None, ) self.assertIn("Expected an instance of QueryJobConfig", exc.exception.args[0]) @@ -5027,7 +5049,11 @@ def test_query_w_explicit_job_config_override(self): job_config.default_dataset = None client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config + query, + job_id=job_id, + location=self.LOCATION, + job_config=job_config, + job_retry=None, ) # Check that query actually starts the job. @@ -5072,7 +5098,7 @@ def test_query_w_client_default_config_no_incoming(self): ) conn = client._connection = make_connection(resource) - client.query(query, job_id=job_id, location=self.LOCATION) + client.query(query, job_id=job_id, location=self.LOCATION, job_retry=None) # Check that query actually starts the job. conn.api_request.assert_called_once_with( @@ -5114,7 +5140,7 @@ def test_query_w_client_location(self): ) conn = client._connection = make_connection(resource) - client.query(query, job_id=job_id, project="other-project") + client.query(query, job_id=job_id, project="other-project", job_retry=None) # Check that query actually starts the job. conn.api_request.assert_called_once_with( @@ -5178,7 +5204,7 @@ def test_query_w_udf_resources(self): config.udf_resources = udf_resources config.use_legacy_sql = True - job = client.query(QUERY, job_config=config, job_id=JOB) + job = client.query(QUERY, job_config=config, job_id=JOB, job_retry=None) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) @@ -5234,7 +5260,7 @@ def test_query_w_query_parameters(self): config = QueryJobConfig() config.query_parameters = query_parameters - job = client.query(QUERY, job_config=config, job_id=JOB) + job = client.query(QUERY, job_config=config, job_id=JOB, job_retry=None) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) @@ -5277,7 +5303,7 @@ def test_query_job_rpc_fail_w_random_error(self): ) with job_begin_patcher: with pytest.raises(Unknown, match="Not sure what went wrong."): - client.query("SELECT 1;", job_id="123") + client.query("SELECT 1;", job_id="123", job_retry=None) def test_query_job_rpc_fail_w_conflict_job_id_given(self): from google.api_core.exceptions import Conflict @@ -5293,7 +5319,7 @@ def test_query_job_rpc_fail_w_conflict_job_id_given(self): ) with job_begin_patcher: with pytest.raises(Conflict, match="Job already exists."): - client.query("SELECT 1;", job_id="123") + client.query("SELECT 1;", job_id="123", job_retry=None) def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): from google.api_core.exceptions import Conflict diff --git a/tests/unit/test_opentelemetry_tracing.py b/tests/unit/test_opentelemetry_tracing.py index 546cc02bd..57132a1b9 100644 --- a/tests/unit/test_opentelemetry_tracing.py +++ b/tests/unit/test_opentelemetry_tracing.py @@ -42,7 +42,6 @@ TEST_SPAN_ATTRIBUTES = {"foo": "baz"} -@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") @pytest.fixture def setup(): importlib.reload(opentelemetry_tracing) From 110ad603cf61566c3421e26a028d897135e526d3 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 14 May 2025 20:42:29 +0200 Subject: [PATCH 446/536] chore(deps): update all dependencies to v3.32.0 (#2179) Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index b98f4ace9..743d0fe35 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.31.0 +google-cloud-bigquery==3.32.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3ff1b2944..434a594cb 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 google-auth==2.40.1 -google-cloud-bigquery==3.31.0 +google-cloud-bigquery==3.32.0 google-cloud-bigquery-storage==2.31.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index b000aa50c..bb60f2a67 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.9.0 db-dtypes==1.4.3 -google.cloud.bigquery==3.31.0 +google.cloud.bigquery==3.32.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1 pandas==2.2.3 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index d80ffcd09..17f43bf78 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ bigquery-magics==0.9.0 db-dtypes==1.4.3 -google-cloud-bigquery==3.31.0 +google-cloud-bigquery==3.32.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1; python_version == '3.9' ipython==9.2.0; python_version >= '3.10' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 4b88c6b70..c31815d69 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.31.0 +google-cloud-bigquery==3.32.0 From 5805066d9dfb696e7f514569567a5432ee98ad2b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 15 May 2025 05:11:00 -0400 Subject: [PATCH 447/536] refactor: Fix DeprecationWarnings for datetime methods in job tests (#2185) * Fix DeprecationWarnings for datetime methods in job tests Replaced calls to deprecated `datetime.datetime.utcnow()` with `datetime.datetime.now(datetime.UTC)` in `tests/unit/job/test_base.py`. Replaced calls to deprecated `datetime.datetime.utcfromtimestamp()` with `datetime.datetime.fromtimestamp(timestamp, datetime.UTC)` in `tests/unit/job/helpers.py`. These changes address the specific warnings identified in the issue for these two files. * Update tests/unit/job/test_base.py * Update tests/unit/job/test_base.py * Updates datetime code related to UTC --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- tests/unit/job/helpers.py | 4 +++- tests/unit/job/test_base.py | 2 +- tests/unit/test__pandas_helpers.py | 2 +- tests/unit/test_client.py | 17 ++++++++--------- tests/unit/test_dataset.py | 4 +++- tests/unit/test_query.py | 17 +++++++++-------- tests/unit/test_table.py | 8 ++++++-- 7 files changed, 31 insertions(+), 23 deletions(-) diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py index 3642c7229..24ba2fa99 100644 --- a/tests/unit/job/helpers.py +++ b/tests/unit/job/helpers.py @@ -106,7 +106,9 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace( + tzinfo=UTC + ) self.ETAG = "ETAG" self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) self.RESOURCE_URL = "{}/bigquery/v2/projects/{}/jobs/{}".format( diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 2d2f0c13c..22a0fa450 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -331,7 +331,7 @@ def _datetime_and_millis(): import datetime from google.cloud._helpers import _millis - now = datetime.datetime.utcnow().replace( + now = datetime.datetime.now(datetime.timezone.utc).replace( microsecond=123000, tzinfo=datetime.timezone.utc, # stats timestamps have ms precision ) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 48c085c1d..d6ea5df7e 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -886,7 +886,7 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name( @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_json_generator(module_under_test): - utcnow = datetime.datetime.utcnow() + utcnow = datetime.datetime.now(datetime.timezone.utc) dataframe = pandas.DataFrame( { "a_series": [1, 2, 3, 4], diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index a35338698..468068321 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5853,7 +5853,7 @@ def test_insert_rows_w_schema(self): from google.cloud.bigquery.schema import SchemaField WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC) + WHEN = datetime.datetime.fromtimestamp(WHEN_TS, UTC).replace(tzinfo=UTC) PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( self.PROJECT, self.DS_ID, @@ -5914,7 +5914,7 @@ def test_insert_rows_w_list_of_dictionaries(self): from google.cloud.bigquery.table import Table WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC) + WHEN = datetime.datetime.fromtimestamp(WHEN_TS, UTC).replace(tzinfo=UTC) PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( self.PROJECT, self.DS_ID, @@ -6097,6 +6097,7 @@ def _row_data(row): ) def test_insert_rows_w_repeated_fields(self): + from google.cloud._helpers import UTC from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -6126,12 +6127,8 @@ def test_insert_rows_w_repeated_fields(self): ( 12, [ - datetime.datetime( - 2018, 12, 1, 12, 0, 0, tzinfo=datetime.timezone.utc - ), - datetime.datetime( - 2018, 12, 1, 13, 0, 0, tzinfo=datetime.timezone.utc - ), + datetime.datetime(2018, 12, 1, 12, 0, 0, tzinfo=UTC), + datetime.datetime(2018, 12, 1, 13, 0, 0, tzinfo=UTC), ], [1.25, 2.5], ), @@ -6966,7 +6963,9 @@ def test_list_rows(self): ) WHEN_TS = 1437767599006000 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS / 1e6).replace(tzinfo=UTC) + WHEN = datetime.datetime.fromtimestamp( + WHEN_TS / 1e6, datetime.timezone.utc + ).replace(tzinfo=UTC) WHEN_1 = WHEN + datetime.timedelta(microseconds=1) WHEN_2 = WHEN + datetime.timedelta(microseconds=2) ROWS = 1234 diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 5cce2a9a7..3fd2579af 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -945,7 +945,9 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace( + tzinfo=UTC + ) self.ETAG = "ETAG" self.DS_FULL_ID = "%s:%s" % (self.PROJECT, self.DS_ID) self.RESOURCE_URL = "http://example.com/path/to/resource" diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 40ef080f7..0d967bdb8 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -637,9 +637,9 @@ def test_to_api_repr_w_timestamp_datetime(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_timestamp_micros(self): - from google.cloud._helpers import _microseconds_from_datetime + from google.cloud._helpers import _microseconds_from_datetime, UTC - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) seconds = _microseconds_from_datetime(now) / 1.0e6 EXPECTED = { "parameterType": {"type": "TIMESTAMP"}, @@ -650,9 +650,9 @@ def test_to_api_repr_w_timestamp_micros(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_datetime_datetime(self): - from google.cloud._helpers import _datetime_to_rfc3339 + from google.cloud._helpers import _datetime_to_rfc3339, UTC - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) EXPECTED = { "parameterType": {"type": "DATETIME"}, "parameterValue": { @@ -664,9 +664,9 @@ def test_to_api_repr_w_datetime_datetime(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_datetime_string(self): - from google.cloud._helpers import _datetime_to_rfc3339 + from google.cloud._helpers import _datetime_to_rfc3339, UTC - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) now_str = _datetime_to_rfc3339(now) EXPECTED = { "parameterType": {"type": "DATETIME"}, @@ -1047,9 +1047,10 @@ def test_to_api_repr_w_datetime_str(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_datetime_datetime(self): + from google.cloud._helpers import UTC # type: ignore from google.cloud.bigquery._helpers import _RFC3339_MICROS_NO_ZULU - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) now_str = now.strftime(_RFC3339_MICROS_NO_ZULU) EXPECTED = { "parameterType": { @@ -1089,7 +1090,7 @@ def test_to_api_repr_w_timestamp_str(self): def test_to_api_repr_w_timestamp_timestamp(self): from google.cloud._helpers import UTC # type: ignore - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) now = now.astimezone(UTC) now_str = str(now) EXPECTED = { diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 8daa4ce43..92fa0e2ec 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -395,7 +395,9 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace( + tzinfo=UTC + ) self.ETAG = "ETAG" self.TABLE_FULL_ID = "%s:%s.%s" % (self.PROJECT, self.DS_ID, self.TABLE_NAME) self.RESOURCE_URL = "http://example.com/path/to/resource" @@ -1952,7 +1954,9 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.125 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace( + tzinfo=UTC + ) self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) def test_ctor(self): From 7ec2848379d5743bbcb36700a1153540c451e0e0 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 19 May 2025 10:52:16 -0700 Subject: [PATCH 448/536] fix: ensure SchemaField.field_dtype returns a string (#2188) * fix: ensure SchemaField.field_dtype returns a string * fix cover tests * fix unit 3.9 --- google/cloud/bigquery/_pandas_helpers.py | 154 ++++++++++------------- google/cloud/bigquery/schema.py | 28 ++--- tests/unit/test__pandas_helpers.py | 113 ++++++----------- tests/unit/test_schema.py | 5 - 4 files changed, 113 insertions(+), 187 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 457eb9078..6691e7ef6 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -508,31 +508,37 @@ def dataframe_to_bq_schema(dataframe, bq_schema): bq_schema_unused = set() bq_schema_out = [] - unknown_type_fields = [] - + unknown_type_columns = [] + dataframe_reset_index = dataframe.reset_index() for column, dtype in list_columns_and_indexes(dataframe): - # Use provided type from schema, if present. + # Step 1: use provided type from schema, if present. bq_field = bq_schema_index.get(column) if bq_field: bq_schema_out.append(bq_field) bq_schema_unused.discard(bq_field.name) continue - # Otherwise, try to automatically determine the type based on the + # Step 2: try to automatically determine the type based on the # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) if bq_type is None: - sample_data = _first_valid(dataframe.reset_index()[column]) + sample_data = _first_valid(dataframe_reset_index[column]) if ( isinstance(sample_data, _BaseGeometry) and sample_data is not None # Paranoia ): bq_type = "GEOGRAPHY" - bq_field = schema.SchemaField(column, bq_type) - bq_schema_out.append(bq_field) + if bq_type is not None: + bq_schema_out.append(schema.SchemaField(column, bq_type)) + continue + + # Step 3: try with pyarrow if available + bq_field = _get_schema_by_pyarrow(column, dataframe_reset_index[column]) + if bq_field is not None: + bq_schema_out.append(bq_field) + continue - if bq_field.field_type is None: - unknown_type_fields.append(bq_field) + unknown_type_columns.append(column) # Catch any schema mismatch. The developer explicitly asked to serialize a # column, but it was not found. @@ -543,98 +549,70 @@ def dataframe_to_bq_schema(dataframe, bq_schema): ) ) - # If schema detection was not successful for all columns, also try with - # pyarrow, if available. - if unknown_type_fields: - if not pyarrow: - msg = "Could not determine the type of columns: {}".format( - ", ".join(field.name for field in unknown_type_fields) - ) - warnings.warn(msg) - return None # We cannot detect the schema in full. - - # The augment_schema() helper itself will also issue unknown type - # warnings if detection still fails for any of the fields. - bq_schema_out = augment_schema(dataframe, bq_schema_out) + if unknown_type_columns != []: + msg = "Could not determine the type of columns: {}".format( + ", ".join(unknown_type_columns) + ) + warnings.warn(msg) + return None # We cannot detect the schema in full. - return tuple(bq_schema_out) if bq_schema_out else None + return tuple(bq_schema_out) -def augment_schema(dataframe, current_bq_schema): - """Try to deduce the unknown field types and return an improved schema. +def _get_schema_by_pyarrow(name, series): + """Attempt to detect the type of the given series by leveraging PyArrow's + type detection capabilities. - This function requires ``pyarrow`` to run. If all the missing types still - cannot be detected, ``None`` is returned. If all types are already known, - a shallow copy of the given schema is returned. + This function requires the ``pyarrow`` library to be installed and + available. If the series type cannot be determined or ``pyarrow`` is not + available, ``None`` is returned. Args: - dataframe (pandas.DataFrame): - DataFrame for which some of the field types are still unknown. - current_bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]): - A BigQuery schema for ``dataframe``. The types of some or all of - the fields may be ``None``. + name (str): + the column name of the SchemaField. + series (pandas.Series): + The Series data for which to detect the data type. Returns: - Optional[Sequence[google.cloud.bigquery.schema.SchemaField]] + Optional[google.cloud.bigquery.schema.SchemaField]: + A tuple containing the BigQuery-compatible type string (e.g., + "STRING", "INTEGER", "TIMESTAMP", "DATETIME", "NUMERIC", "BIGNUMERIC") + and the mode string ("NULLABLE", "REPEATED"). + Returns ``None`` if the type cannot be determined or ``pyarrow`` + is not imported. """ - # pytype: disable=attribute-error - augmented_schema = [] - unknown_type_fields = [] - for field in current_bq_schema: - if field.field_type is not None: - augmented_schema.append(field) - continue - - arrow_table = pyarrow.array(dataframe.reset_index()[field.name]) - - if pyarrow.types.is_list(arrow_table.type): - # `pyarrow.ListType` - detected_mode = "REPEATED" - detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq( - arrow_table.values.type.id - ) - - # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds - # it to such datetimes, causing them to be recognized as TIMESTAMP type. - # We thus additionally check the actual data to see if we need to overrule - # that and choose DATETIME instead. - # Note that this should only be needed for datetime values inside a list, - # since scalar datetime values have a proper Pandas dtype that allows - # distinguishing between timezone-naive and timezone-aware values before - # even requiring the additional schema augment logic in this method. - if detected_type == "TIMESTAMP": - valid_item = _first_array_valid(dataframe[field.name]) - if isinstance(valid_item, datetime) and valid_item.tzinfo is None: - detected_type = "DATETIME" - else: - detected_mode = field.mode - detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id) - if detected_type == "NUMERIC" and arrow_table.type.scale > 9: - detected_type = "BIGNUMERIC" - if detected_type is None: - unknown_type_fields.append(field) - continue + if not pyarrow: + return None - new_field = schema.SchemaField( - name=field.name, - field_type=detected_type, - mode=detected_mode, - description=field.description, - fields=field.fields, - ) - augmented_schema.append(new_field) + arrow_table = pyarrow.array(series) + if pyarrow.types.is_list(arrow_table.type): + # `pyarrow.ListType` + mode = "REPEATED" + type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.values.type.id) + + # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds + # it to such datetimes, causing them to be recognized as TIMESTAMP type. + # We thus additionally check the actual data to see if we need to overrule + # that and choose DATETIME instead. + # Note that this should only be needed for datetime values inside a list, + # since scalar datetime values have a proper Pandas dtype that allows + # distinguishing between timezone-naive and timezone-aware values before + # even requiring the additional schema augment logic in this method. + if type == "TIMESTAMP": + valid_item = _first_array_valid(series) + if isinstance(valid_item, datetime) and valid_item.tzinfo is None: + type = "DATETIME" + else: + mode = "NULLABLE" # default mode + type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id) + if type == "NUMERIC" and arrow_table.type.scale > 9: + type = "BIGNUMERIC" - if unknown_type_fields: - warnings.warn( - "Pyarrow could not determine the type of columns: {}.".format( - ", ".join(field.name for field in unknown_type_fields) - ) - ) + if type is not None: + return schema.SchemaField(name, type, mode) + else: return None - return augmented_schema - # pytype: enable=attribute-error - def dataframe_to_arrow(dataframe, bq_schema): """Convert pandas dataframe to Arrow table, using BigQuery schema. diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 749b0a00e..1f1aab7a4 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -284,15 +284,13 @@ def name(self): return self._properties.get("name", "") @property - def field_type(self): + def field_type(self) -> str: """str: The type of the field. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type """ type_ = self._properties.get("type") - if type_ is None: # Shouldn't happen, but some unit tests do this. - return None return cast(str, type_).upper() @property @@ -397,20 +395,16 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. """ - field_type = self.field_type.upper() if self.field_type is not None else None - - # Type can temporarily be set to None if the code needs a SchemaField instance, - # but has not determined the exact type of the field yet. - if field_type is not None: - if field_type == "STRING" or field_type == "BYTES": - if self.max_length is not None: - field_type = f"{field_type}({self.max_length})" - elif field_type.endswith("NUMERIC"): - if self.precision is not None: - if self.scale is not None: - field_type = f"{field_type}({self.precision}, {self.scale})" - else: - field_type = f"{field_type}({self.precision})" + field_type = self.field_type + if field_type == "STRING" or field_type == "BYTES": + if self.max_length is not None: + field_type = f"{field_type}({self.max_length})" + elif field_type.endswith("NUMERIC"): + if self.precision is not None: + if self.scale is not None: + field_type = f"{field_type}({self.precision}, {self.scale})" + else: + field_type = f"{field_type}({self.precision})" policy_tags = ( None if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index d6ea5df7e..d87c65581 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1568,31 +1568,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): # set to "datetime64[ns]", and pyarrow converts that to pyarrow.TimestampArray. # We thus cannot expect to get a DATETIME date when converting back to the # BigQuery type. - - current_schema = ( - schema.SchemaField("bool_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("int_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("float_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("time_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("timestamp_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("date_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), - ) - - with warnings.catch_warnings(record=True) as warned: - augmented_schema = module_under_test.augment_schema(dataframe, current_schema) - - # there should be no relevant warnings - unwanted_warnings = [ - warning for warning in warned if "Pyarrow could not" in str(warning) - ] - assert not unwanted_warnings - - # the augmented schema must match the expected - expected_schema = ( + expected_schemas = ( schema.SchemaField("bool_field", field_type="BOOL", mode="NULLABLE"), schema.SchemaField("int_field", field_type="INT64", mode="NULLABLE"), schema.SchemaField("float_field", field_type="FLOAT64", mode="NULLABLE"), @@ -1607,8 +1583,13 @@ def test_augment_schema_type_detection_succeeds(module_under_test): ), ) - by_name = operator.attrgetter("name") - assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) + for col_name, expected_schema in zip(dataframe, expected_schemas): + with warnings.catch_warnings(record=True) as warned: + schema_field = module_under_test._get_schema_by_pyarrow( + col_name, dataframe[col_name] + ) + assert warned == [] + assert schema_field == expected_schema @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1639,30 +1620,20 @@ def test_augment_schema_repeated_fields(module_under_test): ] ) - current_schema = ( - schema.SchemaField("string_array", field_type=None, mode="NULLABLE"), - schema.SchemaField("timestamp_array", field_type=None, mode="NULLABLE"), - schema.SchemaField("datetime_array", field_type=None, mode="NULLABLE"), - ) - - with warnings.catch_warnings(record=True) as warned: - augmented_schema = module_under_test.augment_schema(dataframe, current_schema) - - # there should be no relevant warnings - unwanted_warnings = [ - warning for warning in warned if "Pyarrow could not" in str(warning) - ] - assert not unwanted_warnings - # the augmented schema must match the expected - expected_schema = ( + expected_schemas = ( schema.SchemaField("string_array", field_type="STRING", mode="REPEATED"), schema.SchemaField("timestamp_array", field_type="TIMESTAMP", mode="REPEATED"), schema.SchemaField("datetime_array", field_type="DATETIME", mode="REPEATED"), ) - by_name = operator.attrgetter("name") - assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) + for col_name, expected_schema in zip(dataframe, expected_schemas): + with warnings.catch_warnings(record=True) as warned: + schema_field = module_under_test._get_schema_by_pyarrow( + col_name, dataframe[col_name] + ) + assert warned == [] + assert schema_field == expected_schema @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1681,24 +1652,21 @@ def test_augment_schema_type_detection_fails(module_under_test): }, ] ) - current_schema = [ - schema.SchemaField("status", field_type="STRING", mode="NULLABLE"), - schema.SchemaField("struct_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("struct_field_2", field_type=None, mode="NULLABLE"), - ] - - with warnings.catch_warnings(record=True) as warned: - augmented_schema = module_under_test.augment_schema(dataframe, current_schema) - assert augmented_schema is None + expected_schemas = ( + schema.SchemaField("status", field_type="STRING", mode="NULLABLE"), + # Could not determine the type of these columns + None, + None, + ) - expected_warnings = [ - warning for warning in warned if "could not determine" in str(warning) - ] - assert len(expected_warnings) == 1 - warning_msg = str(expected_warnings[0]) - assert "pyarrow" in warning_msg.lower() - assert "struct_field" in warning_msg and "struct_field_2" in warning_msg + for col_name, expected_schema in zip(dataframe, expected_schemas): + with warnings.catch_warnings(record=True) as warned: + schema_field = module_under_test._get_schema_by_pyarrow( + col_name, dataframe[col_name] + ) + assert warned == [] + assert schema_field == expected_schema @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1706,23 +1674,14 @@ def test_augment_schema_type_detection_fails_array_data(module_under_test): dataframe = pandas.DataFrame( data=[{"all_none_array": [None, float("NaN")], "empty_array": []}] ) - current_schema = [ - schema.SchemaField("all_none_array", field_type=None, mode="NULLABLE"), - schema.SchemaField("empty_array", field_type=None, mode="NULLABLE"), - ] - - with warnings.catch_warnings(record=True) as warned: - augmented_schema = module_under_test.augment_schema(dataframe, current_schema) - assert augmented_schema is None - - expected_warnings = [ - warning for warning in warned if "could not determine" in str(warning) - ] - assert len(expected_warnings) == 1 - warning_msg = str(expected_warnings[0]) - assert "pyarrow" in warning_msg.lower() - assert "all_none_array" in warning_msg and "empty_array" in warning_msg + for col_name in dataframe: + with warnings.catch_warnings(record=True) as warned: + schema_field = module_under_test._get_schema_by_pyarrow( + col_name, dataframe[col_name] + ) + assert warned == [] + assert schema_field is None @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 3f2304a70..c63a8312c 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -640,11 +640,6 @@ def test___repr__(self): expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None)" self.assertEqual(repr(field1), expected) - def test___repr__type_not_set(self): - field1 = self._make_one("field1", field_type=None) - expected = "SchemaField('field1', None, 'NULLABLE', None, None, (), None)" - self.assertEqual(repr(field1), expected) - def test___repr__evaluable_no_policy_tags(self): field = self._make_one("field1", "STRING", "REQUIRED", "Description") field_repr = repr(field) From cb646ceea172bf199f366ae0592546dff2d3bcb2 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 19 May 2025 12:10:12 -0700 Subject: [PATCH 449/536] feat: support job reservation (#2186) * feat: support job reservation * Update google/cloud/bigquery/job/base.py Co-authored-by: Chalmer Lowe --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/job/base.py | 32 +++++++++++++++++++++++ tests/unit/job/test_base.py | 42 +++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index eaa9d3460..5eb700ce7 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -224,6 +224,26 @@ def job_timeout_ms(self, value): else: self._properties.pop("jobTimeoutMs", None) + @property + def reservation(self): + """str: Optional. The reservation that job would use. + + User can specify a reservation to execute the job. If reservation is + not set, reservation is determined based on the rules defined by the + reservation assignments. The expected format is + projects/{project}/locations/{location}/reservations/{reservation}. + + Raises: + ValueError: If ``value`` type is not None or of string type. + """ + return self._properties.setdefault("reservation", None) + + @reservation.setter + def reservation(self, value): + if value and not isinstance(value, str): + raise ValueError("Reservation must be None or a string.") + self._properties["reservation"] = value + @property def labels(self): """Dict[str, str]: Labels for the job. @@ -488,6 +508,18 @@ def location(self): """str: Location where the job runs.""" return _helpers._get_sub_prop(self._properties, ["jobReference", "location"]) + @property + def reservation_id(self): + """str: Name of the primary reservation assigned to this job. + + Note that this could be different than reservations reported in + the reservation field if parent reservations were used to execute + this job. + """ + return _helpers._get_sub_prop( + self._properties, ["statistics", "reservation_id"] + ) + def _require_client(self, client): """Check client or verify over-ride. diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 22a0fa450..aa3d49ce3 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -443,6 +443,16 @@ def test_state(self): status["state"] = state self.assertEqual(job.state, state) + def test_reservation_id(self): + reservation_id = "RESERVATION-ID" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.reservation_id) + stats = job._properties["statistics"] = {} + self.assertIsNone(job.reservation_id) + stats["reservation_id"] = reservation_id + self.assertEqual(job.reservation_id, reservation_id) + def _set_properties_job(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -1188,15 +1198,18 @@ def test_fill_query_job_config_from_default(self): job_config = QueryJobConfig() job_config.dry_run = True job_config.maximum_bytes_billed = 1000 + job_config.reservation = "reservation_1" default_job_config = QueryJobConfig() default_job_config.use_query_cache = True default_job_config.maximum_bytes_billed = 2000 + default_job_config.reservation = "reservation_2" final_job_config = job_config._fill_from_default(default_job_config) self.assertTrue(final_job_config.dry_run) self.assertTrue(final_job_config.use_query_cache) self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + self.assertEqual(final_job_config.reservation, "reservation_1") def test_fill_load_job_from_default(self): from google.cloud.bigquery import LoadJobConfig @@ -1204,15 +1217,18 @@ def test_fill_load_job_from_default(self): job_config = LoadJobConfig() job_config.create_session = True job_config.encoding = "UTF-8" + job_config.reservation = "reservation_1" default_job_config = LoadJobConfig() default_job_config.ignore_unknown_values = True default_job_config.encoding = "ISO-8859-1" + default_job_config.reservation = "reservation_2" final_job_config = job_config._fill_from_default(default_job_config) self.assertTrue(final_job_config.create_session) self.assertTrue(final_job_config.ignore_unknown_values) self.assertEqual(final_job_config.encoding, "UTF-8") + self.assertEqual(final_job_config.reservation, "reservation_1") def test_fill_from_default_conflict(self): from google.cloud.bigquery import QueryJobConfig @@ -1232,10 +1248,12 @@ def test_fill_from_empty_default_conflict(self): job_config = QueryJobConfig() job_config.dry_run = True job_config.maximum_bytes_billed = 1000 + job_config.reservation = "reservation_1" final_job_config = job_config._fill_from_default(default_job_config=None) self.assertTrue(final_job_config.dry_run) self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + self.assertEqual(final_job_config.reservation, "reservation_1") @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") def test__get_sub_prop_wo_default(self, _get_sub_prop): @@ -1338,3 +1356,27 @@ def test_job_timeout_properties(self): job_config.job_timeout_ms = None assert job_config.job_timeout_ms is None assert "jobTimeoutMs" not in job_config._properties + + def test_reservation_miss(self): + job_config = self._make_one() + self.assertEqual(job_config.reservation, None) + + def test_reservation_hit(self): + job_config = self._make_one() + job_config._properties["reservation"] = "foo" + self.assertEqual(job_config.reservation, "foo") + + def test_reservation_update_in_place(self): + job_config = self._make_one() + job_config.reservation = "bar" # update in place + self.assertEqual(job_config.reservation, "bar") + + def test_reservation_setter_invalid(self): + job_config = self._make_one() + with self.assertRaises(ValueError): + job_config.reservation = object() + + def test_reservation_setter(self): + job_config = self._make_one() + job_config.reservation = "foo" + self.assertEqual(job_config._properties["reservation"], "foo") From bf58ca5425809b08895eebfa74a8ef5c559a69ac Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 19 May 2025 16:54:24 -0400 Subject: [PATCH 450/536] ci: Update to the CI/CD pipeline via github workflow to help cut turn-around time (#2189) Update to the CI/CD pipeline via github workflow to help cut turn-around time. * added github workflow * changed the number of pytest-xdist workers from "auto" to "8" (based on local tests and discussion with Tim, choosing auto sometimes takes longer to run than choosing a smaller number. I suspect this is partly because for small or short tests the overhead needed to setup a worker exceeds the time savings of having extra workers). * modified numerous tests to explicitly include a project path to avoid an attempt to find the project by making an external call via the pydata-google-auth workflow (which opens an input and waits for response from the user that never comes). --- .github/workflows/unittest.yml | 89 ++++++++++++++++++++++++++++++++++ noxfile.py | 2 +- tests/unit/test_magics.py | 30 +++++++++++- 3 files changed, 118 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/unittest.yml diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml new file mode 100644 index 000000000..24c9ddbaf --- /dev/null +++ b/.github/workflows/unittest.yml @@ -0,0 +1,89 @@ +on: + pull_request: + branches: + - main +name: unittest +jobs: + unit: + # Use `ubuntu-latest` runner. + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.9', '3.11', '3.12', '3.13'] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run unit tests + env: + COVERAGE_FILE: .coverage-${{ matrix.python }} + run: | + nox -s unit-${{ matrix.python }} + - name: Upload coverage results + uses: actions/upload-artifact@v4 + with: + name: coverage-artifact-${{ matrix.python }} + path: .coverage-${{ matrix.python }} + include-hidden-files: true + + unit_noextras: + # Use `ubuntu-latest` runner. + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.9', '3.13'] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run unit_noextras tests + env: + COVERAGE_FILE: .coverage-unit-noextras-${{ matrix.python }} + run: | + nox -s unit_noextras-${{ matrix.python }} + - name: Upload coverage results + uses: actions/upload-artifact@v4 + with: + name: coverage-artifact-unit-noextras-${{ matrix.python }} + path: .coverage-unit-noextras-${{ matrix.python }} + include-hidden-files: true + + cover: + runs-on: ubuntu-latest + needs: + - unit + - unit_noextras + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.9" + - name: Install coverage + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install coverage + - name: Download coverage results + uses: actions/download-artifact@v4 + with: + path: .coverage-results/ + - name: Report coverage results + run: | + find .coverage-results -type f -name '*.zip' -exec unzip {} \; + coverage combine .coverage-results/**/.coverage* + coverage report --show-missing --fail-under=100 diff --git a/noxfile.py b/noxfile.py index 1922a68a5..575bbb100 100644 --- a/noxfile.py +++ b/noxfile.py @@ -128,7 +128,7 @@ def default(session, install_extras=True): # Run py.test against the unit tests. session.run( "py.test", - "-n=auto", + "-n=8", "--quiet", "-W default::PendingDeprecationWarning", "--cov=google/cloud/bigquery", diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 0f1e030cb..a9a12283b 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -480,6 +480,7 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -831,6 +832,7 @@ def test_bigquery_magic_w_max_results_query_job_results_fails(monkeypatch): assert close_transports.called +@pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_w_table_id_invalid(monkeypatch): ip = IPython.get_ipython() monkeypatch.setattr(bigquery, "bigquery_magics", None) @@ -861,6 +863,7 @@ def test_bigquery_magic_w_table_id_invalid(monkeypatch): assert "Traceback (most recent call last)" not in output +@pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_w_missing_query(monkeypatch): ip = IPython.get_ipython() monkeypatch.setattr(bigquery, "bigquery_magics", None) @@ -1354,6 +1357,8 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1383,6 +1388,8 @@ def test_bigquery_magic_with_progress_bar_type(monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + with run_query_patch as run_query_mock: ip.run_cell_magic( "bigquery", "--progress_bar_type=tqdm_gui", "SELECT 17 as num" @@ -1565,6 +1572,8 @@ def test_bigquery_magic_with_string_params(ipython_ns_cleanup, monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1605,6 +1614,8 @@ def test_bigquery_magic_with_dict_params(ipython_ns_cleanup, monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1689,6 +1700,7 @@ def test_bigquery_magic_with_option_value_incorrect(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" sql = "SELECT @foo AS foo" @@ -1719,6 +1731,8 @@ def test_bigquery_magic_with_dict_params_negative_value( run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1760,6 +1774,8 @@ def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup, monkeyp run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1801,6 +1817,8 @@ def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup, monkeyp run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1852,6 +1870,7 @@ def test_bigquery_magic_valid_query_in_existing_variable( magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" ipython_ns_cleanup.append((ip, "custom_query")) ipython_ns_cleanup.append((ip, "query_results_df")) @@ -1892,6 +1911,7 @@ def test_bigquery_magic_nonexisting_query_variable(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True @@ -1917,7 +1937,7 @@ def test_bigquery_magic_empty_query_variable_name(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - + magics.context.project = "unit-test-project" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) @@ -1940,6 +1960,7 @@ def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup, monkeypatc magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True @@ -1968,9 +1989,14 @@ def test_bigquery_magic_query_variable_not_identifier(monkeypatch): google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" cell_body = "$123foo" # 123foo is not valid Python identifier - with io.capture_output() as captured_io: + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + + with run_query_patch, io.capture_output() as captured_io: ip.run_cell_magic("bigquery", "", cell_body) # If "$" prefixes a string that is not a Python identifier, we do not treat such From 06ee3df6c5346e5041ebab80da8b2a299378444c Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 19 May 2025 16:02:21 -0700 Subject: [PATCH 451/536] Revert "fix: table iterator should not use bqstorage when page_size is not None (#2154)" (#2191) This reverts commit e89a707b162182ededbf94cc9a0f7594bc2be475. --- google/cloud/bigquery/table.py | 11 ++--------- tests/unit/test_dbapi_cursor.py | 1 - tests/unit/test_table.py | 7 ------- 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index e084468f6..3f472c490 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1897,11 +1897,6 @@ def total_bytes_processed(self) -> Optional[int]: """total bytes processed from job statistics, if present.""" return self._total_bytes_processed - @property - def page_size(self) -> Optional[int]: - """The maximum number of rows in each page of results from this request, if present.""" - return self._page_size - def _is_almost_completely_cached(self): """Check if all results are completely cached. @@ -1953,7 +1948,7 @@ def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client): if self._is_almost_completely_cached(): return False - if self.max_results is not None or self.page_size is not None: + if self.max_results is not None: return False try: @@ -2023,9 +2018,7 @@ def _maybe_warn_max_results( bqstorage_client: The BigQuery Storage client intended to use for downloading result rows. """ - if bqstorage_client is not None and ( - self.max_results is not None or self.page_size is not None - ): + if bqstorage_client is not None and self.max_results is not None: warnings.warn( "Cannot use bqstorage_client if max_results is set, " "reverting to fetching data with the REST endpoint.", diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index cba9030de..6fca4cec0 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -161,7 +161,6 @@ def _mock_rows( mock_rows, ) mock_rows.max_results = None - mock_rows.page_size = None type(mock_rows).job_id = mock.PropertyMock(return_value="test-job-id") type(mock_rows).location = mock.PropertyMock(return_value="test-location") type(mock_rows).num_dml_affected_rows = mock.PropertyMock( diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 92fa0e2ec..4791c6511 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2858,13 +2858,6 @@ def test__should_use_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) - def test__should_use_bqstorage_returns_false_if_page_size_set(self): - iterator = self._make_one(page_size=10, first_page_response=None) # not cached - result = iterator._should_use_bqstorage( - bqstorage_client=None, create_bqstorage_client=True - ) - self.assertFalse(result) - def test__should_use_bqstorage_returns_false_w_warning_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached From b140fca726488106693aaf14695cb7bb9b4b2796 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 16:27:29 -0700 Subject: [PATCH 452/536] chore(main): release 3.33.0 (#2180) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- CHANGELOG.md | 15 +++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff1bd7acc..2f7166d44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.33.0](https://github.com/googleapis/python-bigquery/compare/v3.32.0...v3.33.0) (2025-05-19) + + +### Features + +* Add ability to set autodetect_schema query param in update_table ([#2171](https://github.com/googleapis/python-bigquery/issues/2171)) ([57f940d](https://github.com/googleapis/python-bigquery/commit/57f940d957613b4d80fb81ea40a1177b73856189)) +* Add dtype parameters to to_geodataframe functions ([#2176](https://github.com/googleapis/python-bigquery/issues/2176)) ([ebfd0a8](https://github.com/googleapis/python-bigquery/commit/ebfd0a83d43bcb96f65f5669437220aa6138b766)) +* Support job reservation ([#2186](https://github.com/googleapis/python-bigquery/issues/2186)) ([cb646ce](https://github.com/googleapis/python-bigquery/commit/cb646ceea172bf199f366ae0592546dff2d3bcb2)) + + +### Bug Fixes + +* Ensure AccessEntry equality and repr uses the correct `entity_type` ([#2182](https://github.com/googleapis/python-bigquery/issues/2182)) ([0217637](https://github.com/googleapis/python-bigquery/commit/02176377d5e2fc25b5cd4f46aa6ebfb1b6a960a6)) +* Ensure SchemaField.field_dtype returns a string ([#2188](https://github.com/googleapis/python-bigquery/issues/2188)) ([7ec2848](https://github.com/googleapis/python-bigquery/commit/7ec2848379d5743bbcb36700a1153540c451e0e0)) + ## [3.32.0](https://github.com/googleapis/python-bigquery/compare/v3.31.0...v3.32.0) (2025-05-12) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index fe13d2477..8304ac025 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.32.0" +__version__ = "3.33.0" From a3d6bf3a8d674984957997e965a811fa58dfc4a6 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 20 May 2025 10:21:22 -0400 Subject: [PATCH 453/536] ci: Import numpy before pyarrow in tests to resolve import warning (#2187) * Fix: Import numpy before pyarrow in tests to resolve import warning A `PytestDeprecationWarning` was occurring in several test files because `pyarrow`, when imported by `pytest.importorskip`, would fail to import `numpy.core.multiarray`. This change addresses the warning by explicitly importing `numpy` before `pytest.importorskip("pyarrow", ...)` in the affected test files. This ensures that numpy is fully initialized before pyarrow attempts to use it, resolving the underlying import error. I also updated the test execution to use `nox -s unit`, which correctly sets up the test environment and dependencies, allowing the tests to pass and confirm the warning is resolved. Pre-existing failures in `tests/unit/test_magics.py` are unrelated to this change. * Update tests/unit/test__pyarrow_helpers.py * revisions to numpy handling * adds import or skip commands to accompany pyarrow import or skips * Update tests/unit/test__pandas_helpers.py * updates an import step and restores gc import * Updates magics.context and removes unneeded? reference to numpy/pyarrow --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- noxfile.py | 3 +-- testing/constraints-3.9.txt | 1 + tests/unit/test__pandas_helpers.py | 1 + tests/unit/test__pyarrow_helpers.py | 2 +- tests/unit/test_dbapi__helpers.py | 1 + tests/unit/test_magics.py | 5 +++++ tests/unit/test_table.py | 28 ++++++++++++++++++++++++++-- tests/unit/test_table_arrow.py | 3 ++- 8 files changed, 38 insertions(+), 6 deletions(-) diff --git a/noxfile.py b/noxfile.py index 575bbb100..6807b7ee4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -163,8 +163,7 @@ def unit_noextras(session): # so that it continues to be an optional dependency. # https://github.com/googleapis/python-bigquery/issues/1877 if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: - session.install("pyarrow==4.0.0") - + session.install("pyarrow==4.0.0", "numpy==1.20.2") default(session, install_extras=False) diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index 60a155f0d..f61c0cf09 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -20,6 +20,7 @@ ipykernel==6.2.0 opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 +numpy==1.20.2 packaging==24.2.0 pandas==1.3.0 pandas-gbq==0.26.1 diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index d87c65581..bc94f5f54 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1856,6 +1856,7 @@ def test__download_table_bqstorage_shuts_down_workers( Make sure that when the top-level iterator goes out of scope (is deleted), the child threads are also stopped. """ + pytest.importorskip("google.cloud.bigquery_storage_v1") from google.cloud.bigquery import dataset from google.cloud.bigquery import table import google.cloud.bigquery_storage_v1.reader diff --git a/tests/unit/test__pyarrow_helpers.py b/tests/unit/test__pyarrow_helpers.py index 06fc2eb85..c12a526de 100644 --- a/tests/unit/test__pyarrow_helpers.py +++ b/tests/unit/test__pyarrow_helpers.py @@ -14,7 +14,7 @@ import pytest - +numpy = pytest.importorskip("numpy") pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 7e1da0034..9907df97b 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -210,6 +210,7 @@ def test_empty_iterable(self): self.assertEqual(list(result), []) def test_non_empty_iterable(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") from tests.unit.helpers import _to_pyarrow diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index a9a12283b..814150693 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -1276,6 +1276,11 @@ def test_bigquery_magic_with_no_query_cache(monkeypatch): bigquery.load_ipython_extension(ip) conn = make_connection() monkeypatch.setattr(magics.context, "_connection", conn) + monkeypatch.setattr( + magics.context, + "credentials", + mock.create_autospec(google.auth.credentials.Credentials, instance=True), + ) monkeypatch.setattr(magics.context, "project", "project-from-context") # --no_query_cache option should override context. diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 4791c6511..eb2c8d9ec 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2416,6 +2416,7 @@ def test_to_arrow_error_if_pyarrow_is_none(self): row_iterator.to_arrow() def test_to_arrow(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip("pyarrow") row_iterator = self._make_one() tbl = row_iterator.to_arrow() @@ -2423,6 +2424,7 @@ def test_to_arrow(self): self.assertEqual(tbl.num_rows, 0) def test_to_arrow_iterable(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3089,6 +3091,7 @@ def test_to_arrow_iterable_w_bqstorage(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() def test_to_arrow(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3173,6 +3176,7 @@ def test_to_arrow(self): ) def test_to_arrow_w_nulls(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3209,6 +3213,7 @@ def test_to_arrow_w_nulls(self): self.assertEqual(ages, [32, 29, None, 111]) def test_to_arrow_w_unknown_type(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3254,6 +3259,7 @@ def test_to_arrow_w_unknown_type(self): self.assertTrue(all("sport" in str(warning) for warning in warned)) def test_to_arrow_w_empty_table(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3295,6 +3301,7 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[1].name, "age") def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.schema import SchemaField @@ -3337,6 +3344,7 @@ def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.schema import SchemaField @@ -3375,6 +3383,7 @@ def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() def test_to_arrow_w_bqstorage(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -3458,6 +3467,7 @@ def test_to_arrow_w_bqstorage(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() def test_to_arrow_w_bqstorage_creates_client(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -3491,6 +3501,7 @@ def test_to_arrow_w_bqstorage_creates_client(self): bqstorage_client._transport.grpc_channel.close.assert_called_once() def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3524,6 +3535,7 @@ def mock_verify_version(raise_if_error: bool = False): self.assertEqual(tbl.num_rows, 2) def test_to_arrow_w_bqstorage_no_streams(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -3563,6 +3575,7 @@ def test_to_arrow_w_bqstorage_no_streams(self): self.assertEqual(actual_table.schema[2].name, "colB") def test_to_arrow_progress_bar(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") pytest.importorskip("tqdm") pytest.importorskip("tqdm.notebook") @@ -3696,6 +3709,7 @@ def test_to_dataframe_iterable_with_dtypes(self): self.assertEqual(df_2["age"][0], 33) def test_to_dataframe_iterable_w_bqstorage(self): + pytest.importorskip("numpy") pandas = pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") @@ -3770,6 +3784,7 @@ def test_to_dataframe_iterable_w_bqstorage(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): + pytest.importorskip("numpy") pandas = pytest.importorskip("pandas") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -4513,7 +4528,7 @@ def test_to_dataframe_w_none_dtypes_mapper(self): def test_to_dataframe_w_unsupported_dtypes_mapper(self): pytest.importorskip("pandas") - import numpy + numpy = pytest.importorskip("numpy") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -4797,6 +4812,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() def test_to_dataframe_w_bqstorage_creates_client(self): + pytest.importorskip("numpy") pytest.importorskip("pandas") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -4830,6 +4846,7 @@ def test_to_dataframe_w_bqstorage_creates_client(self): bqstorage_client._transport.grpc_channel.close.assert_called_once() def test_to_dataframe_w_bqstorage_no_streams(self): + pytest.importorskip("numpy") pytest.importorskip("pandas") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -4858,6 +4875,7 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertTrue(got.empty) def test_to_dataframe_w_bqstorage_logs_session(self): + pytest.importorskip("numpy") pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pytest.importorskip("pyarrow") @@ -4882,6 +4900,7 @@ def test_to_dataframe_w_bqstorage_logs_session(self): ) def test_to_dataframe_w_bqstorage_empty_streams(self): + pytest.importorskip("numpy") pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -4936,6 +4955,7 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): self.assertTrue(got.empty) def test_to_dataframe_w_bqstorage_nonempty(self): + pytest.importorskip("numpy") pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5018,6 +5038,7 @@ def test_to_dataframe_w_bqstorage_nonempty(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): + pytest.importorskip("numpy") bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5070,6 +5091,7 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): self.assertTrue(got.index.is_unique) def test_to_dataframe_w_bqstorage_updates_progress_bar(self): + pytest.importorskip("numpy") bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5147,6 +5169,7 @@ def blocking_to_arrow(*args, **kwargs): tqdm_mock().close.assert_called_once() def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): + pytest.importorskip("numpy") bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5322,6 +5345,7 @@ def test_to_dataframe_w_bqstorage_snapshot(self): row_iterator.to_dataframe(bqstorage_client) def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): + pytest.importorskip("numpy") pytest.importorskip("google.cloud.bigquery_storage") pandas = pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5604,7 +5628,7 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): """ pandas = pytest.importorskip("pandas") geopandas = pytest.importorskip("geopandas") - import numpy + numpy = pytest.importorskip("numpy") from shapely import wkt row_iterator = self._make_one_from_data( diff --git a/tests/unit/test_table_arrow.py b/tests/unit/test_table_arrow.py index 830c4ceb7..fdd1b7b78 100644 --- a/tests/unit/test_table_arrow.py +++ b/tests/unit/test_table_arrow.py @@ -18,7 +18,8 @@ import google.cloud.bigquery.table -pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") +pytest.importorskip("numpy") +pytest.importorskip("pyarrow", minversion="3.0.0") def test_to_arrow_with_jobs_query_response(): From 9b5ee78f046d9ca3f758eeca6244b8485fe35875 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Tue, 20 May 2025 10:46:23 -0400 Subject: [PATCH 454/536] docs: update query.py (#2192) Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/job/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index f9b99b7fb..954a46963 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1529,7 +1529,7 @@ def result( # type: ignore # (incompatible with supertype) a DDL query, an ``_EmptyRowIterator`` instance is returned. Raises: - google.cloud.exceptions.GoogleAPICallError: + google.api_core.exceptions.GoogleAPICallError: If the job failed and retries aren't successful. concurrent.futures.TimeoutError: If the job did not complete in the given timeout. From bfa95f1469481c682e990743cc8b7025fb0facd1 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 20 May 2025 13:56:30 -0400 Subject: [PATCH 455/536] ci: adds new github workflow focused on documentation in prep to deprecate kokoro presubmit (#2194) * I've created a new workflow for the docs and docsfx nox sessions. This involves a new GitHub workflow located in `.github/workflows/docs.yml`. This new workflow will now handle running the `docs` and `docsfx` nox sessions, which were previously managed by the `.kokoro/presubmit/presubmit.cfg` workflow. Here's how the new workflow operates: - It activates when you make pull requests to the `main` branch. - It executes two jobs: `docs` and `docsfx`. - Both of these jobs utilize Python 3.10. - Each job installs nox and then runs its corresponding nox session (`docs-3.10` or `docsfx-3.10`). This adjustment is a step towards phasing out and removing the `.kokoro/presubmit/presubmit.cfg` file. * Update .github/workflows/docs.yml * Update .github/workflows/docs.yml --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .github/workflows/docs.yml | 39 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 000000000..9372faac2 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,39 @@ +on: + pull_request: + branches: + - main +name: docs +jobs: + docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run docs session + run: | + nox -s docs-3.10 + + docfx: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run docfx session + run: | + nox -s docfx-3.10 From 12490f2f03681516465fc34217dcdf57000f6fdd Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 21 May 2025 16:20:17 +0200 Subject: [PATCH 456/536] fix(deps): update all dependencies (#2184) * fix(deps): update all dependencies * Update pyproject.toml * Update .github/workflows/docs.yml * Update .github/workflows/docs.yml --------- Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements-test.txt | 2 +- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 4 ++-- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 6abea3b4d..cc71ee426 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.2 +google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 743d0fe35..4a5b75346 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.32.0 +google-cloud-bigquery==3.33.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 434a594cb..3b1a3ef54 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -3,7 +3,7 @@ certifi==2025.4.26 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' -click==8.2.0; python_version >= '3.10' +click==8.2.1; python_version >= '3.10' click-plugins==1.1.1 cligj==0.7.2 db-dtypes==1.4.3 @@ -12,7 +12,7 @@ geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 google-auth==2.40.1 -google-cloud-bigquery==3.32.0 +google-cloud-bigquery==3.33.0 google-cloud-bigquery-storage==2.31.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 @@ -36,7 +36,7 @@ PyYAML==6.0.2 requests==2.32.3 rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' -Shapely==2.1.0; python_version >= '3.10' +Shapely==2.1.1; python_version >= '3.10' six==1.17.0 typing-extensions==4.13.2 typing-inspect==0.9.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 6abea3b4d..cc71ee426 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.2 +google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index bb60f2a67..7d0c91e3d 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.9.0 +bigquery_magics==0.10.0 db-dtypes==1.4.3 -google.cloud.bigquery==3.32.0 +google.cloud.bigquery==3.33.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1 pandas==2.2.3 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 6abea3b4d..cc71ee426 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.2 +google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 17f43bf78..9f131e5b8 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ -bigquery-magics==0.9.0 +bigquery-magics==0.10.0 db-dtypes==1.4.3 -google-cloud-bigquery==3.32.0 +google-cloud-bigquery==3.33.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1; python_version == '3.9' ipython==9.2.0; python_version >= '3.10' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 6760e1228..503324cb0 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.6.2 +google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index c31815d69..dae43eff3 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.32.0 +google-cloud-bigquery==3.33.0 From 4379b3df0f4c5e0ac1d8308500306b4ec5c99dee Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 22 May 2025 13:01:13 -0400 Subject: [PATCH 457/536] ddocs: fixes several typos and updates a try except block (#2197) --- google/cloud/bigquery/_helpers.py | 2 +- google/cloud/bigquery/_pandas_helpers.py | 2 +- google/cloud/bigquery/client.py | 2 +- google/cloud/bigquery/table.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 76c4f1fbd..c7d7705e0 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -388,7 +388,7 @@ def range_to_py(self, value, field): class DataFrameCellDataParser(CellDataParser): - """Override of CellDataParser to handle differences in expection of values in DataFrame-like outputs. + """Override of CellDataParser to handle differences in expression of values in DataFrame-like outputs. This is used to turn the output of the REST API into a pyarrow Table, emulating the serialized arrow from the BigQuery Storage Read API. diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 6691e7ef6..10a5c59bb 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -1144,7 +1144,7 @@ def determine_requested_streams( """ if preserve_order: - # If preserve order is set, it takes precendence. + # If preserve order is set, it takes precedence. # Limit the requested streams to 1, to ensure that order # is preserved) return 1 diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8ad1586f4..067b389a5 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -4134,7 +4134,7 @@ def _list_rows_from_query_results( rows that were affected. query (Optional[str]): The query text used. - total_bytes_processed (Optinal[int]): + total_bytes_processed (Optional[int]): total bytes processed from job statistics, if present. Returns: diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 3f472c490..3b1334bd3 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -44,7 +44,7 @@ import geopandas # type: ignore except ImportError: geopandas = None -else: +finally: _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" try: @@ -1786,7 +1786,7 @@ class RowIterator(HTTPIterator): the first page is requested. query (Optional[str]): The query text used. - total_bytes_processed (Optinal[int]): + total_bytes_processed (Optional[int]): total bytes processed from job statistics, if present. """ From cc6310819290c8d3362f96c73d2373d3d4f1b44d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 22 May 2025 16:20:44 -0400 Subject: [PATCH 458/536] ci: Remove unit tests and doc tests from kokoro presubmit. (#2195) * The message is a commit message, not a message to a user from an AI agent. Therefore, it should be output as is. Output: Remove Kokoro presubmit for unit, docs, and coverage. This commit removes the Kokoro presubmit configuration that runs `unit_noextras`, `unit`, `cover`, `docs`, and `docfx` nox sessions. These checks are already performed by GitHub Actions, making the Kokoro configuration redundant. The change involves removing the `NOX_SESSION` environment variable definition from `.kokoro/presubmit/presubmit.cfg`. * Update presubmit.cfg * Delete .kokoro/presubmit/presubmit.cfg --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .kokoro/presubmit/presubmit.cfg | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 .kokoro/presubmit/presubmit.cfg diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg deleted file mode 100644 index ac4cc5847..000000000 --- a/.kokoro/presubmit/presubmit.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Disable system tests. -env_vars: { - key: "NOX_SESSION" - value: "unit_noextras unit cover docs docfx" -} From 85ff5b17e590b3c8c9b5bee64d5a69e0c01306ae Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 23 May 2025 10:25:33 -0400 Subject: [PATCH 459/536] ci: Configure Renovate to keep Python at 3.10 for docs workflow (#2199) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Configure Renovate to keep Python at 3.10 for docs workflow This change adds a packageRule to `renovate.json` to prevent Renovate from updating the Python version used in the `.github/workflows/docs.yml` GitHub Actions workflow. The rule specifically targets the `python-version` input of the `actions/setup-python` step and restricts allowed versions to `<3.11`, effectively pinning it to `3.10` for now. * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update renovate.json * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * adds files to excludes lists * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update owlbot.py * adds packageRule about pyproject.toml --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: Owl Bot --- owlbot.py | 3 ++- renovate.json | 9 ++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/owlbot.py b/owlbot.py index 60759adbe..80cf9d6e3 100644 --- a/owlbot.py +++ b/owlbot.py @@ -65,6 +65,7 @@ templated_files, excludes=[ "noxfile.py", + "renovate.json", "docs/multiprocessing.rst", "docs/index.rst", ".coveragerc", @@ -76,7 +77,7 @@ ".kokoro/continuous/prerelease-deps.cfg", ".kokoro/samples/python3.7/**", ".kokoro/samples/python3.8/**", - ".github/workflows", # exclude gh actions as credentials are needed for tests + ".github/workflows/**", # exclude gh actions as credentials are needed for tests "README.rst", ], ) diff --git a/renovate.json b/renovate.json index c7875c469..51eb51d6e 100644 --- a/renovate.json +++ b/renovate.json @@ -8,5 +8,12 @@ "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml"], "pip_requirements": { "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] - } + }, + "packageRules": [ + { + "matchFileNames": ["pyproject.toml"], + "matchStrings": ["matplotlib (.*); python_version == '3.9'"], + "allowedVersions": ">= 3.7.1, <= 3.9.2" + } + ] } From a5f98550121e033e887d2ae442b51ede13192a82 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 23 May 2025 10:57:55 -0400 Subject: [PATCH 460/536] ci: updates renovate.json to ignore docs.yml (#2200) * updates renovate to ignore docs.yml * Update renovate.json --- renovate.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/renovate.json b/renovate.json index 51eb51d6e..3ea143d4c 100644 --- a/renovate.json +++ b/renovate.json @@ -5,7 +5,7 @@ ":preserveSemverRanges", ":disableDependencyDashboard" ], - "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml"], + "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml", ".github/workflows/docs.yml"], "pip_requirements": { "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] }, From f67852d4f36c12abaca49dca5513382b36622aa0 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 23 May 2025 17:32:50 +0200 Subject: [PATCH 461/536] chore(deps): update dependency google-auth to v2.40.2 (#2196) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3b1a3ef54..7a0946fae 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 -google-auth==2.40.1 +google-auth==2.40.2 google-cloud-bigquery==3.33.0 google-cloud-bigquery-storage==2.31.0 google-cloud-core==2.4.3 From 28a9994792ec90a6a4d16835faf2137c09c0fb02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 27 May 2025 04:38:22 -0500 Subject: [PATCH 462/536] docs: use query_and_wait in the array parameters sample (#2202) --- samples/client_query_w_array_params.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/client_query_w_array_params.py b/samples/client_query_w_array_params.py index 25592a94a..e9d759f61 100644 --- a/samples/client_query_w_array_params.py +++ b/samples/client_query_w_array_params.py @@ -35,8 +35,8 @@ def client_query_w_array_params() -> None: bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]), ] ) - query_job = client.query(query, job_config=job_config) # Make an API request. + rows = client.query_and_wait(query, job_config=job_config) # Make an API request. - for row in query_job: + for row in rows: print("{}: \t{}".format(row.name, row.count)) # [END bigquery_query_params_arrays] From 64cd39fb395c4a03ef6d2ec8261e1709477b2186 Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 27 May 2025 10:53:49 -0700 Subject: [PATCH 463/536] feat: job creation mode GA (#2190) * feat: job creation mode GA This PR makes the underlying functionality related to how queries can optionally avoid job creation a GA feature. It does the following: * no longer uses the preview QUERY_PREVIEW_ENABLED environment variable to control job creation * adds a new argument to Client instantiation to control job creation mode * adds a property/setter to Client to control job creation mode This PR also updates/renames the sample demonstrating how to leverage job creation mode with Client.query_and_wait. --- google/cloud/bigquery/_job_helpers.py | 12 ++-------- google/cloud/bigquery/client.py | 22 +++++++++++------ google/cloud/bigquery/enums.py | 19 +++++++++++++++ ...rtmode.py => client_query_job_optional.py} | 24 +++++++++++-------- ...e.py => test_client_query_job_optional.py} | 6 ++--- tests/unit/test__job_helpers.py | 12 +++++----- tests/unit/test_client.py | 11 +++++++++ 7 files changed, 70 insertions(+), 36 deletions(-) rename samples/{client_query_shortmode.py => client_query_job_optional.py} (69%) rename samples/tests/{test_client_query_shortmode.py => test_client_query_job_optional.py} (85%) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 4a884ada5..888dc1e73 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -37,7 +37,6 @@ import copy import functools -import os import uuid import textwrap from typing import Any, Dict, Optional, TYPE_CHECKING, Union @@ -400,12 +399,6 @@ def query_and_wait( ) -> table.RowIterator: """Run the query, wait for it to finish, and return the results. - While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the - ``jobs.query`` REST API, use the default ``jobCreationMode`` unless - the environment variable ``QUERY_PREVIEW_ENABLED=true``. After - ``jobCreationMode`` is GA, this method will always use - ``jobCreationMode=JOB_CREATION_OPTIONAL``. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query Args: client: @@ -500,9 +493,8 @@ def query_and_wait( request_body["maxResults"] = min(page_size, max_results) elif page_size is not None or max_results is not None: request_body["maxResults"] = page_size or max_results - - if os.getenv("QUERY_PREVIEW_ENABLED", "").casefold() == "true": - request_body["jobCreationMode"] = "JOB_CREATION_OPTIONAL" + if client.default_job_creation_mode: + request_body["jobCreationMode"] = client.default_job_creation_mode def do_query(): request_body["requestId"] = make_job_id() diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 067b389a5..c6873545b 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -221,6 +221,10 @@ class Client(ClientWithProject): client_options (Optional[Union[google.api_core.client_options.ClientOptions, Dict]]): Client options used to set user options on the client. API Endpoint should be set through client_options. + default_job_creation_mode (Optional[str]): + Sets the default job creation mode used by query methods such as + query_and_wait(). For lightweight queries, JOB_CREATION_OPTIONAL is + generally recommended. Raises: google.auth.exceptions.DefaultCredentialsError: @@ -243,6 +247,7 @@ def __init__( client_options: Optional[ Union[google.api_core.client_options.ClientOptions, Dict[str, Any]] ] = None, + default_job_creation_mode: Optional[str] = None, ) -> None: if client_options is None: client_options = {} @@ -277,6 +282,7 @@ def __init__( self._connection = Connection(self, **kw_args) self._location = location self._default_load_job_config = copy.deepcopy(default_load_job_config) + self.default_job_creation_mode = default_job_creation_mode # Use property setter so validation can run. self.default_query_job_config = default_query_job_config @@ -286,6 +292,15 @@ def location(self): """Default location for jobs / datasets / tables.""" return self._location + @property + def default_job_creation_mode(self): + """Default job creation mode used for query execution.""" + return self._default_job_creation_mode + + @default_job_creation_mode.setter + def default_job_creation_mode(self, value: Optional[str]): + self._default_job_creation_mode = value + @property def default_query_job_config(self) -> Optional[QueryJobConfig]: """Default ``QueryJobConfig`` or ``None``. @@ -3532,13 +3547,6 @@ def query_and_wait( ) -> RowIterator: """Run the query, wait for it to finish, and return the results. - While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the - ``jobs.query`` REST API, use the default ``jobCreationMode`` unless - the environment variable ``QUERY_PREVIEW_ENABLED=true``. After - ``jobCreationMode`` is GA, this method will always use - ``jobCreationMode=JOB_CREATION_OPTIONAL``. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query - Args: query (str): SQL query to be executed. Defaults to the standard SQL diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 203ea3c7b..4cb7a056d 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -407,3 +407,22 @@ class BigLakeTableFormat(object): ICEBERG = "ICEBERG" """Apache Iceberg format.""" + + +class JobCreationMode(object): + """Documented values for Job Creation Mode.""" + + JOB_CREATION_MODE_UNSPECIFIED = "JOB_CREATION_MODE_UNSPECIFIED" + """Job creation mode is unspecified.""" + + JOB_CREATION_REQUIRED = "JOB_CREATION_REQUIRED" + """Job creation is always required.""" + + JOB_CREATION_OPTIONAL = "JOB_CREATION_OPTIONAL" + """Job creation is optional. + + Returning immediate results is prioritized. + BigQuery will automatically determine if a Job needs to be created. + The conditions under which BigQuery can decide to not create a Job are + subject to change. + """ diff --git a/samples/client_query_shortmode.py b/samples/client_query_job_optional.py similarity index 69% rename from samples/client_query_shortmode.py rename to samples/client_query_job_optional.py index 50446dc48..6321aea35 100644 --- a/samples/client_query_shortmode.py +++ b/samples/client_query_job_optional.py @@ -13,16 +13,18 @@ # limitations under the License. -def client_query_shortmode() -> None: - # [START bigquery_query_shortquery] - # This example demonstrates issuing a query that may be run in short query mode. - # - # To enable the short query mode preview feature, the QUERY_PREVIEW_ENABLED - # environmental variable should be set to `TRUE`. +def client_query_job_optional() -> None: + # [START bigquery_query_job_optional] + # This example demonstrates executing a query without requiring an associated + # job. from google.cloud import bigquery + from google.cloud.bigquery.enums import JobCreationMode - # Construct a BigQuery client object. - client = bigquery.Client() + # Construct a BigQuery client object, specifying that the library should + # avoid creating jobs when possible. + client = bigquery.Client( + default_job_creation_mode=JobCreationMode.JOB_CREATION_OPTIONAL + ) query = """ SELECT @@ -44,10 +46,12 @@ def client_query_shortmode() -> None: if rows.job_id is not None: print("Query was run with job state. Job ID: {}".format(rows.job_id)) else: - print("Query was run in short mode. Query ID: {}".format(rows.query_id)) + print( + "Query was run without creating a job. Query ID: {}".format(rows.query_id) + ) print("The query data:") for row in rows: # Row values can be accessed by field name or index. print("name={}, gender={}, total={}".format(row[0], row[1], row["total"])) - # [END bigquery_query_shortquery] + # [END bigquery_query_job_optional] diff --git a/samples/tests/test_client_query_shortmode.py b/samples/tests/test_client_query_job_optional.py similarity index 85% rename from samples/tests/test_client_query_shortmode.py rename to samples/tests/test_client_query_job_optional.py index 41132f24c..0e0b2cf19 100644 --- a/samples/tests/test_client_query_shortmode.py +++ b/samples/tests/test_client_query_job_optional.py @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,13 +14,13 @@ import typing -from .. import client_query_shortmode +from .. import client_query_job_optional if typing.TYPE_CHECKING: import pytest def test_client_query_shortmode(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_shortmode.client_query_shortmode() + client_query_job_optional.client_query_job_optional() out, err = capsys.readouterr() assert "Query was run" in out diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 4fa093c69..417f911b8 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -554,13 +554,9 @@ def test_query_and_wait_retries_job_times_out(): ) -def test_query_and_wait_sets_job_creation_mode(monkeypatch: pytest.MonkeyPatch): - monkeypatch.setenv( - "QUERY_PREVIEW_ENABLED", - # The comparison should be case insensitive. - "TrUe", - ) +def test_query_and_wait_sets_job_creation_mode(): client = mock.create_autospec(Client) + client.default_job_creation_mode = "JOB_CREATION_OPTIONAL" client._call_api.return_value = { "jobReference": { "projectId": "response-project", @@ -642,6 +638,7 @@ def test_query_and_wait_sets_location(): "useInt64Timestamp": True, }, "requestId": mock.ANY, + "jobCreationMode": mock.ANY, }, timeout=None, ) @@ -658,6 +655,7 @@ def test_query_and_wait_sets_location(): ) def test_query_and_wait_sets_max_results(max_results, page_size, expected): client = mock.create_autospec(Client) + client.default_job_creation_mode = None client._call_api.return_value = { "jobReference": { "projectId": "response-project", @@ -703,6 +701,7 @@ def test_query_and_wait_sets_max_results(max_results, page_size, expected): def test_query_and_wait_caches_completed_query_results_one_page(): client = mock.create_autospec(Client) + client.default_job_creation_mode = None client._call_api.return_value = { "jobReference": { "projectId": "response-project", @@ -768,6 +767,7 @@ def test_query_and_wait_caches_completed_query_results_one_page(): def test_query_and_wait_caches_completed_query_results_one_page_no_rows(): client = mock.create_autospec(Client) + client.default_job_creation_mode = None client._call_api.return_value = { "jobReference": { "projectId": "response-project", diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 468068321..8ce8d2cbd 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -213,6 +213,17 @@ def test_ctor_w_client_options_universe(self): ) self.assertEqual(client._connection.API_BASE_URL, "https://bigquery.foo.com") + def test_ctor_w_job_creation_mode(self): + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_job_creation_mode="foo", + ) + self.assertEqual(client.default_job_creation_mode, "foo") + def test_ctor_w_location(self): from google.cloud.bigquery._http import Connection From cfbf263947e4acb3a866dae96e622c67fc0c6ec3 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 27 May 2025 20:46:02 +0200 Subject: [PATCH 464/536] chore(deps): update dependency pytest-xdist to v3.7.0 (#2203) Co-authored-by: Chalmer Lowe Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements-test.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/magics/requirements-test.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index cc71ee426..2ad35b418 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 7b01ce8ac..3ca365401 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index cc71ee426..2ad35b418 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index cc71ee426..2ad35b418 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 503324cb0..767f71fb1 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -2,4 +2,4 @@ google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 From d92b487c29e8d27a2f04e3b15eec14e8c3d109f0 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 27 May 2025 13:34:28 -0700 Subject: [PATCH 465/536] chore(main): release 3.34.0 (#2193) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: shollyman --- CHANGELOG.md | 18 ++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f7166d44..3b29a6a41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.34.0](https://github.com/googleapis/python-bigquery/compare/v3.33.0...v3.34.0) (2025-05-27) + + +### Features + +* Job creation mode GA ([#2190](https://github.com/googleapis/python-bigquery/issues/2190)) ([64cd39f](https://github.com/googleapis/python-bigquery/commit/64cd39fb395c4a03ef6d2ec8261e1709477b2186)) + + +### Bug Fixes + +* **deps:** Update all dependencies ([#2184](https://github.com/googleapis/python-bigquery/issues/2184)) ([12490f2](https://github.com/googleapis/python-bigquery/commit/12490f2f03681516465fc34217dcdf57000f6fdd)) + + +### Documentation + +* Update query.py ([#2192](https://github.com/googleapis/python-bigquery/issues/2192)) ([9b5ee78](https://github.com/googleapis/python-bigquery/commit/9b5ee78f046d9ca3f758eeca6244b8485fe35875)) +* Use query_and_wait in the array parameters sample ([#2202](https://github.com/googleapis/python-bigquery/issues/2202)) ([28a9994](https://github.com/googleapis/python-bigquery/commit/28a9994792ec90a6a4d16835faf2137c09c0fb02)) + ## [3.33.0](https://github.com/googleapis/python-bigquery/compare/v3.32.0...v3.33.0) (2025-05-19) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 8304ac025..9e1393854 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.33.0" +__version__ = "3.34.0" From de33204bd67bc897c3a19b709becd0b9473bd907 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 30 May 2025 11:16:38 +0200 Subject: [PATCH 466/536] chore(deps): update all dependencies (#2205) --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 4a5b75346..a512dbd3a 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.33.0 +google-cloud-bigquery==3.34.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 7a0946fae..049e88237 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,8 +12,8 @@ geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 google-auth==2.40.2 -google-cloud-bigquery==3.33.0 -google-cloud-bigquery-storage==2.31.0 +google-cloud-bigquery==3.34.0 +google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 7d0c91e3d..960eb6db4 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.0 db-dtypes==1.4.3 -google.cloud.bigquery==3.33.0 -google-cloud-bigquery-storage==2.31.0 +google.cloud.bigquery==3.34.0 +google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 pandas==2.2.3 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 9f131e5b8..27eb7459a 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,7 +1,7 @@ bigquery-magics==0.10.0 db-dtypes==1.4.3 -google-cloud-bigquery==3.33.0 -google-cloud-bigquery-storage==2.31.0 +google-cloud-bigquery==3.34.0 +google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' ipython==9.2.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index dae43eff3..fd8bd672b 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.33.0 +google-cloud-bigquery==3.34.0 From b86329188ba35e61871db82ae1d95d2a576eed1b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 30 May 2025 12:36:07 -0400 Subject: [PATCH 467/536] Fix: Update type hints for various BigQuery files (#2206) * Fix: Update type hints for various BigQuery files This commit addresses Issue #2132 by updating type hints in the following files: - google/cloud/bigquery/external_config.py - google/cloud/bigquery/job/base.py - google/cloud/bigquery/routine/routine.py - google/cloud/bigquery/schema.py - google/cloud/bigquery/table.py These changes improve code clarity and maintainability by providing more accurate type information. * updates type hints across the board --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/external_config.py | 9 +++++---- google/cloud/bigquery/job/base.py | 4 +--- google/cloud/bigquery/routine/routine.py | 12 +++--------- google/cloud/bigquery/schema.py | 6 ++---- google/cloud/bigquery/table.py | 11 ++++++----- 5 files changed, 17 insertions(+), 25 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 6e943adf3..cb8141cd0 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -22,6 +22,7 @@ import base64 import copy +import typing from typing import Any, Dict, FrozenSet, Iterable, Optional, Union from google.cloud.bigquery._helpers import _to_bytes @@ -835,10 +836,10 @@ def schema(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema """ - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - prop = self._properties.get("schema", {}) # type: ignore - return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] # type: ignore + prop: Dict[str, Any] = typing.cast( + Dict[str, Any], self._properties.get("schema", {}) + ) + return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] @schema.setter def schema(self, value): diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 5eb700ce7..f007b9341 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -435,9 +435,7 @@ def __init__(self, job_id, client): @property def configuration(self) -> _JobConfig: """Job-type specific configurtion.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - configuration = self._CONFIG_CLASS() # pytype: disable=not-callable + configuration: _JobConfig = self._CONFIG_CLASS() # pytype: disable=not-callable configuration._properties = self._properties.setdefault("configuration", {}) return configuration diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index 7e079781d..e933fa137 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -518,23 +518,17 @@ def __init__(self): @property def project(self): """str: ID of the project containing the routine.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - return self._properties["projectId"] # pytype: disable=typed-dict-error + return self._properties.get("projectId", "") @property def dataset_id(self): """str: ID of dataset containing the routine.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - return self._properties["datasetId"] # pytype: disable=typed-dict-error + return self._properties.get("datasetId", "") @property def routine_id(self): """str: The routine ID.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - return self._properties["routineId"] # pytype: disable=typed-dict-error + return self._properties.get("routineId", "") @property def path(self): diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 1f1aab7a4..456730b00 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -232,11 +232,9 @@ def __init__( if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length if policy_tags is not _DEFAULT_VALUE: - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 self._properties["policyTags"] = ( - policy_tags.to_api_repr() # pytype: disable=attribute-error - if policy_tags is not None + policy_tags.to_api_repr() + if isinstance(policy_tags, PolicyTagList) else None ) if isinstance(range_element_type, str): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 3b1334bd3..3ffd5ca56 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -137,9 +137,9 @@ def _reference_getter(table): return TableReference(dataset_ref, table.table_id) -# TODO: The typehinting for this needs work. Setting this pragma to temporarily -# manage a pytype issue that came up in another PR. See Issue: #2132 -def _view_use_legacy_sql_getter(table): +def _view_use_legacy_sql_getter( + table: Union["Table", "TableListItem"] +) -> Optional[bool]: """bool: Specifies whether to execute the view with Legacy or Standard SQL. This boolean specifies whether to execute the view with Legacy SQL @@ -151,15 +151,16 @@ def _view_use_legacy_sql_getter(table): ValueError: For invalid value types. """ - view = table._properties.get("view") # type: ignore + view: Optional[Dict[str, Any]] = table._properties.get("view") if view is not None: # The server-side default for useLegacySql is True. - return view.get("useLegacySql", True) # type: ignore + return view.get("useLegacySql", True) if view is not None else True # In some cases, such as in a table list no view object is present, but the # resource still represents a view. Use the type as a fallback. if table.table_type == "VIEW": # The server-side default for useLegacySql is True. return True + return None # explicit return statement to appease mypy class _TableBase: From eb9c2aff242c5107f968bbd8b6a9d30cecc877f6 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 30 May 2025 17:58:02 -0400 Subject: [PATCH 468/536] feat: Add UpdateMode to update_dataset (#2204) * feat: Add UpdateMode to update_dataset This commit introduces the `UpdateMode` enum and integrates it into the `update_dataset` method in the BigQuery client. The `UpdateMode` enum allows you to specify which parts of a dataset should be updated (metadata, ACL, or full update). The following changes were made: - Defined the `UpdateMode` enum in `google/cloud/bigquery/enums.py` with values: `UPDATE_MODE_UNSPECIFIED`, `UPDATE_METADATA`, `UPDATE_ACL`, and `UPDATE_FULL`. - Modified the `update_dataset` method in `google/cloud/bigquery/client.py` to accept an optional `update_mode` parameter. This parameter is added to the query parameters if provided. - Added unit tests in `tests/unit/test_client.py` to verify the correct handling of the `update_mode` parameter, including testing all enum values and the default case where it's not provided. * updates enums, client, and tests --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/client.py | 22 ++++++++ google/cloud/bigquery/enums.py | 18 +++++++ tests/unit/test_client.py | 93 ++++++++++++++++++++++++++++++++- 3 files changed, 132 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index c6873545b..cc3b3eb2a 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -91,6 +91,7 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.enums import AutoRowIDs +from google.cloud.bigquery.enums import UpdateMode from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import ( CopyJob, @@ -1198,6 +1199,7 @@ def update_dataset( fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + update_mode: Optional[UpdateMode] = None, ) -> Dataset: """Change some fields of a dataset. @@ -1237,6 +1239,20 @@ def update_dataset( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + update_mode (Optional[google.cloud.bigquery.enums.UpdateMode]): + Specifies the kind of information to update in a dataset. + By default, dataset metadata (e.g. friendlyName, description, + labels, etc) and ACL information are updated. This argument can + take on the following possible enum values. + + * :attr:`~google.cloud.bigquery.enums.UPDATE_MODE_UNSPECIFIED`: + The default value. Behavior defaults to UPDATE_FULL. + * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_METADATA`: + Includes metadata information for the dataset, such as friendlyName, description, labels, etc. + * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_ACL`: + Includes ACL information for the dataset, which defines dataset access for one or more entities. + * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_FULL`: + Includes both dataset metadata and ACL information. Returns: google.cloud.bigquery.dataset.Dataset: @@ -1250,6 +1266,11 @@ def update_dataset( path = dataset.path span_attributes = {"path": path, "fields": fields} + if update_mode: + query_params = {"updateMode": update_mode.value} + else: + query_params = {} + api_response = self._call_api( retry, span_name="BigQuery.updateDataset", @@ -1259,6 +1280,7 @@ def update_dataset( data=partial, headers=headers, timeout=timeout, + query_params=query_params, ) return Dataset.from_api_repr(api_response) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 4cb7a056d..e9cd911d0 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -409,6 +409,24 @@ class BigLakeTableFormat(object): """Apache Iceberg format.""" +class UpdateMode(enum.Enum): + """Specifies the kind of information to update in a dataset.""" + + UPDATE_MODE_UNSPECIFIED = "UPDATE_MODE_UNSPECIFIED" + """The default value. Behavior defaults to UPDATE_FULL.""" + + UPDATE_METADATA = "UPDATE_METADATA" + """Includes metadata information for the dataset, such as friendlyName, + description, labels, etc.""" + + UPDATE_ACL = "UPDATE_ACL" + """Includes ACL information for the dataset, which defines dataset access + for one or more entities.""" + + UPDATE_FULL = "UPDATE_FULL" + """Includes both dataset metadata and ACL information.""" + + class JobCreationMode(object): """Documented values for Job Creation Mode.""" diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 8ce8d2cbd..ed092bcdb 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -60,7 +60,8 @@ from google.cloud.bigquery import job as bqjob import google.cloud.bigquery._job_helpers -from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.dataset import DatasetReference, Dataset +from google.cloud.bigquery.enums import UpdateMode from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions import google.cloud.bigquery.retry @@ -2101,6 +2102,7 @@ def test_update_dataset(self): }, path="/" + PATH, timeout=7.5, + query_params={}, ) self.assertEqual(ds2.description, ds.description) self.assertEqual(ds2.friendly_name, ds.friendly_name) @@ -2114,6 +2116,94 @@ def test_update_dataset(self): client.update_dataset(ds, []) req = conn.api_request.call_args self.assertEqual(req[1]["headers"]["If-Match"], "etag") + self.assertEqual(req[1].get("query_params"), {}) + + def test_update_dataset_w_update_mode(self): + PATH = f"projects/{self.PROJECT}/datasets/{self.DS_ID}" + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + DESCRIPTION = "DESCRIPTION" + RESOURCE = { + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "description": DESCRIPTION, + } + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + orig_dataset = Dataset(dataset_ref) + orig_dataset.description = DESCRIPTION + filter_fields = ["description"] + + test_cases = [ + (None, None), + (UpdateMode.UPDATE_MODE_UNSPECIFIED, "UPDATE_MODE_UNSPECIFIED"), + (UpdateMode.UPDATE_METADATA, "UPDATE_METADATA"), + (UpdateMode.UPDATE_ACL, "UPDATE_ACL"), + (UpdateMode.UPDATE_FULL, "UPDATE_FULL"), + ] + + for update_mode_arg, expected_param_value in test_cases: + with self.subTest( + update_mode_arg=update_mode_arg, + expected_param_value=expected_param_value, + ): + conn = client._connection = make_connection(RESOURCE, RESOURCE) + + new_dataset = client.update_dataset( + orig_dataset, + fields=filter_fields, + update_mode=update_mode_arg, + ) + self.assertEqual(orig_dataset.description, new_dataset.description) + + if expected_param_value: + expected_query_params = {"updateMode": expected_param_value} + else: + expected_query_params = {} + + conn.api_request.assert_called_once_with( + method="PATCH", + path="/" + PATH, + data={"description": DESCRIPTION}, + timeout=DEFAULT_TIMEOUT, + query_params=expected_query_params if expected_query_params else {}, + ) + + def test_update_dataset_w_invalid_update_mode(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + DESCRIPTION = "DESCRIPTION" + resource = { + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + } + + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + orig_dataset = Dataset(dataset_ref) + orig_dataset.description = DESCRIPTION + filter_fields = ["description"] # A non-empty list of fields is required + + # Mock the connection to prevent actual API calls + # and to provide a minimal valid response if the call were to proceed. + conn = client._connection = make_connection(resource) + + test_cases = [ + "INVALID_STRING", + 123, + 123.45, + object(), + ] + + for invalid_update_mode in test_cases: + with self.subTest(invalid_update_mode=invalid_update_mode): + conn.api_request.reset_mock() # Reset mock for each sub-test + with self.assertRaises(AttributeError): + client.update_dataset( + orig_dataset, + fields=filter_fields, + update_mode=invalid_update_mode, + ) def test_update_dataset_w_custom_property(self): # The library should handle sending properties to the API that are not @@ -2145,6 +2235,7 @@ def test_update_dataset_w_custom_property(self): data={"newAlphaProperty": "unreleased property"}, path=path, timeout=DEFAULT_TIMEOUT, + query_params={}, ) self.assertEqual(dataset.dataset_id, self.DS_ID) From 28a5750d455f0381548df6f9b1f7661823837d81 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 2 Jun 2025 05:42:08 -0400 Subject: [PATCH 469/536] feat: Adds dataset_view parameter to get_dataset method (#2198) * feat: Add dataset_view parameter to get_dataset method This commit introduces a new `dataset_view` parameter to the `get_dataset` method in the BigQuery client. This allows you to specify the level of detail (METADATA, ACL, FULL) returned when fetching a dataset. The `DatasetView` enum has been added to `enums.py`. Unit tests have been added to verify: - Correct query parameter (`view`) formation for each enum value. - Correct behavior when `dataset_view` is None. - AttributeError is raised for invalid `dataset_view` types. * test edits, linting, etc. * Fixes docstring * updates docstrings * update parameter name to align with discovery doc * Update google/cloud/bigquery/client.py --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/client.py | 26 +++++++++++- google/cloud/bigquery/enums.py | 18 ++++++++ tests/unit/test_client.py | 70 ++++++++++++++++++++++++++++++- tests/unit/test_create_dataset.py | 7 +++- 4 files changed, 116 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index cc3b3eb2a..bb4d80c73 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -90,8 +90,8 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery.enums import UpdateMode + +from google.cloud.bigquery.enums import AutoRowIDs, DatasetView, UpdateMode from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import ( CopyJob, @@ -865,6 +865,7 @@ def get_dataset( dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + dataset_view: Optional[DatasetView] = None, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -882,7 +883,21 @@ def get_dataset( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + dataset_view (Optional[google.cloud.bigquery.enums.DatasetView]): + Specifies the view that determines which dataset information is + returned. By default, dataset metadata (e.g. friendlyName, description, + labels, etc) and ACL information are returned. This argument can + take on the following possible enum values. + * :attr:`~google.cloud.bigquery.enums.DatasetView.ACL`: + Includes dataset metadata and the ACL. + * :attr:`~google.cloud.bigquery.enums.DatasetView.FULL`: + Includes all dataset metadata, including the ACL and table metadata. + This view is not supported by the `datasets.list` API method. + * :attr:`~google.cloud.bigquery.enums.DatasetView.METADATA`: + Includes basic dataset metadata, but not the ACL. + * :attr:`~google.cloud.bigquery.enums.DatasetView.DATASET_VIEW_UNSPECIFIED`: + The server will decide which view to use. Currently defaults to FULL. Returns: google.cloud.bigquery.dataset.Dataset: A ``Dataset`` instance. @@ -892,6 +907,12 @@ def get_dataset( dataset_ref, default_project=self.project ) path = dataset_ref.path + + if dataset_view: + query_params = {"datasetView": dataset_view.value} + else: + query_params = {} + span_attributes = {"path": path} api_response = self._call_api( retry, @@ -900,6 +921,7 @@ def get_dataset( method="GET", path=path, timeout=timeout, + query_params=query_params, ) return Dataset.from_api_repr(api_response) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index e9cd911d0..9a1e4880c 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -80,6 +80,24 @@ class CreateDisposition(object): returned in the job result.""" +class DatasetView(enum.Enum): + """DatasetView specifies which dataset information is returned.""" + + DATASET_VIEW_UNSPECIFIED = "DATASET_VIEW_UNSPECIFIED" + """The default value. Currently maps to the FULL view.""" + + METADATA = "METADATA" + """View metadata information for the dataset, such as friendlyName, + description, labels, etc.""" + + ACL = "ACL" + """View ACL information for the dataset, which defines dataset access + for one or more entities.""" + + FULL = "FULL" + """View both dataset metadata and ACL information.""" + + class DefaultPandasDTypes(enum.Enum): """Default Pandas DataFrem DTypes to convert BigQuery data. These Sentinel values are used instead of None to maintain backward compatibility, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ed092bcdb..42bfc84b9 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -61,7 +61,7 @@ from google.cloud.bigquery import job as bqjob import google.cloud.bigquery._job_helpers from google.cloud.bigquery.dataset import DatasetReference, Dataset -from google.cloud.bigquery.enums import UpdateMode +from google.cloud.bigquery.enums import UpdateMode, DatasetView from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions import google.cloud.bigquery.retry @@ -753,7 +753,7 @@ def test_get_dataset(self): final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=7.5 + method="GET", path="/%s" % path, timeout=7.5, query_params={} ) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -819,6 +819,72 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) + def test_get_dataset_with_dataset_view(self): + path = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + } + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + + test_cases = [ + (None, None), + (DatasetView.DATASET_VIEW_UNSPECIFIED, "DATASET_VIEW_UNSPECIFIED"), + (DatasetView.METADATA, "METADATA"), + (DatasetView.ACL, "ACL"), + (DatasetView.FULL, "FULL"), + ] + + for dataset_view_arg, expected_param_value in test_cases: + with self.subTest( + dataset_view_arg=dataset_view_arg, + expected_param_value=expected_param_value, + ): + # Re-initialize the connection mock for each sub-test to reset side_effect + conn = client._connection = make_connection(resource) + + dataset = client.get_dataset(dataset_ref, dataset_view=dataset_view_arg) + + self.assertEqual(dataset.dataset_id, self.DS_ID) + + if expected_param_value: + expected_query_params = {"datasetView": expected_param_value} + else: + expected_query_params = {} + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % path, + timeout=DEFAULT_TIMEOUT, + query_params=expected_query_params if expected_query_params else {}, + ) + + def test_get_dataset_with_invalid_dataset_view(self): + invalid_view_values = [ + "INVALID_STRING", + 123, + 123.45, + object(), + ] + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + } + conn = client._connection = make_connection(resource) + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + + for invalid_view_value in invalid_view_values: + with self.subTest(invalid_view_value=invalid_view_value): + conn.api_request.reset_mock() # Reset mock for each sub-test + with self.assertRaises(AttributeError): + client.get_dataset(dataset_ref, dataset_view=invalid_view_value) + def test_ensure_bqstorage_client_creating_new_instance(self): bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index bd7c6a8f8..b144471ca 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -372,7 +372,12 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) }, timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), + mock.call( + method="GET", + path=get_path, + timeout=DEFAULT_TIMEOUT, + query_params={}, + ), ] ) From 0378caa0fdaeec23929b179ca62a7199a8a6098d Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 5 Jun 2025 00:19:15 +0200 Subject: [PATCH 470/536] chore(deps): update all dependencies (#2209) * chore(deps): update all dependencies * pin geopandas===1.0.1 for python <= 3.9 --------- Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements-test.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 9 +++++---- samples/magics/requirements-test.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 7 files changed, 11 insertions(+), 10 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 2ad35b418..4b9c515a7 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 3ca365401..824a1df4a 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 049e88237..5ff1c0c02 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -9,8 +9,9 @@ cligj==0.7.2 db-dtypes==1.4.3 Fiona==1.10.1 geojson==3.2.0 -geopandas==1.0.1 -google-api-core==2.24.2 +geopandas===1.0.1; python_version <= '3.9' +geopandas==1.1.0; python_version >= '3.10' +google-api-core==2.25.0 google-auth==2.40.2 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 @@ -18,7 +19,7 @@ google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.71.0 +grpcio==1.72.1 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 @@ -38,6 +39,6 @@ rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' six==1.17.0 -typing-extensions==4.13.2 +typing-extensions==4.14.0 typing-inspect==0.9.0 urllib3==2.4.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 2ad35b418..4b9c515a7 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 2ad35b418..4b9c515a7 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 27eb7459a..c3feffb35 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -3,7 +3,7 @@ db-dtypes==1.4.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' -ipython==9.2.0; python_version >= '3.10' +ipython==9.3.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.3; python_version >= '3.10' pandas==2.2.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 767f71fb1..d311187ec 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 From 45643a2e20ce5d503118522dd195aeca00dec3bc Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 5 Jun 2025 11:01:02 -0700 Subject: [PATCH 471/536] fix: fix rows returned when both start_index and page_size are provided (#2181) * fix: fix total rows returned when both start_index and page_size are provided * use shallow copy and add comments * add docstring * add unit test * lint * add comment --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/client.py | 9 +++- google/cloud/bigquery/job/query.py | 8 ++++ google/cloud/bigquery/table.py | 11 ++++- tests/unit/job/test_query.py | 72 ++++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index bb4d80c73..811e9ef03 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2045,6 +2045,7 @@ def _get_query_results( location: Optional[str] = None, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = 0, + start_index: Optional[int] = None, ) -> _QueryResults: """Get the query results object for a query job. @@ -2063,9 +2064,12 @@ def _get_query_results( before using ``retry``. If set, this connection timeout may be increased to a minimum value. This prevents retries on what would otherwise be a successful response. - page_size (int): + page_size (Optional[int]): Maximum number of rows in a single response. See maxResults in the jobs.getQueryResults REST API. + start_index (Optional[int]): + Zero-based index of the starting row. See startIndex in the + jobs.getQueryResults REST API. Returns: google.cloud.bigquery.query._QueryResults: @@ -2095,6 +2099,9 @@ def _get_query_results( if location is not None: extra_params["location"] = location + if start_index is not None: + extra_params["startIndex"] = start_index + path = "/projects/{}/queries/{}".format(project, job_id) # This call is typically made in a polling loop that checks whether the diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 954a46963..4d95f0e71 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1409,6 +1409,7 @@ def _reload_query_results( retry: "retries.Retry" = DEFAULT_RETRY, timeout: Optional[float] = None, page_size: int = 0, + start_index: Optional[int] = None, ): """Refresh the cached query results unless already cached and complete. @@ -1421,6 +1422,9 @@ def _reload_query_results( page_size (int): Maximum number of rows in a single response. See maxResults in the jobs.getQueryResults REST API. + start_index (Optional[int]): + Zero-based index of the starting row. See startIndex in the + jobs.getQueryResults REST API. """ # Optimization: avoid a call to jobs.getQueryResults if it's already # been fetched, e.g. from jobs.query first page of results. @@ -1468,6 +1472,7 @@ def _reload_query_results( location=self.location, timeout=transport_timeout, page_size=page_size, + start_index=start_index, ) def result( # type: ignore # (incompatible with supertype) @@ -1570,6 +1575,9 @@ def result( # type: ignore # (incompatible with supertype) if page_size is not None: reload_query_results_kwargs["page_size"] = page_size + if start_index is not None: + reload_query_results_kwargs["start_index"] = start_index + try: retry_do_query = getattr(self, "_retry_do_query", None) if retry_do_query is not None: diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 3ffd5ca56..861f806b4 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1987,12 +1987,19 @@ def _get_next_page_response(self): return response params = self._get_query_params() + + # If the user has provided page_size and start_index, we need to pass + # start_index for the first page, but for all subsequent pages, we + # should not pass start_index. We make a shallow copy of params and do + # not alter the original, so if the user iterates the results again, + # start_index is preserved. + params_copy = copy.copy(params) if self._page_size is not None: if self.page_number and "startIndex" in params: - del params["startIndex"] + del params_copy["startIndex"] return self.api_request( - method=self._HTTP_METHOD, path=self.path, query_params=params + method=self._HTTP_METHOD, path=self.path, query_params=params_copy ) @property diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 1df65279d..46b802aa3 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -1682,6 +1682,78 @@ def test_result_with_start_index(self): tabledata_list_request[1]["query_params"]["maxResults"], page_size ) + def test_result_with_start_index_multi_page(self): + # When there are multiple pages of response and the user has set + # start_index, we should supply start_index to the server in the first + # request. However, in the subsequent requests, we will pass only + # page_token but not start_index, because the server only allows one + # of them. + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "7", + } + + # Although the result has 7 rows, the response only returns 6, because + # start_index is 1. + tabledata_resource_1 = { + "totalRows": "7", + "pageToken": "page_token_1", + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + ], + } + tabledata_resource_2 = { + "totalRows": "7", + "pageToken": None, + "rows": [ + {"f": [{"v": "jkl"}]}, + {"f": [{"v": "mno"}]}, + {"f": [{"v": "pqe"}]}, + ], + } + + connection = make_connection( + query_resource, tabledata_resource_1, tabledata_resource_2 + ) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + start_index = 1 + page_size = 3 + + result = job.result(page_size=page_size, start_index=start_index) + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 7) + + rows = list(result) + + self.assertEqual(len(rows), 6) + self.assertEqual(len(connection.api_request.call_args_list), 3) + + # First call has both startIndex and maxResults. + tabledata_list_request_1 = connection.api_request.call_args_list[1] + self.assertEqual( + tabledata_list_request_1[1]["query_params"]["startIndex"], start_index + ) + self.assertEqual( + tabledata_list_request_1[1]["query_params"]["maxResults"], page_size + ) + + # Second call only has maxResults. + tabledata_list_request_2 = connection.api_request.call_args_list[2] + self.assertFalse("startIndex" in tabledata_list_request_2[1]["query_params"]) + self.assertEqual( + tabledata_list_request_2[1]["query_params"]["maxResults"], page_size + ) + def test_result_error(self): from google.cloud import exceptions From bd5aba8ba40c2f35fb672a68eed11d6baedb304f Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 5 Jun 2025 12:01:06 -0700 Subject: [PATCH 472/536] docs: Improve clarity of "Output Only" fields in Dataset class (#2201) fixes b/407210727 --- google/cloud/bigquery/dataset.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index f788275cd..ec4098511 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -574,6 +574,10 @@ class Dataset(object): A pointer to a dataset. If ``dataset_ref`` is a string, it must include both the project ID and the dataset ID, separated by ``.``. + + Note: + Fields marked as "Output Only" are populated by the server and will only be + available after calling :meth:`google.cloud.bigquery.client.Client.get_dataset`. """ _PROPERTY_TO_API_FIELD = { @@ -692,7 +696,7 @@ def access_entries(self, value): @property def created(self): - """Union[datetime.datetime, None]: Datetime at which the dataset was + """Union[datetime.datetime, None]: Output only. Datetime at which the dataset was created (:data:`None` until set from the server). """ creation_time = self._properties.get("creationTime") @@ -709,8 +713,8 @@ def dataset_id(self): @property def full_dataset_id(self): - """Union[str, None]: ID for the dataset resource (:data:`None` until - set from the server) + """Union[str, None]: Output only. ID for the dataset resource + (:data:`None` until set from the server). In the format ``project_id:dataset_id``. """ @@ -725,14 +729,14 @@ def reference(self): @property def etag(self): - """Union[str, None]: ETag for the dataset resource (:data:`None` until - set from the server). + """Union[str, None]: Output only. ETag for the dataset resource + (:data:`None` until set from the server). """ return self._properties.get("etag") @property def modified(self): - """Union[datetime.datetime, None]: Datetime at which the dataset was + """Union[datetime.datetime, None]: Output only. Datetime at which the dataset was last modified (:data:`None` until set from the server). """ modified_time = self._properties.get("lastModifiedTime") @@ -744,8 +748,8 @@ def modified(self): @property def self_link(self): - """Union[str, None]: URL for the dataset resource (:data:`None` until - set from the server). + """Union[str, None]: Output only. URL for the dataset resource + (:data:`None` until set from the server). """ return self._properties.get("selfLink") From 99493bfb0d6230b9a04583d2b9dc40bc84ffdc49 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 9 Jun 2025 12:31:37 -0400 Subject: [PATCH 473/536] test: remove pragma (#2212) * test: remove pragma * test: remove comment about pragma * updates to conditionals related to pandas 2.0+ tests --- google/cloud/bigquery/_pandas_helpers.py | 9 ++------- tests/unit/job/test_query_pandas.py | 12 +++++------- tests/unit/test_table_pandas.py | 14 ++++++-------- 3 files changed, 13 insertions(+), 22 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 10a5c59bb..2dab03a06 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -337,13 +337,8 @@ def types_mapper(arrow_data_type): ): return range_date_dtype - # TODO: this section does not have a test yet OR at least not one that is - # recognized by coverage, hence the pragma. See Issue: #2132 - elif ( - range_timestamp_dtype is not None - and arrow_data_type.equals( # pragma: NO COVER - range_timestamp_dtype.pyarrow_dtype - ) + elif range_timestamp_dtype is not None and arrow_data_type.equals( + range_timestamp_dtype.pyarrow_dtype ): return range_timestamp_dtype diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index d82f0dfe3..a6c59b158 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -647,12 +647,6 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) -# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 -# pragma added due to issues with coverage. -@pytest.mark.skipif( - pandas.__version__.startswith("2."), - reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", -) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class @@ -704,7 +698,6 @@ def test_to_dataframe_column_dtypes(): exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] assert list(df) == exp_columns # verify the column names - assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" assert df.seconds.dtype.name == "Int64" assert df.miles.dtype.name == "float64" assert df.km.dtype.name == "float16" @@ -712,6 +705,11 @@ def test_to_dataframe_column_dtypes(): assert df.complete.dtype.name == "boolean" assert df.date.dtype.name == "dbdate" + if pandas.__version__.startswith("2."): + assert df.start_timestamp.dtype.name == "datetime64[us, UTC]" + else: + assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" + def test_to_dataframe_column_date_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 43d64d77d..a4fa3fa39 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -34,12 +34,6 @@ def class_under_test(): return RowIterator -# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 -# pragma added due to issues with coverage. -@pytest.mark.skipif( - pandas.__version__.startswith("2."), - reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", -) def test_to_dataframe_nullable_scalars( monkeypatch, class_under_test ): # pragma: NO COVER @@ -113,14 +107,18 @@ def test_to_dataframe_nullable_scalars( assert df.dtypes["bool_col"].name == "boolean" assert df.dtypes["bytes_col"].name == "object" assert df.dtypes["date_col"].name == "dbdate" - assert df.dtypes["datetime_col"].name == "datetime64[ns]" assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["int64_col"].name == "Int64" assert df.dtypes["numeric_col"].name == "object" assert df.dtypes["string_col"].name == "object" assert df.dtypes["time_col"].name == "dbtime" - assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" assert df.dtypes["json_col"].name == "object" + if pandas.__version__.startswith("2."): + assert df.dtypes["datetime_col"].name == "datetime64[us]" + assert df.dtypes["timestamp_col"].name == "datetime64[us, UTC]" + else: + assert df.dtypes["datetime_col"].name == "datetime64[ns]" + assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" # Check for expected values. assert df["bignumeric_col"][0] == decimal.Decimal("123.456789101112131415") From dc374b4e22de98850c54643a58bb9e80f865dcf7 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 9 Jun 2025 18:58:40 +0200 Subject: [PATCH 474/536] chore(deps): update all dependencies (#2211) Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5ff1c0c02..4ebff482d 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,19 +12,19 @@ geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.0; python_version >= '3.10' google-api-core==2.25.0 -google-auth==2.40.2 +google-auth==2.40.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.72.1 +grpcio==1.73.0 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.2.3 +pandas==2.3.0 proto-plus==1.26.1 pyarrow==20.0.0 pyasn1==0.6.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 960eb6db4..d2456fc5a 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -3,4 +3,4 @@ db-dtypes==1.4.3 google.cloud.bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 -pandas==2.2.3 +pandas==2.3.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index c3feffb35..66409e49d 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -6,4 +6,4 @@ ipython===8.18.1; python_version == '3.9' ipython==9.3.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.3; python_version >= '3.10' -pandas==2.2.3 +pandas==2.3.0 From 5a0fbf5cca551626d3cb49f934369049450546c1 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 9 Jun 2025 20:53:46 +0200 Subject: [PATCH 475/536] chore(deps): update dependency requests to v2.32.4 (#2213) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 4ebff482d..75a196eeb 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -34,7 +34,7 @@ pyparsing==3.2.3 python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 -requests==2.32.3 +requests==2.32.4 rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' From 96b067da092836f0d8e19d5df683a0e5680caee8 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 13 Jun 2025 19:49:19 +0200 Subject: [PATCH 476/536] chore(deps): update dependency google-api-core to v2.25.1 (#2215) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 75a196eeb..daaf67b9e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.10.1 geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.0; python_version >= '3.10' -google-api-core==2.25.0 +google-api-core==2.25.1 google-auth==2.40.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 From 4fbb37595f0e148b7912f26ac8e48a996a6cbae6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 20 Jun 2025 14:36:25 -0700 Subject: [PATCH 477/536] chore(deps): bump urllib3 from 2.4.0 to 2.5.0 in /samples/geography (#2220) Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.4.0 to 2.5.0. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.4.0...2.5.0) --- updated-dependencies: - dependency-name: urllib3 dependency-version: 2.5.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index daaf67b9e..379d682b4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -41,4 +41,4 @@ Shapely==2.1.1; python_version >= '3.10' six==1.17.0 typing-extensions==4.14.0 typing-inspect==0.9.0 -urllib3==2.4.0 +urllib3==2.5.0 From 6e70fe2c4c0bec6d6aeb16ab5a83b01746e8c64a Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Mon, 23 Jun 2025 11:50:33 -0400 Subject: [PATCH 478/536] chore: add label job sample (#2219) * chore: add label job sample * lint * remove unnecessary api call * Apply suggestions from code review Co-authored-by: Chalmer Lowe --------- Co-authored-by: Chalmer Lowe --- samples/snippets/label_job.py | 36 ++++++++++++++++++++++++++++++ samples/snippets/label_job_test.py | 31 +++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 samples/snippets/label_job.py create mode 100644 samples/snippets/label_job_test.py diff --git a/samples/snippets/label_job.py b/samples/snippets/label_job.py new file mode 100644 index 000000000..cfd06d189 --- /dev/null +++ b/samples/snippets/label_job.py @@ -0,0 +1,36 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def label_job() -> None: + # [START bigquery_label_job] + from google.cloud import bigquery + + client = bigquery.Client() + + sql = """ + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus; + """ + labels = {"color": "green"} + + config = bigquery.QueryJobConfig() + config.labels = labels + location = "us" + job = client.query(sql, location=location, job_config=config) + job_id = job.job_id + + print(f"Added {job.labels} to {job_id}.") + # [END bigquery_label_job] diff --git a/samples/snippets/label_job_test.py b/samples/snippets/label_job_test.py new file mode 100644 index 000000000..0780db61a --- /dev/null +++ b/samples/snippets/label_job_test.py @@ -0,0 +1,31 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import label_job # type: ignore + + +if typing.TYPE_CHECKING: + import pytest + + +def test_label_job( + capsys: "pytest.CaptureFixture[str]", +) -> None: + label_job.label_job() + + out, _ = capsys.readouterr() + assert "color" in out + assert "green" in out From cd2e1387c98e9df74ec85b1f3a3aba371d9ad7b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 23 Jun 2025 20:32:50 +0200 Subject: [PATCH 479/536] chore: update PyPI URL for official nightly pyarrow repository (#2223) Co-authored-by: Lingqing Gan --- noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 6807b7ee4..eb79c238d 100644 --- a/noxfile.py +++ b/noxfile.py @@ -408,10 +408,10 @@ def prerelease_deps(session): ) # PyArrow prerelease packages are published to an alternative PyPI host. - # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages + # https://arrow.apache.org/docs/developers/python.html#installing-nightly-packages session.install( "--extra-index-url", - "https://pypi.fury.io/arrow-nightlies/", + "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple", "--prefer-binary", "--pre", "--upgrade", From 7c9e7fde1d710641c27247fa5f5271c86a9be2b1 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 25 Jun 2025 19:08:09 +0200 Subject: [PATCH 480/536] chore(deps): update all dependencies (#2216) Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements-test.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 2 +- samples/magics/requirements-test.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 4b9c515a7..b3046227c 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 824a1df4a..ee895a4f4 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 379d682b4..f8f79a970 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.4.26 +certifi==2025.6.15 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 4b9c515a7..b3046227c 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 4b9c515a7..b3046227c 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index d311187ec..d71018b3f 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 From 4941de441cb32cabeb55ec0320f305fb62551155 Mon Sep 17 00:00:00 2001 From: Prabakar <86585391+drokeye@users.noreply.github.com> Date: Thu, 26 Jun 2025 23:28:53 +0530 Subject: [PATCH 481/536] fix: make AccessEntry equality consistent with from_api_repr (#2218) * fix: make AccessEntry equality consistent for view entity type * fix: make AccessEntry equality consistent for view entity type * fix: use json.dumps() for normalizaiton of entity_id * remove trailing whitespace and add test assertions * revert back to the original code * fix linting in `dataset.py` * fix linting in `test_dataset.py` --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/dataset.py | 17 +++++++- tests/unit/test_dataset.py | 67 ++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index ec4098511..878b77d41 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -17,6 +17,7 @@ from __future__ import absolute_import import copy +import json import typing from typing import Optional, List, Dict, Any, Union @@ -506,7 +507,20 @@ def entity_id(self) -> Optional[Union[Dict[str, Any], str]]: def __eq__(self, other): if not isinstance(other, AccessEntry): return NotImplemented - return self._key() == other._key() + return ( + self.role == other.role + and self.entity_type == other.entity_type + and self._normalize_entity_id(self.entity_id) + == self._normalize_entity_id(other.entity_id) + and self.condition == other.condition + ) + + @staticmethod + def _normalize_entity_id(value): + """Ensure consistent equality for dicts like 'view'.""" + if isinstance(value, dict): + return json.dumps(value, sort_keys=True) + return value def __ne__(self, other): return not self == other @@ -557,7 +571,6 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry": google.cloud.bigquery.dataset.AccessEntry: Access entry parsed from ``resource``. """ - access_entry = cls() access_entry._properties = resource.copy() return access_entry diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 3fd2579af..604e5ed2e 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1767,3 +1767,70 @@ def test__hash__with_minimal_inputs(self): description=None, ) assert hash(cond1) is not None + + def test_access_entry_view_equality(self): + from google.cloud import bigquery + + entry1 = bigquery.dataset.AccessEntry( + entity_type="view", + entity_id={ + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "my_table", + }, + ) + entry2 = bigquery.dataset.AccessEntry.from_api_repr( + { + "view": { + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "my_table", + } + } + ) + + entry3 = bigquery.dataset.AccessEntry( + entity_type="routine", + entity_id={ + "projectId": "my_project", + "datasetId": "my_dataset", + "routineId": "my_routine", + }, + ) + + entry4 = bigquery.dataset.AccessEntry.from_api_repr( + { + "routine": { + "projectId": "my_project", + "datasetId": "my_dataset", + "routineId": "my_routine", + } + } + ) + + entry5 = bigquery.dataset.AccessEntry( + entity_type="dataset", + entity_id={ + "dataset": { + "projectId": "my_project", + "datasetId": "my_dataset", + }, + "target_types": "VIEWS", + }, + ) + + entry6 = bigquery.dataset.AccessEntry.from_api_repr( + { + "dataset": { + "dataset": { + "projectId": "my_project", + "datasetId": "my_dataset", + }, + "target_types": "VIEWS", + } + } + ) + + assert entry1 == entry2 + assert entry3 == entry4 + assert entry5 == entry6 From 37e4e0ed8e6ffba6584a37131f03cb77b4fcfe64 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 27 Jun 2025 21:00:19 +0200 Subject: [PATCH 482/536] chore(deps): update all dependencies (#2224) --- samples/geography/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index f8f79a970..68f6c1662 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -4,13 +4,13 @@ cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' click==8.2.1; python_version >= '3.10' -click-plugins==1.1.1 +click-plugins==1.1.1.2 cligj==0.7.2 db-dtypes==1.4.3 Fiona==1.10.1 geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' -geopandas==1.1.0; python_version >= '3.10' +geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 google-cloud-bigquery==3.34.0 @@ -19,7 +19,7 @@ google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.73.0 +grpcio==1.73.1 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 From 27ff3a89a5f97305fa3ff673aa9183baa7df200f Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 2 Jul 2025 17:00:39 -0400 Subject: [PATCH 483/536] =?UTF-8?q?fix:=20adds=20magics.context.project=20?= =?UTF-8?q?to=20eliminate=20issues=20with=20unit=20tests=20=E2=80=A6=20(#2?= =?UTF-8?q?228)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `magics.context.project` to eliminate issues with unit tests in an upcoming PR. Several magics unit tests fail with an error message. If the test does not have knowledge of the project, it attempts to initiate a login sequence to be able to get the project identifier. The login cannot complete because the process is running in an ipython interpreter and pytest does not capture any input. This change provides an explicit reference to a project to avoid that process. ``` Please visit this URL to authorize this application: [REDACTED DUE TO SPACE REASONS] self = <_pytest.capture.DontReadFromInput object at 0x7f55d6821bd0>, size = -1 def read(self, size: int = -1) -> str: > raise OSError( "pytest: reading from stdin while output is captured! Consider using `-s`.") E OSError: pytest: reading from stdin while output is captured! Consider using `-s`. .nox/unit-3-11/lib/python3.11/site-packages/_pytest/capture.py:229: OSError ``` --- tests/unit/test_magics.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 814150693..c79e923f8 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -986,6 +986,7 @@ def test_bigquery_magic_dryrun_option_sets_job_config(monkeypatch): google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) @@ -1007,6 +1008,7 @@ def test_bigquery_magic_dryrun_option_returns_query_job(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1035,6 +1037,7 @@ def test_bigquery_magic_dryrun_option_variable_error_message( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" ipython_ns_cleanup.append((ip, "q_job")) run_query_patch = mock.patch( @@ -1064,6 +1067,7 @@ def test_bigquery_magic_dryrun_option_saves_query_job_to_variable( magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1098,6 +1102,7 @@ def test_bigquery_magic_saves_query_job_to_variable_on_error( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" ipython_ns_cleanup.append((ip, "result")) client_query_patch = mock.patch( From 7ed9fd293ab1181b5b7b97e7e9ec82aade56e7ef Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 7 Jul 2025 22:52:24 +0200 Subject: [PATCH 484/536] chore(deps): update all dependencies (#2226) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Change | Age | Confidence | |---|---|---|---| | [bigquery-magics](https://redirect.github.com/googleapis/python-bigquery-magics) | `==0.10.0` -> `==0.10.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/bigquery-magics/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/bigquery-magics/0.10.0/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [bigquery_magics](https://redirect.github.com/googleapis/python-bigquery-magics) | `==0.10.0` -> `==0.10.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/bigquery-magics/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/bigquery-magics/0.10.0/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [ipython](https://redirect.github.com/ipython/ipython) | `==9.3.0` -> `==9.4.0` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/ipython/9.4.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/ipython/9.3.0/9.4.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [pandas](https://redirect.github.com/pandas-dev/pandas) | `==2.3.0` -> `==2.3.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/pandas/2.3.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/pandas/2.3.0/2.3.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [pytest-xdist](https://redirect.github.com/pytest-dev/pytest-xdist) ([changelog](https://pytest-xdist.readthedocs.io/en/latest/changelog.html)) | `==3.7.0` -> `==3.8.0` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/pytest-xdist/3.8.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/pytest-xdist/3.7.0/3.8.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [typing-extensions](https://redirect.github.com/python/typing_extensions) ([changelog](https://redirect.github.com/python/typing_extensions/blob/main/CHANGELOG.md)) | `==4.14.0` -> `==4.14.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/typing-extensions/4.14.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/typing-extensions/4.14.0/4.14.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery-magics (bigquery-magics) ### [`v0.10.1`](https://redirect.github.com/googleapis/python-bigquery-magics/blob/HEAD/CHANGELOG.md#0101-2025-07-07) [Compare Source](https://redirect.github.com/googleapis/python-bigquery-magics/compare/v0.10.0...v0.10.1) ##### Dependencies - Move spanner-graph-notebook back to version 1.1.6 ([#​126](https://redirect.github.com/googleapis/python-bigquery-magics/issues/126)) ([17ee695](https://redirect.github.com/googleapis/python-bigquery-magics/commit/17ee6956c8fec740440836609a9106e900b63074))
ipython/ipython (ipython) ### [`v9.4.0`](https://redirect.github.com/ipython/ipython/compare/9.3.0...9.4.0) [Compare Source](https://redirect.github.com/ipython/ipython/compare/9.3.0...9.4.0)
pandas-dev/pandas (pandas) ### [`v2.3.1`](https://redirect.github.com/pandas-dev/pandas/releases/tag/v2.3.1): Pandas 2.3.1 [Compare Source](https://redirect.github.com/pandas-dev/pandas/compare/v2.3.0...v2.3.1) We are pleased to announce the release of pandas 2.3.1. This release includes some improvements and fixes to the future string data type (preview feature for the upcoming pandas 3.0). We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/2.3.1/whatsnew/v2.3.1.html) for a list of all the changes. Pandas 2.3.1 supports Python 3.9 and higher. The release will be available on the conda-forge channel: ``` conda install pandas --channel conda-forge ``` Or via PyPI: ``` python3 -m pip install --upgrade pandas ``` Please report any issues with the release on the [pandas issue tracker](https://redirect.github.com/pandas-dev/pandas/issues). Thanks to all the contributors who made this release possible.
pytest-dev/pytest-xdist (pytest-xdist) ### [`v3.8.0`](https://redirect.github.com/pytest-dev/pytest-xdist/blob/HEAD/CHANGELOG.rst#pytest-xdist-380-2025-06-30) [Compare Source](https://redirect.github.com/pytest-dev/pytest-xdist/compare/v3.7.0...v3.8.0) \=============================== ## Features - `#​1083 `\_: Add `--no-loadscope-reorder` and `--loadscope-reorder` option to control whether to automatically reorder tests in loadscope for tests where relative ordering matters. This only applies when using `loadscope`. For example, \[test\_file\_1, test\_file\_2, ..., test\_file\_n] are given as input test files, if `--no-loadscope-reorder` is used, for either worker, the `test_file_a` will be executed before `test_file_b` only if `a < b`. The default behavior is to reorder the tests to maximize the number of tests that can be executed in parallel.
python/typing_extensions (typing-extensions) ### [`v4.14.1`](https://redirect.github.com/python/typing_extensions/blob/HEAD/CHANGELOG.md#Release-4141-July-4-2025) [Compare Source](https://redirect.github.com/python/typing_extensions/compare/4.14.0...4.14.1) - Fix usage of `typing_extensions.TypedDict` nested inside other types (e.g., `typing.Type[typing_extensions.TypedDict]`). This is not allowed by the type system but worked on older versions, so we maintain support.
--- ### Configuration 📅 **Schedule**: Branch creation - At any time (no schedule defined), Automerge - At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://redirect.github.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR was generated by [Mend Renovate](https://mend.io/renovate/). View the [repository job log](https://developer.mend.io/github/googleapis/python-bigquery). --- samples/desktopapp/requirements-test.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 6 +++--- samples/snippets/requirements-test.txt | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index b3046227c..3bf52c85d 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index ee895a4f4..d449b373b 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 68f6c1662..5b342fe5c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -24,7 +24,7 @@ idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.3.0 +pandas==2.3.1 proto-plus==1.26.1 pyarrow==20.0.0 pyasn1==0.6.1 @@ -39,6 +39,6 @@ rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' six==1.17.0 -typing-extensions==4.14.0 +typing-extensions==4.14.1 typing-inspect==0.9.0 urllib3==2.5.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index b3046227c..3bf52c85d 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index d2456fc5a..b53a35982 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.10.0 +bigquery_magics==0.10.1 db-dtypes==1.4.3 google.cloud.bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 -pandas==2.3.0 +pandas==2.3.1 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index b3046227c..3bf52c85d 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 66409e49d..4b134ac9d 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ -bigquery-magics==0.10.0 +bigquery-magics==0.10.1 db-dtypes==1.4.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' -ipython==9.3.0; python_version >= '3.10' +ipython==9.4.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.3; python_version >= '3.10' -pandas==2.3.0 +pandas==2.3.1 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index d71018b3f..cef3450e1 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -2,4 +2,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 From b2300d032843512b7e4a5703377632fe60ef3f8d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 9 Jul 2025 19:12:23 -0400 Subject: [PATCH 485/536] feat: adds time_zone to external config and load job (#2229) This commit introduces new configuration options for BigQuery load jobs and external table definitions, aligning with recent updates to the underlying protos. New options added: `time_zone`: Time zone used when parsing timestamp values that do not have specific time zone information. (Applies to `LoadJobConfig`, `LoadJob`, and `ExternalConfig`) Changes include: Added corresponding properties (getters/setters) to `LoadJobConfig`, `LoadJob`, and `ExternalConfig`. Updated docstrings and type hints for all new attributes. Updated unit tests to cover the new options, ensuring they are correctly handled during object initialization, serialization to API representation, and deserialization from API responses. --- google/cloud/bigquery/external_config.py | 17 +++ google/cloud/bigquery/job/load.py | 21 ++++ tests/unit/job/test_load.py | 32 ++++++ tests/unit/job/test_load_config.py | 127 +++++++++++++++++++++++ tests/unit/test_external_config.py | 7 ++ 5 files changed, 204 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index cb8141cd0..fcfcaca20 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -848,6 +848,23 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop + @property + def time_zone(self) -> Optional[str]: + """Optional[str]: Time zone used when parsing timestamp values that do not + have specific time zone information (e.g. 2024-04-20 12:34:56). The expected + format is an IANA timezone string (e.g. America/Los_Angeles). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone + """ + + result = self._properties.get("timeZone") + return typing.cast(str, result) + + @time_zone.setter + def time_zone(self, value: Optional[str]): + self._properties["timeZone"] = value + @property def connection_id(self): """Optional[str]: [Experimental] ID of a BigQuery Connection API diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index e56ce16f0..5d49aef18 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -548,6 +548,20 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def time_zone(self) -> Optional[str]: + """Optional[str]: Default time zone that will apply when parsing timestamp + values that have no specific time zone. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_zone + """ + return self._get_sub_prop("timeZone") + + @time_zone.setter + def time_zone(self, value: Optional[str]): + self._set_sub_prop("timeZone", value) + @property def time_partitioning(self): """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based @@ -889,6 +903,13 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def time_zone(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.time_zone`. + """ + return self.configuration.time_zone + @property def schema_update_options(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 10df46fb3..81d8e44b4 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -38,10 +38,14 @@ def _setUpConstants(self): self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" + self.TIME_ZONE = "UTC" + def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] + + config["timeZone"] = self.TIME_ZONE config["destinationTable"] = { "projectId": self.PROJECT, "datasetId": self.DS_ID, @@ -152,6 +156,10 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.destination_encryption_configuration) + if "timeZone" in config: + self.assertEqual(job.time_zone, config["timeZone"]) + else: + self.assertIsNone(job.time_zone) def test_ctor(self): client = _make_client(project=self.PROJECT) @@ -195,6 +203,8 @@ def test_ctor(self): self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) + self.assertIsNone(job.time_zone) + def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.job import LoadJobConfig @@ -431,6 +441,24 @@ def test_from_api_repr_w_properties(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_to_api_repr(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource(ended=False) + + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client) + api_repr = job.to_api_repr() + + # as per the documentation in load.py -> LoadJob.to_api_repr(), + # the return value from to_api_repr should not include statistics + expected = { + "jobReference": RESOURCE["jobReference"], + "configuration": RESOURCE["configuration"], + } + + self.assertEqual(api_repr, expected) + def test_begin_w_already_running(self): conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) @@ -571,6 +599,7 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "timeZone": self.TIME_ZONE, } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION conn1 = make_connection() @@ -599,6 +628,9 @@ def test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" + + config.time_zone = self.TIME_ZONE + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 3a681c476..6424f7e68 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -828,6 +828,22 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_time_zone_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.time_zone) + + def test_time_zone_hit(self): + time_zone = "UTC" + config = self._get_target_class()() + config._properties["load"]["timeZone"] = time_zone + self.assertEqual(config.time_zone, time_zone) + + def test_time_zone_setter(self): + time_zone = "America/New_York" + config = self._get_target_class()() + config.time_zone = time_zone + self.assertEqual(config._properties["load"]["timeZone"], time_zone) + def test_parquet_options_missing(self): config = self._get_target_class()() self.assertIsNone(config.parquet_options) @@ -901,3 +917,114 @@ def test_column_name_character_map_none(self): config._properties["load"]["columnNameCharacterMap"], ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED, ) + + RESOURCE = { + "load": { + "allowJaggedRows": True, + "createDisposition": "CREATE_NEVER", + "encoding": "UTF-8", + "fieldDelimiter": ",", + "ignoreUnknownValues": True, + "maxBadRecords": 10, + "nullMarker": "\\N", + "quote": '"', + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "skipLeadingRows": "1", + "sourceFormat": "CSV", + "timePartitioning": { + "type": "DAY", + "field": "transaction_date", + }, + "useAvroLogicalTypes": True, + "writeDisposition": "WRITE_TRUNCATE", + "timeZone": "America/New_York", + "parquetOptions": {"enableListInference": True}, + "columnNameCharacterMap": "V2", + "someNewField": "some-value", + } + } + + def test_from_api_repr(self): + from google.cloud.bigquery.job import ( + CreateDisposition, + LoadJobConfig, + SourceFormat, + WriteDisposition, + ) + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType + + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = LoadJobConfig.from_api_repr(self.RESOURCE) + + self.assertTrue(config.allow_jagged_rows) + self.assertEqual(config.create_disposition, CreateDisposition.CREATE_NEVER) + self.assertEqual(config.encoding, "UTF-8") + self.assertEqual(config.field_delimiter, ",") + self.assertTrue(config.ignore_unknown_values) + self.assertEqual(config.max_bad_records, 10) + self.assertEqual(config.null_marker, "\\N") + self.assertEqual(config.quote_character, '"') + self.assertEqual( + config.schema, + [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")], + ) + self.assertEqual(config.skip_leading_rows, 1) + self.assertEqual(config.source_format, SourceFormat.CSV) + self.assertEqual( + config.time_partitioning, + TimePartitioning(type_=TimePartitioningType.DAY, field="transaction_date"), + ) + self.assertTrue(config.use_avro_logical_types) + self.assertEqual(config.write_disposition, WriteDisposition.WRITE_TRUNCATE) + self.assertEqual(config.time_zone, "America/New_York") + self.assertTrue(config.parquet_options.enable_list_inference) + self.assertEqual(config.column_name_character_map, ColumnNameCharacterMap.V2) + self.assertEqual(config._properties["load"]["someNewField"], "some-value") + + def test_to_api_repr(self): + from google.cloud.bigquery.job import ( + CreateDisposition, + LoadJobConfig, + SourceFormat, + WriteDisposition, + ) + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType + from google.cloud.bigquery.format_options import ParquetOptions + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = LoadJobConfig() + config.allow_jagged_rows = True + config.create_disposition = CreateDisposition.CREATE_NEVER + config.encoding = "UTF-8" + config.field_delimiter = "," + config.ignore_unknown_values = True + config.max_bad_records = 10 + config.null_marker = r"\N" + config.quote_character = '"' + config.schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] + config.skip_leading_rows = 1 + config.source_format = SourceFormat.CSV + config.time_partitioning = TimePartitioning( + type_=TimePartitioningType.DAY, field="transaction_date" + ) + config.use_avro_logical_types = True + config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.time_zone = "America/New_York" + parquet_options = ParquetOptions() + parquet_options.enable_list_inference = True + config.parquet_options = parquet_options + config.column_name_character_map = ColumnNameCharacterMap.V2 + config._properties["load"]["someNewField"] = "some-value" + + api_repr = config.to_api_repr() + + expected = self.RESOURCE + self.assertEqual(api_repr, expected) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 7f84a9f5b..a89b7a1fb 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -26,6 +26,8 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] + TIME_ZONE = "America/Los_Angeles" + BASE_RESOURCE = { "sourceFormat": "", "sourceUris": SOURCE_URIS, @@ -33,6 +35,7 @@ class TestExternalConfig(unittest.TestCase): "autodetect": True, "ignoreUnknownValues": False, "compression": "compression", + "timeZone": TIME_ZONE, } def test_from_api_repr_base(self): @@ -79,6 +82,7 @@ def test_to_api_repr_base(self): ec.connection_id = "path/to/connection" ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] + ec.time_zone = self.TIME_ZONE exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } @@ -92,6 +96,7 @@ def test_to_api_repr_base(self): "compression": "compression", "connectionId": "path/to/connection", "schema": exp_schema, + "timeZone": self.TIME_ZONE, } self.assertEqual(got_resource, exp_resource) @@ -128,6 +133,8 @@ def _verify_base(self, ec): self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) + self.assertEqual(ec.time_zone, self.TIME_ZONE) + def test_to_api_repr_source_format(self): ec = external_config.ExternalConfig("CSV") got = ec.to_api_repr() From 3ed0a0a3d9699f1f70a616cfd06d1958b69e1f03 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 10 Jul 2025 19:44:58 +0200 Subject: [PATCH 486/536] chore(deps): update dependency certifi to v2025.7.9 (#2232) Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5b342fe5c..447e92c81 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.6.15 +certifi==2025.7.9 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' From d44bf0231e6e96369e4e03667a3f96618fb664e2 Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Fri, 11 Jul 2025 10:10:58 -0700 Subject: [PATCH 487/536] feat: add total slot ms to RowIterator (#2233) * feat: add total slot ms to RowIterator * format fix --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/_job_helpers.py | 1 + google/cloud/bigquery/client.py | 4 ++++ google/cloud/bigquery/job/query.py | 1 + google/cloud/bigquery/query.py | 14 ++++++++++++++ google/cloud/bigquery/table.py | 7 +++++++ tests/unit/job/test_query.py | 2 ++ tests/unit/test_client.py | 2 ++ tests/unit/test_query.py | 16 ++++++++++++++++ 8 files changed, 47 insertions(+) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 888dc1e73..73d4f6e7b 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -560,6 +560,7 @@ def do_query(): num_dml_affected_rows=query_results.num_dml_affected_rows, query=query, total_bytes_processed=query_results.total_bytes_processed, + slot_millis=query_results.slot_millis, ) if job_retry is not None: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 811e9ef03..804f77ea2 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -4144,6 +4144,7 @@ def _list_rows_from_query_results( num_dml_affected_rows: Optional[int] = None, query: Optional[str] = None, total_bytes_processed: Optional[int] = None, + slot_millis: Optional[int] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -4195,6 +4196,8 @@ def _list_rows_from_query_results( The query text used. total_bytes_processed (Optional[int]): total bytes processed from job statistics, if present. + slot_millis (Optional[int]): + Number of slot ms the user is actually billed for. Returns: google.cloud.bigquery.table.RowIterator: @@ -4234,6 +4237,7 @@ def _list_rows_from_query_results( num_dml_affected_rows=num_dml_affected_rows, query=query, total_bytes_processed=total_bytes_processed, + slot_millis=slot_millis, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 4d95f0e71..ec9379ea9 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1766,6 +1766,7 @@ def is_job_done(): num_dml_affected_rows=self._query_results.num_dml_affected_rows, query=self.query, total_bytes_processed=self.total_bytes_processed, + slot_millis=self.slot_millis, **list_rows_kwargs, ) rows._preserve_order = _contains_order_by(self.query) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 8745c09f5..4a006d621 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -1282,6 +1282,20 @@ def total_bytes_processed(self): if total_bytes_processed is not None: return int(total_bytes_processed) + @property + def slot_millis(self): + """Total number of slot ms the user is actually billed for. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.slot_millis + + Returns: + Optional[int]: Count generated on the server (None until set by the server). + """ + slot_millis = self._properties.get("totalSlotMs") + if slot_millis is not None: + return int(slot_millis) + @property def num_dml_affected_rows(self): """Total number of rows affected by a DML query. diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 861f806b4..dbdde36d1 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1812,6 +1812,7 @@ def __init__( num_dml_affected_rows: Optional[int] = None, query: Optional[str] = None, total_bytes_processed: Optional[int] = None, + slot_millis: Optional[int] = None, ): super(RowIterator, self).__init__( client, @@ -1841,6 +1842,7 @@ def __init__( self._num_dml_affected_rows = num_dml_affected_rows self._query = query self._total_bytes_processed = total_bytes_processed + self._slot_millis = slot_millis @property def _billing_project(self) -> Optional[str]: @@ -1898,6 +1900,11 @@ def total_bytes_processed(self) -> Optional[int]: """total bytes processed from job statistics, if present.""" return self._total_bytes_processed + @property + def slot_millis(self) -> Optional[int]: + """Number of slot ms the user is actually billed for.""" + return self._slot_millis + def _is_almost_completely_cached(self): """Check if all results are completely cached. diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 46b802aa3..7201adb55 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -888,6 +888,7 @@ def test_result_reloads_job_state_until_done(self): job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") job_resource_done["statistics"]["query"]["totalBytesProcessed"] = str(1234) + job_resource_done["statistics"]["query"]["totalSlotMs"] = str(5678) job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", @@ -969,6 +970,7 @@ def test_result_reloads_job_state_until_done(self): self.assertEqual(result.total_rows, 1) self.assertEqual(result.query, job.query) self.assertEqual(result.total_bytes_processed, 1234) + self.assertEqual(result.slot_millis, 5678) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 42bfc84b9..bb86ccc3c 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5718,6 +5718,7 @@ def test_query_and_wait_defaults(self): "rows": [{"f": [{"v": "5552452"}]}], "queryId": "job_abcDEF_", "totalBytesProcessed": 1234, + "totalSlotMs": 5678, } creds = _make_credentials() http = object() @@ -5735,6 +5736,7 @@ def test_query_and_wait_defaults(self): self.assertIsNone(rows.location) self.assertEqual(rows.query, query) self.assertEqual(rows.total_bytes_processed, 1234) + self.assertEqual(rows.slot_millis, 5678) # Verify the request we send is to jobs.query. conn.api_request.assert_called_once() diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 0d967bdb8..2b704d3c9 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -2000,6 +2000,22 @@ def test_total_bytes_processed_present_string(self): query = self._make_one(resource) self.assertEqual(query.total_bytes_processed, 123456) + def test_slot_millis_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.slot_millis) + + def test_slot_millis_present_integer(self): + resource = self._make_resource() + resource["totalSlotMs"] = 123456 + query = self._make_one(resource) + self.assertEqual(query.slot_millis, 123456) + + def test_slot_millis_present_string(self): + resource = self._make_resource() + resource["totalSlotMs"] = "123456" + query = self._make_one(resource) + self.assertEqual(query.slot_millis, 123456) + def test_num_dml_affected_rows_missing(self): query = self._make_one(self._make_resource()) self.assertIsNone(query.num_dml_affected_rows) From 7d3182802deccfceb0646b87fc8d12275d0a569b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 11 Jul 2025 20:29:28 -0400 Subject: [PATCH 488/536] feat: adds date_format to load job and external config (#2231) * feat: adds date_format to load job and external config * adds date_format to new to/from_api_repr tests --- google/cloud/bigquery/external_config.py | 14 ++++++++++++++ google/cloud/bigquery/job/load.py | 20 ++++++++++++++++++++ tests/unit/job/test_load.py | 14 +++++++++----- tests/unit/job/test_load_config.py | 19 +++++++++++++++++++ tests/unit/test_external_config.py | 7 +++++-- 5 files changed, 67 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index fcfcaca20..54b7bf396 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -848,6 +848,20 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop + @property + def date_format(self) -> Optional[str]: + """Optional[str]: Format used to parse DATE values. Supports C-style and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.date_format + """ + result = self._properties.get("dateFormat") + return typing.cast(str, result) + + @date_format.setter + def date_format(self, value: Optional[str]): + self._properties["dateFormat"] = value + @property def time_zone(self) -> Optional[str]: """Optional[str]: Time zone used when parsing timestamp values that do not diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 5d49aef18..277478d81 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -548,6 +548,19 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def date_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing DATE values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.date_format + """ + return self._get_sub_prop("dateFormat") + + @date_format.setter + def date_format(self, value: Optional[str]): + self._set_sub_prop("dateFormat", value) + @property def time_zone(self) -> Optional[str]: """Optional[str]: Default time zone that will apply when parsing timestamp @@ -903,6 +916,13 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def date_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.date_format`. + """ + return self.configuration.date_format + @property def time_zone(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 81d8e44b4..82baa03c7 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -37,14 +37,14 @@ def _setUpConstants(self): self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" - + self.DATE_FORMAT = "%Y-%m-%d" self.TIME_ZONE = "UTC" def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] - + config["dateFormat"] = self.DATE_FORMAT config["timeZone"] = self.TIME_ZONE config["destinationTable"] = { "projectId": self.PROJECT, @@ -147,7 +147,6 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.reference_file_schema_uri) - if "destinationEncryptionConfiguration" in config: self.assertIsNotNone(job.destination_encryption_configuration) self.assertEqual( @@ -156,6 +155,10 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.destination_encryption_configuration) + if "dateFormat" in config: + self.assertEqual(job.date_format, config["dateFormat"]) + else: + self.assertIsNone(job.date_format) if "timeZone" in config: self.assertEqual(job.time_zone, config["timeZone"]) else: @@ -202,7 +205,7 @@ def test_ctor(self): self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) - + self.assertIsNone(job.date_format) self.assertIsNone(job.time_zone) def test_ctor_w_config(self): @@ -599,6 +602,7 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION @@ -628,7 +632,7 @@ def test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" - + config.date_format = self.DATE_FORMAT config.time_zone = self.TIME_ZONE with mock.patch( diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 6424f7e68..5b7f8175b 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -828,6 +828,22 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_date_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.date_format) + + def test_date_format_hit(self): + date_format = "%Y-%m-%d" + config = self._get_target_class()() + config._properties["load"]["dateFormat"] = date_format + self.assertEqual(config.date_format, date_format) + + def test_date_format_setter(self): + date_format = "YYYY/MM/DD" + config = self._get_target_class()() + config.date_format = date_format + self.assertEqual(config._properties["load"]["dateFormat"], date_format) + def test_time_zone_missing(self): config = self._get_target_class()() self.assertIsNone(config.time_zone) @@ -942,6 +958,7 @@ def test_column_name_character_map_none(self): }, "useAvroLogicalTypes": True, "writeDisposition": "WRITE_TRUNCATE", + "dateFormat": "%Y-%m-%d", "timeZone": "America/New_York", "parquetOptions": {"enableListInference": True}, "columnNameCharacterMap": "V2", @@ -983,6 +1000,7 @@ def test_from_api_repr(self): ) self.assertTrue(config.use_avro_logical_types) self.assertEqual(config.write_disposition, WriteDisposition.WRITE_TRUNCATE) + self.assertEqual(config.date_format, "%Y-%m-%d") self.assertEqual(config.time_zone, "America/New_York") self.assertTrue(config.parquet_options.enable_list_inference) self.assertEqual(config.column_name_character_map, ColumnNameCharacterMap.V2) @@ -1017,6 +1035,7 @@ def test_to_api_repr(self): ) config.use_avro_logical_types = True config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.date_format = "%Y-%m-%d" config.time_zone = "America/New_York" parquet_options = ParquetOptions() parquet_options.enable_list_inference = True diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index a89b7a1fb..0f5d09504 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -25,7 +25,7 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] - + DATE_FORMAT = "MM/DD/YYYY" TIME_ZONE = "America/Los_Angeles" BASE_RESOURCE = { @@ -35,6 +35,7 @@ class TestExternalConfig(unittest.TestCase): "autodetect": True, "ignoreUnknownValues": False, "compression": "compression", + "dateFormat": DATE_FORMAT, "timeZone": TIME_ZONE, } @@ -82,6 +83,7 @@ def test_to_api_repr_base(self): ec.connection_id = "path/to/connection" ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] + ec.date_format = self.DATE_FORMAT ec.time_zone = self.TIME_ZONE exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] @@ -96,6 +98,7 @@ def test_to_api_repr_base(self): "compression": "compression", "connectionId": "path/to/connection", "schema": exp_schema, + "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, } self.assertEqual(got_resource, exp_resource) @@ -132,7 +135,7 @@ def _verify_base(self, ec): self.assertEqual(ec.ignore_unknown_values, False) self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) - + self.assertEqual(ec.date_format, self.DATE_FORMAT) self.assertEqual(ec.time_zone, self.TIME_ZONE) def test_to_api_repr_source_format(self): From 371ad292df537278767dba71d81822ed57dd8e7d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 14 Jul 2025 12:14:58 -0400 Subject: [PATCH 489/536] feat: adds time_format and timestamp_format and associated tests (#2238) --- google/cloud/bigquery/external_config.py | 28 +++++++++++++++++ google/cloud/bigquery/job/load.py | 40 ++++++++++++++++++++++++ tests/unit/job/test_load.py | 20 ++++++++++++ tests/unit/job/test_load_config.py | 34 ++++++++++++++++++++ tests/unit/test_external_config.py | 11 +++++++ 5 files changed, 133 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 54b7bf396..370f62c0a 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -879,6 +879,34 @@ def time_zone(self) -> Optional[str]: def time_zone(self, value: Optional[str]): self._properties["timeZone"] = value + @property + def time_format(self) -> Optional[str]: + """Optional[str]: Format used to parse TIME values. Supports C-style and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_format + """ + result = self._properties.get("timeFormat") + return typing.cast(str, result) + + @time_format.setter + def time_format(self, value: Optional[str]): + self._properties["timeFormat"] = value + + @property + def timestamp_format(self) -> Optional[str]: + """Optional[str]: Format used to parse TIMESTAMP values. Supports C-style and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.timestamp_format + """ + result = self._properties.get("timestampFormat") + return typing.cast(str, result) + + @timestamp_format.setter + def timestamp_format(self, value: Optional[str]): + self._properties["timestampFormat"] = value + @property def connection_id(self): """Optional[str]: [Experimental] ID of a BigQuery Connection API diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 277478d81..2e5a9a9bb 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -575,6 +575,32 @@ def time_zone(self) -> Optional[str]: def time_zone(self, value: Optional[str]): self._set_sub_prop("timeZone", value) + @property + def time_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing TIME values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_format + """ + return self._get_sub_prop("timeFormat") + + @time_format.setter + def time_format(self, value: Optional[str]): + self._set_sub_prop("timeFormat", value) + + @property + def timestamp_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing TIMESTAMP values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.timestamp_format + """ + return self._get_sub_prop("timestampFormat") + + @timestamp_format.setter + def timestamp_format(self, value: Optional[str]): + self._set_sub_prop("timestampFormat", value) + @property def time_partitioning(self): """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based @@ -930,6 +956,20 @@ def time_zone(self): """ return self.configuration.time_zone + @property + def time_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.time_format`. + """ + return self.configuration.time_format + + @property + def timestamp_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.timestamp_format`. + """ + return self.configuration.timestamp_format + @property def schema_update_options(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 82baa03c7..77adf0cc8 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -39,6 +39,8 @@ def _setUpConstants(self): self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" self.DATE_FORMAT = "%Y-%m-%d" self.TIME_ZONE = "UTC" + self.TIME_FORMAT = "%H:%M:%S" + self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ" def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) @@ -46,6 +48,9 @@ def _make_resource(self, started=False, ended=False): config["sourceUris"] = [self.SOURCE1] config["dateFormat"] = self.DATE_FORMAT config["timeZone"] = self.TIME_ZONE + config["timeFormat"] = self.TIME_FORMAT + config["timestampFormat"] = self.TIMESTAMP_FORMAT + config["destinationTable"] = { "projectId": self.PROJECT, "datasetId": self.DS_ID, @@ -163,6 +168,14 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.time_zone, config["timeZone"]) else: self.assertIsNone(job.time_zone) + if "timeFormat" in config: + self.assertEqual(job.time_format, config["timeFormat"]) + else: + self.assertIsNone(job.time_format) + if "timestampFormat" in config: + self.assertEqual(job.timestamp_format, config["timestampFormat"]) + else: + self.assertIsNone(job.timestamp_format) def test_ctor(self): client = _make_client(project=self.PROJECT) @@ -207,6 +220,8 @@ def test_ctor(self): self.assertIsNone(job.reference_file_schema_uri) self.assertIsNone(job.date_format) self.assertIsNone(job.time_zone) + self.assertIsNone(job.time_format) + self.assertIsNone(job.timestamp_format) def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField @@ -604,7 +619,10 @@ def test_begin_w_alternate_client(self): "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, + "timeFormat": self.TIME_FORMAT, + "timestampFormat": self.TIMESTAMP_FORMAT, } + RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) @@ -634,6 +652,8 @@ def test_begin_w_alternate_client(self): config.reference_file_schema_uri = "gs://path/to/reference" config.date_format = self.DATE_FORMAT config.time_zone = self.TIME_ZONE + config.time_format = self.TIME_FORMAT + config.timestamp_format = self.TIMESTAMP_FORMAT with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 5b7f8175b..b733bdda0 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -860,6 +860,40 @@ def test_time_zone_setter(self): config.time_zone = time_zone self.assertEqual(config._properties["load"]["timeZone"], time_zone) + def test_time_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.time_format) + + def test_time_format_hit(self): + time_format = "%H:%M:%S" + config = self._get_target_class()() + config._properties["load"]["timeFormat"] = time_format + self.assertEqual(config.time_format, time_format) + + def test_time_format_setter(self): + time_format = "HH24:MI:SS" + config = self._get_target_class()() + config.time_format = time_format + self.assertEqual(config._properties["load"]["timeFormat"], time_format) + + def test_timestamp_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.timestamp_format) + + def test_timestamp_format_hit(self): + timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ" + config = self._get_target_class()() + config._properties["load"]["timestampFormat"] = timestamp_format + self.assertEqual(config.timestamp_format, timestamp_format) + + def test_timestamp_format_setter(self): + timestamp_format = "YYYY/MM/DD HH24:MI:SS.FF6 TZR" + config = self._get_target_class()() + config.timestamp_format = timestamp_format + self.assertEqual( + config._properties["load"]["timestampFormat"], timestamp_format + ) + def test_parquet_options_missing(self): config = self._get_target_class()() self.assertIsNone(config.parquet_options) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 0f5d09504..8b41cd8e3 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -27,6 +27,8 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] DATE_FORMAT = "MM/DD/YYYY" TIME_ZONE = "America/Los_Angeles" + TIME_FORMAT = "HH24:MI:SS" + TIMESTAMP_FORMAT = "MM/DD/YYYY HH24:MI:SS.FF6 TZR" BASE_RESOURCE = { "sourceFormat": "", @@ -37,6 +39,8 @@ class TestExternalConfig(unittest.TestCase): "compression": "compression", "dateFormat": DATE_FORMAT, "timeZone": TIME_ZONE, + "timeFormat": TIME_FORMAT, + "timestampFormat": TIMESTAMP_FORMAT, } def test_from_api_repr_base(self): @@ -85,6 +89,9 @@ def test_to_api_repr_base(self): ec.date_format = self.DATE_FORMAT ec.time_zone = self.TIME_ZONE + ec.time_format = self.TIME_FORMAT + ec.timestamp_format = self.TIMESTAMP_FORMAT + exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } @@ -100,6 +107,8 @@ def test_to_api_repr_base(self): "schema": exp_schema, "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, + "timeFormat": self.TIME_FORMAT, + "timestampFormat": self.TIMESTAMP_FORMAT, } self.assertEqual(got_resource, exp_resource) @@ -137,6 +146,8 @@ def _verify_base(self, ec): self.assertEqual(ec.source_uris, self.SOURCE_URIS) self.assertEqual(ec.date_format, self.DATE_FORMAT) self.assertEqual(ec.time_zone, self.TIME_ZONE) + self.assertEqual(ec.time_format, self.TIME_FORMAT) + self.assertEqual(ec.timestamp_format, self.TIMESTAMP_FORMAT) def test_to_api_repr_source_format(self): ec = external_config.ExternalConfig("CSV") From 54d3dc66244d50a031e3c80d43d372d2743ecbc3 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 15 Jul 2025 05:22:44 -0400 Subject: [PATCH 490/536] feat: adds datetime_format as an option (#2236) * feat: adds datetime_format as an option * updates docstrings --- google/cloud/bigquery/external_config.py | 15 +++++++++++++++ google/cloud/bigquery/job/load.py | 20 ++++++++++++++++++++ tests/unit/job/test_load.py | 9 +++++++++ tests/unit/job/test_load_config.py | 16 ++++++++++++++++ tests/unit/test_external_config.py | 5 +++++ 5 files changed, 65 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 370f62c0a..82c6a9e75 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -862,6 +862,21 @@ def date_format(self) -> Optional[str]: def date_format(self, value: Optional[str]): self._properties["dateFormat"] = value + @property + def datetime_format(self) -> Optional[str]: + """Optional[str]: Format used to parse DATETIME values. Supports C-style + and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.datetime_format + """ + result = self._properties.get("datetimeFormat") + return typing.cast(str, result) + + @datetime_format.setter + def datetime_format(self, value: Optional[str]): + self._properties["datetimeFormat"] = value + @property def time_zone(self) -> Optional[str]: """Optional[str]: Time zone used when parsing timestamp values that do not diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 2e5a9a9bb..3be914f43 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -561,6 +561,19 @@ def date_format(self) -> Optional[str]: def date_format(self, value: Optional[str]): self._set_sub_prop("dateFormat", value) + @property + def datetime_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing DATETIME values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.datetime_format + """ + return self._get_sub_prop("datetimeFormat") + + @datetime_format.setter + def datetime_format(self, value: Optional[str]): + self._set_sub_prop("datetimeFormat", value) + @property def time_zone(self) -> Optional[str]: """Optional[str]: Default time zone that will apply when parsing timestamp @@ -949,6 +962,13 @@ def date_format(self): """ return self.configuration.date_format + @property + def datetime_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.datetime_format`. + """ + return self.configuration.datetime_format + @property def time_zone(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 77adf0cc8..7afe9cba6 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -38,6 +38,7 @@ def _setUpConstants(self): self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" self.DATE_FORMAT = "%Y-%m-%d" + self.DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" self.TIME_ZONE = "UTC" self.TIME_FORMAT = "%H:%M:%S" self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ" @@ -47,6 +48,7 @@ def _make_resource(self, started=False, ended=False): config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] config["dateFormat"] = self.DATE_FORMAT + config["datetimeFormat"] = self.DATETIME_FORMAT config["timeZone"] = self.TIME_ZONE config["timeFormat"] = self.TIME_FORMAT config["timestampFormat"] = self.TIMESTAMP_FORMAT @@ -164,6 +166,10 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.date_format, config["dateFormat"]) else: self.assertIsNone(job.date_format) + if "datetimeFormat" in config: + self.assertEqual(job.datetime_format, config["datetimeFormat"]) + else: + self.assertIsNone(job.datetime_format) if "timeZone" in config: self.assertEqual(job.time_zone, config["timeZone"]) else: @@ -219,6 +225,7 @@ def test_ctor(self): self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) self.assertIsNone(job.date_format) + self.assertIsNone(job.datetime_format) self.assertIsNone(job.time_zone) self.assertIsNone(job.time_format) self.assertIsNone(job.timestamp_format) @@ -618,6 +625,7 @@ def test_begin_w_alternate_client(self): }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], "dateFormat": self.DATE_FORMAT, + "datetimeFormat": self.DATETIME_FORMAT, "timeZone": self.TIME_ZONE, "timeFormat": self.TIME_FORMAT, "timestampFormat": self.TIMESTAMP_FORMAT, @@ -651,6 +659,7 @@ def test_begin_w_alternate_client(self): config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" config.date_format = self.DATE_FORMAT + config.datetime_format = self.DATETIME_FORMAT config.time_zone = self.TIME_ZONE config.time_format = self.TIME_FORMAT config.timestamp_format = self.TIMESTAMP_FORMAT diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index b733bdda0..dbb062486 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -844,6 +844,22 @@ def test_date_format_setter(self): config.date_format = date_format self.assertEqual(config._properties["load"]["dateFormat"], date_format) + def test_datetime_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.datetime_format) + + def test_datetime_format_hit(self): + datetime_format = "%Y-%m-%dT%H:%M:%S" + config = self._get_target_class()() + config._properties["load"]["datetimeFormat"] = datetime_format + self.assertEqual(config.datetime_format, datetime_format) + + def test_datetime_format_setter(self): + datetime_format = "YYYY/MM/DD HH24:MI:SS" + config = self._get_target_class()() + config.datetime_format = datetime_format + self.assertEqual(config._properties["load"]["datetimeFormat"], datetime_format) + def test_time_zone_missing(self): config = self._get_target_class()() self.assertIsNone(config.time_zone) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 8b41cd8e3..3a441d1f5 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -26,6 +26,7 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] DATE_FORMAT = "MM/DD/YYYY" + DATETIME_FORMAT = "MM/DD/YYYY HH24:MI:SS" TIME_ZONE = "America/Los_Angeles" TIME_FORMAT = "HH24:MI:SS" TIMESTAMP_FORMAT = "MM/DD/YYYY HH24:MI:SS.FF6 TZR" @@ -38,6 +39,7 @@ class TestExternalConfig(unittest.TestCase): "ignoreUnknownValues": False, "compression": "compression", "dateFormat": DATE_FORMAT, + "datetimeFormat": DATETIME_FORMAT, "timeZone": TIME_ZONE, "timeFormat": TIME_FORMAT, "timestampFormat": TIMESTAMP_FORMAT, @@ -88,6 +90,7 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] ec.date_format = self.DATE_FORMAT + ec.datetime_format = self.DATETIME_FORMAT ec.time_zone = self.TIME_ZONE ec.time_format = self.TIME_FORMAT ec.timestamp_format = self.TIMESTAMP_FORMAT @@ -106,6 +109,7 @@ def test_to_api_repr_base(self): "connectionId": "path/to/connection", "schema": exp_schema, "dateFormat": self.DATE_FORMAT, + "datetimeFormat": self.DATETIME_FORMAT, "timeZone": self.TIME_ZONE, "timeFormat": self.TIME_FORMAT, "timestampFormat": self.TIMESTAMP_FORMAT, @@ -145,6 +149,7 @@ def _verify_base(self, ec): self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) self.assertEqual(ec.date_format, self.DATE_FORMAT) + self.assertEqual(ec.datetime_format, self.DATETIME_FORMAT) self.assertEqual(ec.time_zone, self.TIME_ZONE) self.assertEqual(ec.time_format, self.TIME_FORMAT) self.assertEqual(ec.timestamp_format, self.TIMESTAMP_FORMAT) From 69a2c2bfd89914605d53aefc78bd6e45c38c578f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 15 Jul 2025 11:43:28 +0200 Subject: [PATCH 491/536] chore(deps): update dependency certifi to v2025.7.14 (#2237) Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 447e92c81..e932625b8 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.7.9 +certifi==2025.7.14 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' From 289446dd8c356d11a0b63b8e6275629b1ae5dc08 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 15 Jul 2025 14:32:47 -0400 Subject: [PATCH 492/536] feat: Add null_markers property to LoadJobConfig and CSVOptions (#2239) * feat: Add null_markers property to LoadJobConfig and CSVOptions * feat: adds null_markers as a load and external_config option --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/external_config.py | 21 ++++++++++++++++++ google/cloud/bigquery/job/load.py | 28 ++++++++++++++++++++++++ tests/unit/job/test_load.py | 7 ++++++ tests/unit/job/test_load_config.py | 16 ++++++++++++++ tests/unit/test_external_config.py | 4 ++++ 5 files changed, 76 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 82c6a9e75..69ed72bc9 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -474,6 +474,27 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) + @property + def null_markers(self) -> Optional[Iterable[str]]: + """Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file. + + .. note:: + null_marker and null_markers can't be set at the same time. + If null_marker is set, null_markers has to be not set. + If null_markers is set, null_marker has to be not set. + If both null_marker and null_markers are set at the same time, a user error would be thrown. + Any strings listed in null_markers, including empty string would be interpreted as SQL NULL. + This applies to all column types. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.null_markers + """ + return self._properties.get("nullMarkers") + + @null_markers.setter + def null_markers(self, value: Optional[Iterable[str]]): + self._properties["nullMarkers"] = value + def to_api_repr(self) -> dict: """Build an API representation of this object. diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 3be914f43..eabc12cfc 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -386,6 +386,27 @@ def null_marker(self): def null_marker(self, value): self._set_sub_prop("nullMarker", value) + @property + def null_markers(self) -> Optional[List[str]]: + """Optional[List[str]]: A list of strings represented as SQL NULL values in a CSV file. + + .. note:: + null_marker and null_markers can't be set at the same time. + If null_marker is set, null_markers has to be not set. + If null_markers is set, null_marker has to be not set. + If both null_marker and null_markers are set at the same time, a user error would be thrown. + Any strings listed in null_markers, including empty string would be interpreted as SQL NULL. + This applies to all column types. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_markers + """ + return self._get_sub_prop("nullMarkers") + + @null_markers.setter + def null_markers(self, value: Optional[List[str]]): + self._set_sub_prop("nullMarkers", value) + @property def preserve_ascii_control_characters(self): """Optional[bool]: Preserves the embedded ASCII control characters when sourceFormat is set to CSV. @@ -854,6 +875,13 @@ def null_marker(self): """ return self.configuration.null_marker + @property + def null_markers(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.null_markers`. + """ + return self.configuration.null_markers + @property def quote_character(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 7afe9cba6..5d52401c9 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -42,6 +42,7 @@ def _setUpConstants(self): self.TIME_ZONE = "UTC" self.TIME_FORMAT = "%H:%M:%S" self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ" + self.NULL_MARKERS = ["", "NA"] def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) @@ -52,6 +53,7 @@ def _make_resource(self, started=False, ended=False): config["timeZone"] = self.TIME_ZONE config["timeFormat"] = self.TIME_FORMAT config["timestampFormat"] = self.TIMESTAMP_FORMAT + config["nullMarkers"] = self.NULL_MARKERS config["destinationTable"] = { "projectId": self.PROJECT, @@ -140,6 +142,10 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.null_marker, config["nullMarker"]) else: self.assertIsNone(job.null_marker) + if "nullMarkers" in config: + self.assertEqual(job.null_markers, config["nullMarkers"]) + else: + self.assertIsNone(job.null_markers) if "quote" in config: self.assertEqual(job.quote_character, config["quote"]) else: @@ -211,6 +217,7 @@ def test_ctor(self): self.assertIsNone(job.ignore_unknown_values) self.assertIsNone(job.max_bad_records) self.assertIsNone(job.null_marker) + self.assertIsNone(job.null_markers) self.assertIsNone(job.quote_character) self.assertIsNone(job.skip_leading_rows) self.assertIsNone(job.source_format) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index dbb062486..8ff9244d2 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -469,6 +469,22 @@ def test_null_marker_setter(self): config.null_marker = null_marker self.assertEqual(config._properties["load"]["nullMarker"], null_marker) + def test_null_markers_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.null_markers) + + def test_null_markers_hit(self): + null_markers = ["", "NA"] + config = self._get_target_class()() + config._properties["load"]["nullMarkers"] = null_markers + self.assertEqual(config.null_markers, null_markers) + + def test_null_markers_setter(self): + null_markers = ["", "NA"] + config = self._get_target_class()() + config.null_markers = null_markers + self.assertEqual(config._properties["load"]["nullMarkers"], null_markers) + def test_preserve_ascii_control_characters_missing(self): config = self._get_target_class()() self.assertIsNone(config.preserve_ascii_control_characters) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 3a441d1f5..61532b4b8 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -277,6 +277,7 @@ def test_from_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "nullMarkers": ["", "NA"], }, }, ) @@ -293,6 +294,7 @@ def test_from_api_repr_csv(self): self.assertEqual(ec.options.allow_jagged_rows, False) self.assertEqual(ec.options.encoding, "encoding") self.assertEqual(ec.options.preserve_ascii_control_characters, False) + self.assertEqual(ec.options.null_markers, ["", "NA"]) got_resource = ec.to_api_repr() @@ -314,6 +316,7 @@ def test_to_api_repr_csv(self): options.skip_leading_rows = 123 options.allow_jagged_rows = False options.preserve_ascii_control_characters = False + options.null_markers = ["", "NA"] ec.csv_options = options exp_resource = { @@ -326,6 +329,7 @@ def test_to_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "nullMarkers": ["", "NA"], }, } From 6d5d23685cd457d85955356705c1101e9ec3cdcd Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 15 Jul 2025 18:04:24 -0400 Subject: [PATCH 493/536] feat: Adds source_column_match and associated tests (#2227) This commit introduces new configuration options for BigQuery load jobs and external table definitions, aligning with recent updates to the underlying protos. New options added: - `source_column_name_match_option`: Controls how source columns are matched to the schema. (Applies to LoadJobConfig, ExternalConfig, LoadJob) Changes include: - Added corresponding properties (getters/setters) to `LoadJobConfig`, `LoadJob`, `ExternalConfig`, and `CSVOptions`. - Updated docstrings and type hints for all new attributes. - Updated unit tests to cover the new options, ensuring they are correctly handled during object initialization, serialization to API representation, and deserialization from API responses. --- google/cloud/bigquery/enums.py | 18 ++++++++++ google/cloud/bigquery/external_config.py | 34 +++++++++++++++++++ google/cloud/bigquery/job/load.py | 43 +++++++++++++++++++++++- tests/unit/job/test_load.py | 15 +++++++++ tests/unit/job/test_load_config.py | 32 ++++++++++++++++++ tests/unit/test_external_config.py | 43 +++++++++++++++++++++++- 6 files changed, 183 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 9a1e4880c..1b1eb241a 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -462,3 +462,21 @@ class JobCreationMode(object): The conditions under which BigQuery can decide to not create a Job are subject to change. """ + + +class SourceColumnMatch(str, enum.Enum): + """Uses sensible defaults based on how the schema is provided. + If autodetect is used, then columns are matched by name. Otherwise, columns + are matched by position. This is done to keep the behavior backward-compatible. + """ + + SOURCE_COLUMN_MATCH_UNSPECIFIED = "SOURCE_COLUMN_MATCH_UNSPECIFIED" + """Unspecified column name match option.""" + + POSITION = "POSITION" + """Matches by position. This assumes that the columns are ordered the same + way as the schema.""" + + NAME = "NAME" + """Matches by name. This reads the header row as column names and reorders + columns to match the field names in the schema.""" diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 69ed72bc9..dc7a33e6a 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -30,6 +30,7 @@ from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery import _helpers +from google.cloud.bigquery.enums import SourceColumnMatch from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery import schema from google.cloud.bigquery.schema import SchemaField @@ -474,6 +475,39 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the + strategy used to match loaded columns to the schema. If not set, a sensible + default is chosen based on how the schema is provided. If autodetect is + used, then columns are matched by name. Otherwise, columns are matched by + position. This is done to keep the behavior backward-compatible. + + Acceptable values are: + + SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option. + POSITION: matches by position. This assumes that the columns are ordered + the same way as the schema. + NAME: matches by name. This reads the header row as column names and + reorders columns to match the field names in the schema. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.source_column_match + """ + + value = self._properties.get("sourceColumnMatch") + return SourceColumnMatch(value) if value is not None else None + + @source_column_match.setter + def source_column_match(self, value: Union[SourceColumnMatch, str, None]): + if value is not None and not isinstance(value, (SourceColumnMatch, str)): + raise TypeError( + "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None" + ) + if isinstance(value, SourceColumnMatch): + value = value.value + self._properties["sourceColumnMatch"] = value if value else None + @property def null_markers(self) -> Optional[Iterable[str]]: """Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file. diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index eabc12cfc..8cdb779ac 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -15,9 +15,10 @@ """Classes for load jobs.""" import typing -from typing import FrozenSet, List, Iterable, Optional +from typing import FrozenSet, List, Iterable, Optional, Union from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.enums import SourceColumnMatch from google.cloud.bigquery.external_config import HivePartitioningOptions from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery import _helpers @@ -569,6 +570,39 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the + strategy used to match loaded columns to the schema. If not set, a sensible + default is chosen based on how the schema is provided. If autodetect is + used, then columns are matched by name. Otherwise, columns are matched by + position. This is done to keep the behavior backward-compatible. + + Acceptable values are: + + SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option. + POSITION: matches by position. This assumes that the columns are ordered + the same way as the schema. + NAME: matches by name. This reads the header row as column names and + reorders columns to match the field names in the schema. + + See: + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_column_match + """ + value = self._get_sub_prop("sourceColumnMatch") + return SourceColumnMatch(value) if value is not None else None + + @source_column_match.setter + def source_column_match(self, value: Union[SourceColumnMatch, str, None]): + if value is not None and not isinstance(value, (SourceColumnMatch, str)): + raise TypeError( + "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None" + ) + if isinstance(value, SourceColumnMatch): + value = value.value + self._set_sub_prop("sourceColumnMatch", value if value else None) + @property def date_format(self) -> Optional[str]: """Optional[str]: Date format used for parsing DATE values. @@ -983,6 +1017,13 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.source_column_match`. + """ + return self.configuration.source_column_match + @property def date_format(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 5d52401c9..b551d52dd 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -19,6 +19,7 @@ from .helpers import _Base from .helpers import _make_client +from google.cloud.bigquery.enums import SourceColumnMatch class TestLoadJob(_Base): @@ -37,6 +38,7 @@ def _setUpConstants(self): self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" + self.SOURCE_COLUMN_MATCH = "NAME" self.DATE_FORMAT = "%Y-%m-%d" self.DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" self.TIME_ZONE = "UTC" @@ -48,6 +50,7 @@ def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] + config["sourceColumnMatch"] = self.SOURCE_COLUMN_MATCH config["dateFormat"] = self.DATE_FORMAT config["datetimeFormat"] = self.DATETIME_FORMAT config["timeZone"] = self.TIME_ZONE @@ -189,6 +192,15 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.timestamp_format) + if "sourceColumnMatch" in config: + # job.source_column_match will be an Enum, config[...] is a string + self.assertEqual( + job.source_column_match.value, + config["sourceColumnMatch"], + ) + else: + self.assertIsNone(job.source_column_match) + def test_ctor(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) @@ -231,6 +243,7 @@ def test_ctor(self): self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) + self.assertIsNone(job.source_column_match) self.assertIsNone(job.date_format) self.assertIsNone(job.datetime_format) self.assertIsNone(job.time_zone) @@ -631,6 +644,7 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, "dateFormat": self.DATE_FORMAT, "datetimeFormat": self.DATETIME_FORMAT, "timeZone": self.TIME_ZONE, @@ -665,6 +679,7 @@ def test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" + config.source_column_match = SourceColumnMatch(self.SOURCE_COLUMN_MATCH) config.date_format = self.DATE_FORMAT config.datetime_format = self.DATETIME_FORMAT config.time_zone = self.TIME_ZONE diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 8ff9244d2..27d3cead1 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -844,6 +844,38 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_source_column_match_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.source_column_match) + + def test_source_column_match_hit(self): + from google.cloud.bigquery.enums import SourceColumnMatch + + option_enum = SourceColumnMatch.NAME + config = self._get_target_class()() + # Assume API stores the string value of the enum + config._properties["load"]["sourceColumnMatch"] = option_enum.value + self.assertEqual(config.source_column_match, option_enum) + + def test_source_column_match_setter(self): + from google.cloud.bigquery.enums import SourceColumnMatch + + option_enum = SourceColumnMatch.POSITION + config = self._get_target_class()() + config.source_column_match = option_enum + # Assert that the string value of the enum is stored + self.assertEqual( + config._properties["load"]["sourceColumnMatch"], option_enum.value + ) + option_str = "NAME" + config.source_column_match = option_str + self.assertEqual(config._properties["load"]["sourceColumnMatch"], option_str) + + def test_source_column_match_setter_invalid_type(self): + config = self._get_target_class()() + with self.assertRaises(TypeError): + config.source_column_match = 3.14 + def test_date_format_missing(self): config = self._get_target_class()() self.assertIsNone(config.date_format) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 61532b4b8..ea827a560 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -19,12 +19,14 @@ from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +from google.cloud.bigquery.enums import SourceColumnMatch import pytest class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] + SOURCE_COLUMN_MATCH = SourceColumnMatch.NAME DATE_FORMAT = "MM/DD/YYYY" DATETIME_FORMAT = "MM/DD/YYYY HH24:MI:SS" TIME_ZONE = "America/Los_Angeles" @@ -277,6 +279,7 @@ def test_from_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, "nullMarkers": ["", "NA"], }, }, @@ -294,6 +297,10 @@ def test_from_api_repr_csv(self): self.assertEqual(ec.options.allow_jagged_rows, False) self.assertEqual(ec.options.encoding, "encoding") self.assertEqual(ec.options.preserve_ascii_control_characters, False) + self.assertEqual( + ec.options.source_column_match, + self.SOURCE_COLUMN_MATCH, + ) self.assertEqual(ec.options.null_markers, ["", "NA"]) got_resource = ec.to_api_repr() @@ -316,7 +323,9 @@ def test_to_api_repr_csv(self): options.skip_leading_rows = 123 options.allow_jagged_rows = False options.preserve_ascii_control_characters = False + options.source_column_match = self.SOURCE_COLUMN_MATCH options.null_markers = ["", "NA"] + ec.csv_options = options exp_resource = { @@ -329,6 +338,7 @@ def test_to_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, "nullMarkers": ["", "NA"], }, } @@ -881,7 +891,9 @@ def test_to_api_repr(self): ) -class CSVOptions(unittest.TestCase): +class TestCSVOptions(unittest.TestCase): + SOURCE_COLUMN_MATCH = SourceColumnMatch.NAME + def test_to_api_repr(self): options = external_config.CSVOptions() options.field_delimiter = "\t" @@ -891,6 +903,7 @@ def test_to_api_repr(self): options.allow_jagged_rows = False options.encoding = "UTF-8" options.preserve_ascii_control_characters = False + options.source_column_match = self.SOURCE_COLUMN_MATCH resource = options.to_api_repr() @@ -904,9 +917,37 @@ def test_to_api_repr(self): "allowJaggedRows": False, "encoding": "UTF-8", "preserveAsciiControlCharacters": False, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, }, ) + def test_source_column_match_None(self): + ec = external_config.CSVOptions() + ec.source_column_match = None + expected = None + result = ec.source_column_match + self.assertEqual(expected, result) + + def test_source_column_match_valid_input(self): + ec = external_config.CSVOptions() + ec.source_column_match = SourceColumnMatch.NAME + expected = "NAME" + result = ec.source_column_match + self.assertEqual(expected, result) + + ec.source_column_match = "POSITION" + expected = "POSITION" + result = ec.source_column_match + self.assertEqual(expected, result) + + def test_source_column_match_invalid_input(self): + ec = external_config.CSVOptions() + with self.assertRaisesRegex( + TypeError, + "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None", + ): + ec.source_column_match = 3.14 + class TestGoogleSheetsOptions(unittest.TestCase): def test_to_api_repr(self): From ef49f739998120ef348ffafdb97c22bddb323e83 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 15 Jul 2025 20:29:42 -0400 Subject: [PATCH 494/536] chore(main): release 3.35.0 (#2207) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 28 ++++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b29a6a41..e4574aa7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,34 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.35.0](https://github.com/googleapis/python-bigquery/compare/v3.34.0...v3.35.0) (2025-07-15) + + +### Features + +* Add null_markers property to LoadJobConfig and CSVOptions ([#2239](https://github.com/googleapis/python-bigquery/issues/2239)) ([289446d](https://github.com/googleapis/python-bigquery/commit/289446dd8c356d11a0b63b8e6275629b1ae5dc08)) +* Add total slot ms to RowIterator ([#2233](https://github.com/googleapis/python-bigquery/issues/2233)) ([d44bf02](https://github.com/googleapis/python-bigquery/commit/d44bf0231e6e96369e4e03667a3f96618fb664e2)) +* Add UpdateMode to update_dataset ([#2204](https://github.com/googleapis/python-bigquery/issues/2204)) ([eb9c2af](https://github.com/googleapis/python-bigquery/commit/eb9c2aff242c5107f968bbd8b6a9d30cecc877f6)) +* Adds dataset_view parameter to get_dataset method ([#2198](https://github.com/googleapis/python-bigquery/issues/2198)) ([28a5750](https://github.com/googleapis/python-bigquery/commit/28a5750d455f0381548df6f9b1f7661823837d81)) +* Adds date_format to load job and external config ([#2231](https://github.com/googleapis/python-bigquery/issues/2231)) ([7d31828](https://github.com/googleapis/python-bigquery/commit/7d3182802deccfceb0646b87fc8d12275d0a569b)) +* Adds datetime_format as an option ([#2236](https://github.com/googleapis/python-bigquery/issues/2236)) ([54d3dc6](https://github.com/googleapis/python-bigquery/commit/54d3dc66244d50a031e3c80d43d372d2743ecbc3)) +* Adds source_column_match and associated tests ([#2227](https://github.com/googleapis/python-bigquery/issues/2227)) ([6d5d236](https://github.com/googleapis/python-bigquery/commit/6d5d23685cd457d85955356705c1101e9ec3cdcd)) +* Adds time_format and timestamp_format and associated tests ([#2238](https://github.com/googleapis/python-bigquery/issues/2238)) ([371ad29](https://github.com/googleapis/python-bigquery/commit/371ad292df537278767dba71d81822ed57dd8e7d)) +* Adds time_zone to external config and load job ([#2229](https://github.com/googleapis/python-bigquery/issues/2229)) ([b2300d0](https://github.com/googleapis/python-bigquery/commit/b2300d032843512b7e4a5703377632fe60ef3f8d)) + + +### Bug Fixes + +* Adds magics.context.project to eliminate issues with unit tests … ([#2228](https://github.com/googleapis/python-bigquery/issues/2228)) ([27ff3a8](https://github.com/googleapis/python-bigquery/commit/27ff3a89a5f97305fa3ff673aa9183baa7df200f)) +* Fix rows returned when both start_index and page_size are provided ([#2181](https://github.com/googleapis/python-bigquery/issues/2181)) ([45643a2](https://github.com/googleapis/python-bigquery/commit/45643a2e20ce5d503118522dd195aeca00dec3bc)) +* Make AccessEntry equality consistent with from_api_repr ([#2218](https://github.com/googleapis/python-bigquery/issues/2218)) ([4941de4](https://github.com/googleapis/python-bigquery/commit/4941de441cb32cabeb55ec0320f305fb62551155)) +* Update type hints for various BigQuery files ([#2206](https://github.com/googleapis/python-bigquery/issues/2206)) ([b863291](https://github.com/googleapis/python-bigquery/commit/b86329188ba35e61871db82ae1d95d2a576eed1b)) + + +### Documentation + +* Improve clarity of "Output Only" fields in Dataset class ([#2201](https://github.com/googleapis/python-bigquery/issues/2201)) ([bd5aba8](https://github.com/googleapis/python-bigquery/commit/bd5aba8ba40c2f35fb672a68eed11d6baedb304f)) + ## [3.34.0](https://github.com/googleapis/python-bigquery/compare/v3.33.0...v3.34.0) (2025-05-27) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 9e1393854..0107ae309 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.34.0" +__version__ = "3.35.0" From 4400f26dcb03826f835de8348aef25a0fcbf5115 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 16 Jul 2025 19:22:36 +0200 Subject: [PATCH 495/536] chore(deps): update all dependencies to v3.35.0 (#2242) --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index a512dbd3a..54b708ca8 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.34.0 +google-cloud-bigquery==3.35.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e932625b8..5b85a9bfe 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 -google-cloud-bigquery==3.34.0 +google-cloud-bigquery==3.35.0 google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index b53a35982..5c48d707f 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.1 db-dtypes==1.4.3 -google.cloud.bigquery==3.34.0 +google.cloud.bigquery==3.35.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 pandas==2.3.1 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 4b134ac9d..88f725bb4 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ bigquery-magics==0.10.1 db-dtypes==1.4.3 -google-cloud-bigquery==3.34.0 +google-cloud-bigquery==3.35.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' ipython==9.4.0; python_version >= '3.10' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index fd8bd672b..e43cb04e9 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.34.0 +google-cloud-bigquery==3.35.0 From d207f6539b7a4c248a5de5719d7f384abbe20abe Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 21 Jul 2025 17:34:50 -0500 Subject: [PATCH 496/536] docs: specify the inherited-members directive for job classes (#2244) It seems that versions of python earlier than 3.10 may have had issues processing inherited members annotations, and accidentally include inherited members by default. As we recently worked to excise older versions of python in this repo, it seems we're now correctly processing sphinx directives, which means we no longer emit docstrings for inherited members. This PR adds a minor sphinx directive to include inherited members for the job classes, and I've confirmed locally by running the `docsfx` nox job that the inherited members do now get included in the docfx_yaml output. --- docs/reference.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/reference.rst b/docs/reference.rst index 6c00df077..d24a73596 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -22,6 +22,7 @@ Job === .. automodule:: google.cloud.bigquery.job + :inherited-members: .. toctree:: :maxdepth: 2 From 29b854823011d131b322e9fd8356cd307d188aac Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 24 Jul 2025 10:57:43 -0400 Subject: [PATCH 497/536] chore(main): release 3.35.1 (#2245) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4574aa7a..374448a5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.35.1](https://github.com/googleapis/python-bigquery/compare/v3.35.0...v3.35.1) (2025-07-21) + + +### Documentation + +* Specify the inherited-members directive for job classes ([#2244](https://github.com/googleapis/python-bigquery/issues/2244)) ([d207f65](https://github.com/googleapis/python-bigquery/commit/d207f6539b7a4c248a5de5719d7f384abbe20abe)) + ## [3.35.0](https://github.com/googleapis/python-bigquery/compare/v3.34.0...v3.35.0) (2025-07-15) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0107ae309..d565bc46e 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.35.0" +__version__ = "3.35.1" From d219989f977d9d449e4f06a65ba151f0ab2bd9a1 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Sat, 26 Jul 2025 00:09:47 +0200 Subject: [PATCH 498/536] chore(deps): update all dependencies (#2243) --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 54b708ca8..e7a02eca5 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.35.0 +google-cloud-bigquery==3.35.1 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5b85a9bfe..4f2c0aff4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -13,20 +13,20 @@ geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 -google-cloud-bigquery==3.35.0 +google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.73.1 +grpcio==1.74.0 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 pandas==2.3.1 proto-plus==1.26.1 -pyarrow==20.0.0 +pyarrow==21.0.0 pyasn1==0.6.1 pyasn1-modules==0.4.2 pycparser==2.22 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 5c48d707f..e72d2822e 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.1 db-dtypes==1.4.3 -google.cloud.bigquery==3.35.0 +google.cloud.bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 pandas==2.3.1 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 88f725bb4..c9aed9e58 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ bigquery-magics==0.10.1 db-dtypes==1.4.3 -google-cloud-bigquery==3.35.0 +google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' ipython==9.4.0; python_version >= '3.10' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index e43cb04e9..afa62b6b8 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.35.0 +google-cloud-bigquery==3.35.1 From 791c53bef3caa672c9b3c5292b4050cd1c62d6e3 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 31 Jul 2025 23:54:06 +0200 Subject: [PATCH 499/536] chore(deps): update dependency matplotlib to v3.10.5 (#2251) --- samples/notebooks/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index c9aed9e58..a2f0cb44a 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -5,5 +5,5 @@ google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' ipython==9.4.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.3; python_version >= '3.10' +matplotlib==3.10.5; python_version >= '3.10' pandas==2.3.1 From 8f031666114a826da2ad965f8ecd4727466cb480 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Tue, 5 Aug 2025 04:41:40 -0400 Subject: [PATCH 500/536] docs: update README to break infinite redirect loop (#2254) --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 29e15e067..23ed9257d 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,7 @@ processing power of Google's infrastructure. .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ .. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery -.. _Client Library Documentation: https://googleapis.dev/python/bigquery/latest +.. _Client Library Documentation: https://cloud.google.com/python/docs/reference/bigquery/latest/summary_overview .. _Product Documentation: https://cloud.google.com/bigquery/docs/reference/v2/ Quick Start From abc319b1bf8a11b0013b34001ca2b99caf34790c Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 5 Aug 2025 14:45:23 +0200 Subject: [PATCH 501/536] chore(deps): update all dependencies (#2253) Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 4f2c0aff4..fa54cc229 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.7.14 +certifi==2025.8.3 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index e72d2822e..e7230053c 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -bigquery_magics==0.10.1 +bigquery_magics==0.10.2 db-dtypes==1.4.3 google.cloud.bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index a2f0cb44a..829f08f47 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,4 +1,4 @@ -bigquery-magics==0.10.1 +bigquery-magics==0.10.2 db-dtypes==1.4.3 google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 From 3deff1d963980800e8b79fa3aaf5b712d4fd5062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 6 Aug 2025 04:50:30 -0500 Subject: [PATCH 502/536] feat: retry query jobs if `jobBackendError` or `jobInternalError` are encountered (#2256) * feat: retry query jobs if `jobBackendError` or `jobInternalError` are encountered * Update google/cloud/bigquery/retry.py --- google/cloud/bigquery/retry.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 999d0e851..8f469f2d3 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -124,6 +124,8 @@ def _should_retry_get_job_conflict(exc): "rateLimitExceeded", "backendError", "internalError", + "jobBackendError", + "jobInternalError", "jobRateLimitExceeded", ) From 0a95b24192395cc3ccf801aa9bc318999873a2bf Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Thu, 14 Aug 2025 15:28:56 -0700 Subject: [PATCH 503/536] feat: add created/started/ended properties to RowIterator. (#2260) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add created/started/ended attribute to RowIterator. * fix annotation * links update * mypy fix * Update google/cloud/bigquery/query.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/table.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/table.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/query.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/query.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/job/query.py Co-authored-by: Tim Sweña (Swast) * fix unit test --------- Co-authored-by: Tim Sweña (Swast) Co-authored-by: Lingqing Gan --- google/cloud/bigquery/_job_helpers.py | 3 ++ google/cloud/bigquery/client.py | 12 +++++++ google/cloud/bigquery/job/query.py | 3 ++ google/cloud/bigquery/query.py | 52 ++++++++++++++++++++++++++- google/cloud/bigquery/table.py | 31 +++++++++++++++- tests/unit/job/test_query.py | 6 ++++ tests/unit/test_client.py | 6 ++++ tests/unit/test_query.py | 48 +++++++++++++++++++++++++ 8 files changed, 159 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 73d4f6e7b..aa0b115d9 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -561,6 +561,9 @@ def do_query(): query=query, total_bytes_processed=query_results.total_bytes_processed, slot_millis=query_results.slot_millis, + created=query_results.created, + started=query_results.started, + ended=query_results.ended, ) if job_retry is not None: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 804f77ea2..8048452db 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -4145,6 +4145,9 @@ def _list_rows_from_query_results( query: Optional[str] = None, total_bytes_processed: Optional[int] = None, slot_millis: Optional[int] = None, + created: Optional[datetime.datetime] = None, + started: Optional[datetime.datetime] = None, + ended: Optional[datetime.datetime] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -4198,6 +4201,12 @@ def _list_rows_from_query_results( total bytes processed from job statistics, if present. slot_millis (Optional[int]): Number of slot ms the user is actually billed for. + created (Optional[datetime.datetime]): + Datetime at which the job was created. + started (Optional[datetime.datetime]): + Datetime at which the job was started. + ended (Optional[datetime.datetime]): + Datetime at which the job finished. Returns: google.cloud.bigquery.table.RowIterator: @@ -4238,6 +4247,9 @@ def _list_rows_from_query_results( query=query, total_bytes_processed=total_bytes_processed, slot_millis=slot_millis, + created=created, + started=started, + ended=ended, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index ec9379ea9..44d8a92e6 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1767,6 +1767,9 @@ def is_job_done(): query=self.query, total_bytes_processed=self.total_bytes_processed, slot_millis=self.slot_millis, + created=self.created, + started=self.started, + ended=self.ended, **list_rows_kwargs, ) rows._preserve_order = _contains_order_by(self.query) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 4a006d621..58372f1e6 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -1287,7 +1287,7 @@ def slot_millis(self): """Total number of slot ms the user is actually billed for. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.slot_millis + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.total_slot_ms Returns: Optional[int]: Count generated on the server (None until set by the server). @@ -1310,6 +1310,56 @@ def num_dml_affected_rows(self): if num_dml_affected_rows is not None: return int(num_dml_affected_rows) + @property + def created(self): + """Creation time of this query. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.creation_time + + Returns: + Optional[datetime.datetime]: + the creation time (None until set from the server). + """ + millis = self._properties.get("creationTime") + if millis is not None: + return _helpers._datetime_from_microseconds(int(millis) * 1000.0) + + @property + def started(self): + """Start time of this query. + + This field will be present when the query transitions from the + PENDING state to either RUNNING or DONE. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.start_time + + Returns: + Optional[datetime.datetime]: + the start time (None until set from the server). + """ + millis = self._properties.get("startTime") + if millis is not None: + return _helpers._datetime_from_microseconds(int(millis) * 1000.0) + + @property + def ended(self): + """End time of this query. + + This field will be present whenever a query is in the DONE state. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.end_time + + Returns: + Optional[datetime.datetime]: + the end time (None until set from the server). + """ + millis = self._properties.get("endTime") + if millis is not None: + return _helpers._datetime_from_microseconds(int(millis) * 1000.0) + @property def rows(self): """Query results. diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index dbdde36d1..a0986c44e 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1788,7 +1788,15 @@ class RowIterator(HTTPIterator): query (Optional[str]): The query text used. total_bytes_processed (Optional[int]): - total bytes processed from job statistics, if present. + If representing query results, the total bytes processed by the associated query. + slot_millis (Optional[int]): + If representing query results, the number of slot ms billed for the associated query. + created (Optional[datetime.datetime]): + If representing query results, the creation time of the associated query. + started (Optional[datetime.datetime]): + If representing query results, the start time of the associated query. + ended (Optional[datetime.datetime]): + If representing query results, the end time of the associated query. """ def __init__( @@ -1813,6 +1821,9 @@ def __init__( query: Optional[str] = None, total_bytes_processed: Optional[int] = None, slot_millis: Optional[int] = None, + created: Optional[datetime.datetime] = None, + started: Optional[datetime.datetime] = None, + ended: Optional[datetime.datetime] = None, ): super(RowIterator, self).__init__( client, @@ -1843,6 +1854,9 @@ def __init__( self._query = query self._total_bytes_processed = total_bytes_processed self._slot_millis = slot_millis + self._job_created = created + self._job_started = started + self._job_ended = ended @property def _billing_project(self) -> Optional[str]: @@ -1905,6 +1919,21 @@ def slot_millis(self) -> Optional[int]: """Number of slot ms the user is actually billed for.""" return self._slot_millis + @property + def created(self) -> Optional[datetime.datetime]: + """If representing query results, the creation time of the associated query.""" + return self._job_created + + @property + def started(self) -> Optional[datetime.datetime]: + """If representing query results, the start time of the associated query.""" + return self._job_started + + @property + def ended(self) -> Optional[datetime.datetime]: + """If representing query results, the end time of the associated query.""" + return self._job_ended + def _is_almost_completely_cached(self): """Check if all results are completely cached. diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 7201adb55..8f684c3e9 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -889,6 +889,9 @@ def test_result_reloads_job_state_until_done(self): job_resource_done = self._make_resource(started=True, ended=True, location="EU") job_resource_done["statistics"]["query"]["totalBytesProcessed"] = str(1234) job_resource_done["statistics"]["query"]["totalSlotMs"] = str(5678) + job_resource_done["statistics"]["creationTime"] = str(11) + job_resource_done["statistics"]["startTime"] = str(22) + job_resource_done["statistics"]["endTime"] = str(33) job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", @@ -971,6 +974,9 @@ def test_result_reloads_job_state_until_done(self): self.assertEqual(result.query, job.query) self.assertEqual(result.total_bytes_processed, 1234) self.assertEqual(result.slot_millis, 5678) + self.assertEqual(result.created.timestamp() * 1000, 11) + self.assertEqual(result.started.timestamp() * 1000, 22) + self.assertEqual(result.ended.timestamp() * 1000, 33) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index bb86ccc3c..c3cf33279 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5719,6 +5719,9 @@ def test_query_and_wait_defaults(self): "queryId": "job_abcDEF_", "totalBytesProcessed": 1234, "totalSlotMs": 5678, + "creationTime": "1437767599006", + "startTime": "1437767600007", + "endTime": "1437767601008", } creds = _make_credentials() http = object() @@ -5737,6 +5740,9 @@ def test_query_and_wait_defaults(self): self.assertEqual(rows.query, query) self.assertEqual(rows.total_bytes_processed, 1234) self.assertEqual(rows.slot_millis, 5678) + self.assertEqual(rows.created.timestamp() * 1000, 1437767599006) + self.assertEqual(rows.started.timestamp() * 1000, 1437767600007) + self.assertEqual(rows.ended.timestamp() * 1000, 1437767601008) # Verify the request we send is to jobs.query. conn.api_request.assert_called_once() diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 2b704d3c9..adb43bcd9 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -2016,6 +2016,54 @@ def test_slot_millis_present_string(self): query = self._make_one(resource) self.assertEqual(query.slot_millis, 123456) + def test_created_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.created) + + def test_created_present_integer(self): + resource = self._make_resource() + resource["creationTime"] = 1437767599006 + query = self._make_one(resource) + self.assertEqual(query.created.timestamp() * 1000, 1437767599006) + + def test_created_present_string(self): + resource = self._make_resource() + resource["creationTime"] = "1437767599006" + query = self._make_one(resource) + self.assertEqual(query.created.timestamp() * 1000, 1437767599006) + + def test_started_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.started) + + def test_started_present_integer(self): + resource = self._make_resource() + resource["startTime"] = 1437767599006 + query = self._make_one(resource) + self.assertEqual(query.started.timestamp() * 1000, 1437767599006) + + def test_started_present_string(self): + resource = self._make_resource() + resource["startTime"] = "1437767599006" + query = self._make_one(resource) + self.assertEqual(query.started.timestamp() * 1000, 1437767599006) + + def test_ended_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.ended) + + def test_ended_present_integer(self): + resource = self._make_resource() + resource["endTime"] = 1437767599006 + query = self._make_one(resource) + self.assertEqual(query.ended.timestamp() * 1000, 1437767599006) + + def test_ended_present_string(self): + resource = self._make_resource() + resource["endTime"] = "1437767599006" + query = self._make_one(resource) + self.assertEqual(query.ended.timestamp() * 1000, 1437767599006) + def test_num_dml_affected_rows_missing(self): query = self._make_one(self._make_resource()) self.assertIsNone(query.num_dml_affected_rows) From 6659355a57f9393772fb315ec0387ab09630c18a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 18 Aug 2025 10:39:54 -0500 Subject: [PATCH 504/536] chore: add private `_query_and_wait_bigframes` method (#2250) * chore: add private `_query_and_wait_bigframes` method Towards internal issue b/409104302 * fix unit tests * revert type hints * lint * Apply suggestions from code review Co-authored-by: Chalmer Lowe * populate created, started, ended --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/_job_helpers.py | 161 +++++++++- google/cloud/bigquery/client.py | 40 ++- google/cloud/bigquery/job/query.py | 6 + google/cloud/bigquery/query.py | 9 +- google/cloud/bigquery/table.py | 3 +- tests/unit/test_client_bigframes.py | 411 ++++++++++++++++++++++++++ 6 files changed, 619 insertions(+), 11 deletions(-) create mode 100644 tests/unit/test_client_bigframes.py diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index aa0b115d9..6fd561f8c 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -35,17 +35,22 @@ predicates where it is safe to generate a new query ID. """ +from __future__ import annotations + import copy +import dataclasses +import datetime import functools import uuid import textwrap -from typing import Any, Dict, Optional, TYPE_CHECKING, Union +from typing import Any, Callable, Dict, Optional, TYPE_CHECKING, Union import warnings import google.api_core.exceptions as core_exceptions from google.api_core import retry as retries from google.cloud.bigquery import job +import google.cloud.bigquery.job.query import google.cloud.bigquery.query from google.cloud.bigquery import table import google.cloud.bigquery.retry @@ -116,14 +121,21 @@ def query_jobs_insert( retry: Optional[retries.Retry], timeout: Optional[float], job_retry: Optional[retries.Retry], + *, + callback: Callable = lambda _: None, ) -> job.QueryJob: """Initiate a query using jobs.insert. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + + Args: + callback (Callable): + A callback function used by bigframes to report query progress. """ job_id_given = job_id is not None job_id_save = job_id job_config_save = job_config + query_sent_factory = QuerySentEventFactory() def do_query(): # Make a copy now, so that original doesn't get changed by the process @@ -136,6 +148,16 @@ def do_query(): try: query_job._begin(retry=retry, timeout=timeout) + if job_config is not None and not job_config.dry_run: + callback( + query_sent_factory( + query=query, + billing_project=query_job.project, + location=query_job.location, + job_id=query_job.job_id, + request_id=None, + ) + ) except core_exceptions.Conflict as create_exc: # The thought is if someone is providing their own job IDs and they get # their job ID generation wrong, this could end up returning results for @@ -396,6 +418,7 @@ def query_and_wait( job_retry: Optional[retries.Retry], page_size: Optional[int] = None, max_results: Optional[int] = None, + callback: Callable = lambda _: None, ) -> table.RowIterator: """Run the query, wait for it to finish, and return the results. @@ -415,9 +438,8 @@ def query_and_wait( location (Optional[str]): Location where to run the job. Must match the location of the table used in the query as well as the destination table. - project (Optional[str]): - Project ID of the project of where to run the job. Defaults - to the client's project. + project (str): + Project ID of the project of where to run the job. api_timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -441,6 +463,8 @@ def query_and_wait( request. Non-positive values are ignored. max_results (Optional[int]): The maximum total number of rows from this request. + callback (Callable): + A callback function used by bigframes to report query progress. Returns: google.cloud.bigquery.table.RowIterator: @@ -479,12 +503,14 @@ def query_and_wait( retry=retry, timeout=api_timeout, job_retry=job_retry, + callback=callback, ), api_timeout=api_timeout, wait_timeout=wait_timeout, retry=retry, page_size=page_size, max_results=max_results, + callback=callback, ) path = _to_query_path(project) @@ -496,10 +522,24 @@ def query_and_wait( if client.default_job_creation_mode: request_body["jobCreationMode"] = client.default_job_creation_mode + query_sent_factory = QuerySentEventFactory() + def do_query(): - request_body["requestId"] = make_job_id() + request_id = make_job_id() + request_body["requestId"] = request_id span_attributes = {"path": path} + if "dryRun" not in request_body: + callback( + query_sent_factory( + query=query, + billing_project=project, + location=location, + job_id=None, + request_id=request_id, + ) + ) + # For easier testing, handle the retries ourselves. if retry is not None: response = retry(client._call_api)( @@ -542,8 +582,25 @@ def do_query(): retry=retry, page_size=page_size, max_results=max_results, + callback=callback, ) + if "dryRun" not in request_body: + callback( + QueryFinishedEvent( + billing_project=project, + location=query_results.location, + query_id=query_results.query_id, + job_id=query_results.job_id, + total_rows=query_results.total_rows, + total_bytes_processed=query_results.total_bytes_processed, + slot_millis=query_results.slot_millis, + destination=None, + created=query_results.created, + started=query_results.started, + ended=query_results.ended, + ) + ) return table.RowIterator( client=client, api_request=functools.partial(client._call_api, retry, timeout=api_timeout), @@ -614,6 +671,8 @@ def _wait_or_cancel( retry: Optional[retries.Retry], page_size: Optional[int], max_results: Optional[int], + *, + callback: Callable = lambda _: None, ) -> table.RowIterator: """Wait for a job to complete and return the results. @@ -621,12 +680,43 @@ def _wait_or_cancel( the job. """ try: - return job.result( + if not job.dry_run: + callback( + QueryReceivedEvent( + billing_project=job.project, + location=job.location, + job_id=job.job_id, + statement_type=job.statement_type, + state=job.state, + query_plan=job.query_plan, + created=job.created, + started=job.started, + ended=job.ended, + ) + ) + query_results = job.result( page_size=page_size, max_results=max_results, retry=retry, timeout=wait_timeout, ) + if not job.dry_run: + callback( + QueryFinishedEvent( + billing_project=job.project, + location=query_results.location, + query_id=query_results.query_id, + job_id=query_results.job_id, + total_rows=query_results.total_rows, + total_bytes_processed=query_results.total_bytes_processed, + slot_millis=query_results.slot_millis, + destination=job.destination, + created=job.created, + started=job.started, + ended=job.ended, + ) + ) + return query_results except Exception: # Attempt to cancel the job since we can't return the results. try: @@ -635,3 +725,62 @@ def _wait_or_cancel( # Don't eat the original exception if cancel fails. pass raise + + +@dataclasses.dataclass(frozen=True) +class QueryFinishedEvent: + """Query finished successfully.""" + + billing_project: Optional[str] + location: Optional[str] + query_id: Optional[str] + job_id: Optional[str] + destination: Optional[table.TableReference] + total_rows: Optional[int] + total_bytes_processed: Optional[int] + slot_millis: Optional[int] + created: Optional[datetime.datetime] + started: Optional[datetime.datetime] + ended: Optional[datetime.datetime] + + +@dataclasses.dataclass(frozen=True) +class QueryReceivedEvent: + """Query received and acknowledged by the BigQuery API.""" + + billing_project: Optional[str] + location: Optional[str] + job_id: Optional[str] + statement_type: Optional[str] + state: Optional[str] + query_plan: Optional[list[google.cloud.bigquery.job.query.QueryPlanEntry]] + created: Optional[datetime.datetime] + started: Optional[datetime.datetime] + ended: Optional[datetime.datetime] + + +@dataclasses.dataclass(frozen=True) +class QuerySentEvent: + """Query sent to BigQuery.""" + + query: str + billing_project: Optional[str] + location: Optional[str] + job_id: Optional[str] + request_id: Optional[str] + + +class QueryRetryEvent(QuerySentEvent): + """Query sent another time because the previous attempt failed.""" + + +class QuerySentEventFactory: + """Creates a QuerySentEvent first, then QueryRetryEvent after that.""" + + def __init__(self): + self._event_constructor = QuerySentEvent + + def __call__(self, **kwargs): + result = self._event_constructor(**kwargs) + self._event_constructor = QueryRetryEvent + return result diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8048452db..4ca2cb428 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -15,6 +15,7 @@ """Client for interacting with the Google BigQuery API.""" from __future__ import absolute_import +from __future__ import annotations from __future__ import division from collections import abc as collections_abc @@ -31,6 +32,7 @@ import typing from typing import ( Any, + Callable, Dict, IO, Iterable, @@ -3633,8 +3635,8 @@ def query_and_wait( rate-limit-exceeded errors. Passing ``None`` disables job retry. Not all jobs can be retried. page_size (Optional[int]): - The maximum number of rows in each page of results from this - request. Non-positive values are ignored. + The maximum number of rows in each page of results from the + initial jobs.query request. Non-positive values are ignored. max_results (Optional[int]): The maximum total number of rows from this request. @@ -3656,6 +3658,39 @@ def query_and_wait( :class:`~google.cloud.bigquery.job.QueryJobConfig` class. """ + return self._query_and_wait_bigframes( + query, + job_config=job_config, + location=location, + project=project, + api_timeout=api_timeout, + wait_timeout=wait_timeout, + retry=retry, + job_retry=job_retry, + page_size=page_size, + max_results=max_results, + ) + + def _query_and_wait_bigframes( + self, + query, + *, + job_config: Optional[QueryJobConfig] = None, + location: Optional[str] = None, + project: Optional[str] = None, + api_timeout: TimeoutType = DEFAULT_TIMEOUT, + wait_timeout: Union[Optional[float], object] = POLLING_DEFAULT_VALUE, + retry: retries.Retry = DEFAULT_RETRY, + job_retry: retries.Retry = DEFAULT_JOB_RETRY, + page_size: Optional[int] = None, + max_results: Optional[int] = None, + callback: Callable = lambda _: None, + ) -> RowIterator: + """See query_and_wait. + + This method has an extra callback parameter, which is used by bigframes + to create better progress bars. + """ if project is None: project = self.project @@ -3681,6 +3716,7 @@ def query_and_wait( job_retry=job_retry, page_size=page_size, max_results=max_results, + callback=callback, ) def insert_rows( diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 44d8a92e6..b377f979d 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1550,6 +1550,8 @@ def result( # type: ignore # (incompatible with supertype) return _EmptyRowIterator( project=self.project, location=self.location, + schema=self.schema, + total_bytes_processed=self.total_bytes_processed, # Intentionally omit job_id and query_id since this doesn't # actually correspond to a finished query job. ) @@ -1737,7 +1739,11 @@ def is_job_done(): project=self.project, job_id=self.job_id, query_id=self.query_id, + schema=self.schema, num_dml_affected_rows=self._query_results.num_dml_affected_rows, + query=self.query, + total_bytes_processed=self.total_bytes_processed, + slot_millis=self.slot_millis, ) # We know that there's at least 1 row, so only treat the response from diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 58372f1e6..7f70f6a2a 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -1228,11 +1228,18 @@ def location(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.job_reference + or https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.location Returns: str: Job ID of the query job. """ - return self._properties.get("jobReference", {}).get("location") + location = self._properties.get("jobReference", {}).get("location") + + # Sometimes there's no job, but we still want to get the location + # information. Prefer the value from job for backwards compatibilitity. + if not location: + location = self._properties.get("location") + return location @property def query_id(self) -> Optional[str]: diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index a0986c44e..219b31467 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1837,7 +1837,7 @@ def __init__( page_start=_rows_page_start, next_token="pageToken", ) - schema = _to_schema_fields(schema) + schema = _to_schema_fields(schema) if schema else () self._field_to_index = _helpers._field_to_index_mapping(schema) self._page_size = page_size self._preserve_order = False @@ -2917,7 +2917,6 @@ class _EmptyRowIterator(RowIterator): statements. """ - schema = () pages = () total_rows = 0 diff --git a/tests/unit/test_client_bigframes.py b/tests/unit/test_client_bigframes.py new file mode 100644 index 000000000..0fcc31e40 --- /dev/null +++ b/tests/unit/test_client_bigframes.py @@ -0,0 +1,411 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for Client features enabling the bigframes integration.""" + +from __future__ import annotations + +import datetime +from unittest import mock + +import pytest + +import google.auth.credentials +from google.api_core import exceptions +from google.cloud import bigquery +import google.cloud.bigquery.client +from google.cloud.bigquery import _job_helpers + + +PROJECT = "test-project" +LOCATION = "test-location" + + +def make_response(body, *, status_code: int = 200): + response = mock.Mock() + type(response).status_code = mock.PropertyMock(return_value=status_code) + response.json.return_value = body + return response + + +@pytest.fixture +def client(): + """A real client object with mocked API requests.""" + credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + http_session = mock.Mock() + return google.cloud.bigquery.client.Client( + project=PROJECT, + credentials=credentials, + _http=http_session, + location=LOCATION, + ) + + +def test_query_and_wait_bigframes_dry_run_no_callback(client): + client._http.request.side_effect = [ + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + "location": LOCATION, + "queryId": "abcdefg", + "totalBytesProcessed": "123", + "jobComplete": True, + } + ), + ] + callback = mock.Mock() + job_config = bigquery.QueryJobConfig(dry_run=True) + response = client._query_and_wait_bigframes( + query="SELECT 1", job_config=job_config, callback=callback + ) + callback.assert_not_called() + assert response.total_bytes_processed == 123 + assert response.query_id == "abcdefg" + + +def test_query_and_wait_bigframes_callback(client): + created = datetime.datetime( + 2025, 8, 18, 10, 11, 12, 345000, tzinfo=datetime.timezone.utc + ) + started = datetime.datetime( + 2025, 8, 18, 10, 11, 13, 456000, tzinfo=datetime.timezone.utc + ) + ended = datetime.datetime( + 2025, 8, 18, 10, 11, 14, 567000, tzinfo=datetime.timezone.utc + ) + client._http.request.side_effect = [ + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + "location": LOCATION, + "queryId": "abcdefg", + "totalRows": "100", + "totalBytesProcessed": "123", + "totalSlotMs": "987", + "jobComplete": True, + "creationTime": _to_millis(created), + "startTime": _to_millis(started), + "endTime": _to_millis(ended), + } + ), + ] + callback = mock.Mock() + client._query_and_wait_bigframes(query="SELECT 1", callback=callback) + callback.assert_has_calls( + [ + mock.call( + _job_helpers.QuerySentEvent( + query="SELECT 1", + billing_project=PROJECT, + location=LOCATION, + # No job ID, because a basic query is eligible for jobs.query. + job_id=None, + request_id=mock.ANY, + ) + ), + mock.call( + _job_helpers.QueryFinishedEvent( + billing_project=PROJECT, + location=LOCATION, + query_id="abcdefg", + total_rows=100, + total_bytes_processed=123, + slot_millis=987, + created=created, + started=started, + ended=ended, + # No job ID or destination, because a basic query is eligible for jobs.query. + job_id=None, + destination=None, + ), + ), + ] + ) + + +def _to_millis(dt: datetime.datetime) -> str: + return str( + int( + (dt - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)) + / datetime.timedelta(milliseconds=1) + ) + ) + + +def test_query_and_wait_bigframes_with_jobs_insert_callback_empty_results(client): + client._http.request.side_effect = [ + # jobs.insert because destination table present in job_config + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "statistics": { + "creationTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ) + ), + "query": { + "statementType": "SELECT", + # "queryPlan": [{"name": "part1"}, {"name": "part2"}], + }, + }, + "status": { + "state": "PENDING", + }, + } + ), + # jobs.get waiting for query to finish + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "status": { + "state": "PENDING", + }, + } + ), + # jobs.getQueryResults with max_results=0 + make_response( + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "jobComplete": True, + # totalRows is intentionally missing so we end up in the _EmptyRowIterator code path. + } + ), + # jobs.get + make_response( + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "statistics": { + "creationTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ) + ), + "startTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 32, 123000, tzinfo=datetime.timezone.utc + ) + ), + "endTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 33, 123000, tzinfo=datetime.timezone.utc + ) + ), + "query": { + "statementType": "SELECT", + "totalBytesProcessed": 123, + "totalSlotMs": 987, + }, + }, + "status": {"state": "DONE"}, + } + ), + ] + callback = mock.Mock() + config = bigquery.QueryJobConfig() + config.destination = "proj.dset.table" + client._query_and_wait_bigframes( + query="SELECT 1", job_config=config, callback=callback + ) + callback.assert_has_calls( + [ + mock.call( + _job_helpers.QuerySentEvent( + query="SELECT 1", + billing_project="response-project", + location="response-location", + job_id="response-job-id", + # We use jobs.insert not jobs.query because destination is + # present on job_config. + request_id=None, + ) + ), + mock.call( + _job_helpers.QueryReceivedEvent( + billing_project="response-project", + location="response-location", + job_id="response-job-id", + statement_type="SELECT", + state="PENDING", + query_plan=[], + created=datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ), + started=None, + ended=None, + ) + ), + mock.call( + _job_helpers.QueryFinishedEvent( + billing_project="response-project", + location="response-location", + job_id="response-job-id", + query_id=None, + total_rows=0, + total_bytes_processed=123, + slot_millis=987, + created=datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ), + started=datetime.datetime( + 2025, 8, 13, 13, 7, 32, 123000, tzinfo=datetime.timezone.utc + ), + ended=datetime.datetime( + 2025, 8, 13, 13, 7, 33, 123000, tzinfo=datetime.timezone.utc + ), + destination=None, + ), + ), + ] + ) + + +def test_query_and_wait_bigframes_with_jobs_insert_dry_run_no_callback(client): + client._http.request.side_effect = [ + # jobs.insert because destination table present in job_config + make_response( + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "statistics": { + "creationTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ) + ), + "query": { + "statementType": "SELECT", + "totalBytesProcessed": 123, + "schema": { + "fields": [ + {"name": "_f0", "type": "INTEGER"}, + ], + }, + }, + }, + "configuration": { + "dryRun": True, + }, + "status": {"state": "DONE"}, + } + ), + ] + callback = mock.Mock() + config = bigquery.QueryJobConfig() + config.destination = "proj.dset.table" + config.dry_run = True + result = client._query_and_wait_bigframes( + query="SELECT 1", job_config=config, callback=callback + ) + callback.assert_not_called() + assert result.total_bytes_processed == 123 + assert result.schema == [bigquery.SchemaField("_f0", "INTEGER")] + + +def test_query_and_wait_bigframes_with_query_retry_callbacks(client): + created = datetime.datetime( + 2025, 8, 18, 10, 11, 12, 345000, tzinfo=datetime.timezone.utc + ) + started = datetime.datetime( + 2025, 8, 18, 10, 11, 13, 456000, tzinfo=datetime.timezone.utc + ) + ended = datetime.datetime( + 2025, 8, 18, 10, 11, 14, 567000, tzinfo=datetime.timezone.utc + ) + client._http.request.side_effect = [ + exceptions.InternalServerError( + "first try", errors=({"reason": "jobInternalError"},) + ), + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + "location": LOCATION, + "queryId": "abcdefg", + "totalRows": "100", + "totalBytesProcessed": "123", + "totalSlotMs": "987", + "jobComplete": True, + "creationTime": _to_millis(created), + "startTime": _to_millis(started), + "endTime": _to_millis(ended), + } + ), + ] + callback = mock.Mock() + client._query_and_wait_bigframes(query="SELECT 1", callback=callback) + callback.assert_has_calls( + [ + mock.call( + _job_helpers.QuerySentEvent( + query="SELECT 1", + billing_project=PROJECT, + location=LOCATION, + # No job ID, because a basic query is eligible for jobs.query. + job_id=None, + request_id=mock.ANY, + ) + ), + mock.call( + _job_helpers.QueryRetryEvent( + query="SELECT 1", + billing_project=PROJECT, + location=LOCATION, + # No job ID, because a basic query is eligible for jobs.query. + job_id=None, + request_id=mock.ANY, + ) + ), + mock.call( + _job_helpers.QueryFinishedEvent( + billing_project=PROJECT, + location=LOCATION, + query_id=mock.ANY, + total_rows=100, + total_bytes_processed=123, + slot_millis=987, + created=created, + started=started, + ended=ended, + # No job ID or destination, because a basic query is eligible for jobs.query. + job_id=None, + destination=None, + ), + ), + ] + ) From b68483227693ea68f6b12eacca2be1803cffb1d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 18 Aug 2025 13:02:13 -0500 Subject: [PATCH 505/536] docs: add a TROUBLESHOOTING.md file with tips for logging (#2262) * docs: add a TROUBLESHOOTING.md file with tips for logging * typo * finish my sentence --------- Co-authored-by: Lingqing Gan --- TROUBLESHOOTING.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 TROUBLESHOOTING.md diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md new file mode 100644 index 000000000..7da12c440 --- /dev/null +++ b/TROUBLESHOOTING.md @@ -0,0 +1,34 @@ +# Troubleshooting steps + +## Enable logging of BQ Storage Read API session creation + +It can be helpful to get the BQ Storage Read API session to allow the BigQuery +backend team to debug cases of API instability. The logs that share the session +creation are in a module-specific logger. To enable the logs, refer to the +following code sample: + +```python +import logging +import google.cloud.bigquery + +# Configure the basic logging to show DEBUG level messages +log_formatter = logging.Formatter( + '%(asctime)s - %(levelname)s - %(message)s' +) +handler = logging.StreamHandler() +handler.setFormatter(log_formatter) +default_logger = logging.getLogger() +default_logger.setLevel(logging.DEBUG) +default_logger.addHandler(handler) +to_dataframe_logger = logging.getLogger("google.cloud.bigquery._pandas_helpers") +to_dataframe_logger.setLevel(logging.DEBUG) +to_dataframe_logger.addHandler(handler) + +# Example code that touches the BQ Storage Read API. +bqclient = google.cloud.bigquery.Client() +results = bqclient.query_and_wait("SELECT * FROM `bigquery-public-data.usa_names.usa_1910_2013`") +print(results.to_dataframe().head()) +``` + +In particular, watch for the text "with BQ Storage API session" in the logs +to get the streaming API session ID to share with your support person. From d41fd5c7475d2cb7f7afc4cd5aaa080abfe831e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 20 Aug 2025 14:36:03 -0500 Subject: [PATCH 506/536] chore: execute retry tests serially, since they depend on global time (#2265) * chore: migrate test_base retry tests * migrate job_helpers test * migrate more tests * fix initiate resumable upload tests * fix failing tests * remove dead test code --- tests/unit/conftest.py | 13 + tests/unit/job/test_async_job_retry.py | 139 +++++ tests/unit/job/test_base.py | 104 ---- tests/unit/job/test_query.py | 196 ------ tests/unit/job/test_query_job_retry.py | 229 +++++++ tests/unit/test__job_helpers.py | 105 ---- tests/unit/test__job_helpers_retry.py | 122 ++++ tests/unit/test_client.py | 587 ------------------ tests/unit/test_client_bigframes.py | 2 +- .../test_client_resumable_media_upload.py | 433 +++++++++++++ tests/unit/test_client_retry.py | 279 +++++++++ tests/unit/test_job_retry.py | 18 +- 12 files changed, 1226 insertions(+), 1001 deletions(-) create mode 100644 tests/unit/job/test_async_job_retry.py create mode 100644 tests/unit/job/test_query_job_retry.py create mode 100644 tests/unit/test__job_helpers_retry.py create mode 100644 tests/unit/test_client_resumable_media_upload.py create mode 100644 tests/unit/test_client_retry.py diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index ebe2d2a7a..5070a199b 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -13,6 +13,7 @@ # limitations under the License. from unittest import mock +import threading import pytest @@ -24,6 +25,18 @@ def client(): yield make_client() +time_lock = threading.Lock() + + +@pytest.fixture +def global_time_lock(): + """Fixture to run tests serially that depend on the global time state, + such as tests of retry behavior. + """ + with time_lock: + yield + + @pytest.fixture def PROJECT(): yield "PROJECT" diff --git a/tests/unit/job/test_async_job_retry.py b/tests/unit/job/test_async_job_retry.py new file mode 100644 index 000000000..35041aa1b --- /dev/null +++ b/tests/unit/job/test_async_job_retry.py @@ -0,0 +1,139 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock + +import google.api_core.retry +from google.api_core import exceptions + +from . import helpers +import google.cloud.bigquery.job + + +PROJECT = "test-project" +JOB_ID = "test-job-id" + + +def test_cancel_w_custom_retry(global_time_lock): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + api_path = "/projects/{}/jobs/{}/cancel".format(PROJECT, JOB_ID) + resource = { + "jobReference": { + "jobId": JOB_ID, + "projectId": PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + expected = resource.copy() + expected["statistics"] = {} + response = {"job": resource} + conn = helpers.make_connection( + ValueError, + response, + ) + client = helpers._make_client(project=PROJECT, connection=conn) + job = google.cloud.bigquery.job._AsyncJob( + google.cloud.bigquery.job._JobReference(JOB_ID, PROJECT, "EU"), client + ) + + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, ValueError) + ) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + result = job.cancel(retry=retry, timeout=7.5) + + final_attributes.assert_called() + + assert result is True + assert job._properties == expected + conn.api_request.assert_has_calls( + [ + mock.call( + method="POST", + path=api_path, + query_params={"location": "EU"}, + timeout=7.5, + ), + mock.call( + method="POST", + path=api_path, + query_params={"location": "EU"}, + timeout=7.5, + ), # was retried once + ], + ) + + +def test_result_w_retry_wo_state(global_time_lock): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + + begun_job_resource = helpers._make_job_resource( + job_id=JOB_ID, project_id=PROJECT, location="EU", started=True + ) + done_job_resource = helpers._make_job_resource( + job_id=JOB_ID, + project_id=PROJECT, + location="EU", + started=True, + ended=True, + ) + conn = helpers.make_connection( + exceptions.NotFound("not normally retriable"), + begun_job_resource, + exceptions.NotFound("not normally retriable"), + done_job_resource, + ) + client = helpers._make_client(project=PROJECT, connection=conn) + job = google.cloud.bigquery.job._AsyncJob( + google.cloud.bigquery.job._JobReference(JOB_ID, PROJECT, "EU"), client + ) + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry( + predicate=custom_predicate, + initial=0.001, + maximum=0.001, + deadline=0.1, + ) + assert job.result(retry=custom_retry) is job + + begin_call = mock.call( + method="POST", + path=f"/projects/{PROJECT}/jobs", + data={ + "jobReference": { + "jobId": JOB_ID, + "projectId": PROJECT, + "location": "EU", + } + }, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{JOB_ID}", + query_params={ + "projection": "full", + "location": "EU", + }, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index aa3d49ce3..f5861f645 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -17,8 +17,6 @@ import unittest from unittest import mock -from google.api_core import exceptions -import google.api_core.retry from google.api_core.future import polling import pytest @@ -882,50 +880,6 @@ def test_cancel_explicit(self): ) self.assertEqual(job._properties, expected) - def test_cancel_w_custom_retry(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - api_path = "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID) - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - expected = resource.copy() - expected["statistics"] = {} - response = {"job": resource} - job = self._set_properties_job() - - api_request_patcher = mock.patch.object( - job._client._connection, "api_request", side_effect=[ValueError, response] - ) - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, ValueError) - ) - - with api_request_patcher as fake_api_request: - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - result = job.cancel(retry=retry, timeout=7.5) - - final_attributes.assert_called() - - self.assertTrue(result) - self.assertEqual(job._properties, expected) - self.assertEqual( - fake_api_request.call_args_list, - [ - mock.call(method="POST", path=api_path, query_params={}, timeout=7.5), - mock.call( - method="POST", path=api_path, query_params={}, timeout=7.5 - ), # was retried once - ], - ) - def test__set_future_result_wo_done(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -1069,64 +1023,6 @@ def test_result_default_wo_state(self): ) conn.api_request.assert_has_calls([begin_call, begin_call, reload_call]) - def test_result_w_retry_wo_state(self): - from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT - - begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, location="EU", started=True - ) - done_job_resource = _make_job_resource( - job_id=self.JOB_ID, - project_id=self.PROJECT, - location="EU", - started=True, - ended=True, - ) - conn = make_connection( - exceptions.NotFound("not normally retriable"), - begun_job_resource, - exceptions.NotFound("not normally retriable"), - done_job_resource, - ) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one( - self._job_reference(self.JOB_ID, self.PROJECT, "EU"), client - ) - custom_predicate = mock.Mock() - custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry( - predicate=custom_predicate, - initial=0.001, - maximum=0.001, - deadline=0.1, - ) - self.assertIs(job.result(retry=custom_retry), job) - - begin_call = mock.call( - method="POST", - path=f"/projects/{self.PROJECT}/jobs", - data={ - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": "EU", - } - }, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={ - "projection": "full", - "location": "EU", - }, - timeout=DEFAULT_GET_JOB_TIMEOUT, - ) - conn.api_request.assert_has_calls( - [begin_call, begin_call, reload_call, reload_call] - ) - def test_result_explicit_w_state(self): conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 8f684c3e9..ef6429598 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -20,15 +20,11 @@ import types from unittest import mock -import freezegun -from google.api_core import exceptions -import google.api_core.retry import requests from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery._job_helpers import google.cloud.bigquery.query -import google.cloud.bigquery.retry from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT from google.cloud.bigquery.table import _EmptyRowIterator @@ -1335,102 +1331,6 @@ def test_result_with_max_results(self): [jobs_get_call, query_page_waiting_call, query_page_2_call] ) - def test_result_w_custom_retry(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - query_resource_done = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", - } - job_resource = self._make_resource(started=True, location="asia-northeast1") - job_resource_done = self._make_resource( - started=True, ended=True, location="asia-northeast1" - ) - job_resource_done["configuration"]["query"]["destinationTable"] = { - "projectId": "dest-project", - "datasetId": "dest_dataset", - "tableId": "dest_table", - } - - connection = make_connection( - # Also, for each API request, raise an exception that we know can - # be retried. Because of this, for each iteration we do: - # jobs.get (x2) & jobs.getQueryResults (x2) - exceptions.NotFound("not normally retriable"), - job_resource, - exceptions.NotFound("not normally retriable"), - query_resource, - # Query still not done, repeat both. - exceptions.NotFound("not normally retriable"), - job_resource, - exceptions.NotFound("not normally retriable"), - query_resource, - exceptions.NotFound("not normally retriable"), - # Query still not done, repeat both. - job_resource_done, - exceptions.NotFound("not normally retriable"), - query_resource_done, - # Query finished! - ) - client = _make_client(self.PROJECT, connection=connection) - job = self._get_target_class().from_api_repr(job_resource, client) - - custom_predicate = mock.Mock() - custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry( - initial=0.001, - maximum=0.001, - multiplier=1.0, - deadline=0.1, - predicate=custom_predicate, - ) - - self.assertIsInstance(job.result(retry=custom_retry), RowIterator) - query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0, "location": "asia-northeast1"}, - # TODO(tswast): Why do we end up setting timeout to - # google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT in - # some cases but not others? - timeout=mock.ANY, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"projection": "full", "location": "asia-northeast1"}, - timeout=DEFAULT_GET_JOB_TIMEOUT, - ) - - connection.api_request.assert_has_calls( - [ - # See make_connection() call above for explanation of the - # expected API calls. - # - # Query not done. - reload_call, - reload_call, - query_results_call, - query_results_call, - # Query still not done. - reload_call, - reload_call, - query_results_call, - query_results_call, - # Query done! - reload_call, - reload_call, - query_results_call, - query_results_call, - ] - ) - def test_result_w_empty_schema(self): from google.cloud.bigquery.table import _EmptyRowIterator @@ -1455,102 +1355,6 @@ def test_result_w_empty_schema(self): self.assertEqual(result.location, "asia-northeast1") self.assertEqual(result.query_id, "xyz-abc") - def test_result_w_timeout_doesnt_raise(self): - import google.cloud.bigquery.client - - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = make_connection(begun_resource, query_resource, done_resource) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - job._properties["jobReference"]["location"] = "US" - job._properties["status"] = {"state": "RUNNING"} - - with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): - job.result( - # Test that fractional seconds are supported, but use a timeout - # that is representable as a floating point without rounding - # errors since it can be represented exactly in base 2. In this - # case 1.125 is 9 / 8, which is a fraction with a power of 2 in - # the denominator. - timeout=1.125, - ) - - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"projection": "full", "location": "US"}, - timeout=1.125, - ) - get_query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={ - "maxResults": 0, - "location": "US", - }, - timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, - ) - connection.api_request.assert_has_calls( - [ - reload_call, - get_query_results_call, - reload_call, - ] - ) - - def test_result_w_timeout_raises_concurrent_futures_timeout(self): - import google.cloud.bigquery.client - - begun_resource = self._make_resource() - begun_resource["jobReference"]["location"] = "US" - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = make_connection(begun_resource, query_resource, done_resource) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - job._properties["jobReference"]["location"] = "US" - job._properties["status"] = {"state": "RUNNING"} - - with freezegun.freeze_time( - "1970-01-01 00:00:00", auto_tick_seconds=1.0 - ), self.assertRaises(concurrent.futures.TimeoutError): - job.result(timeout=1.125) - - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"projection": "full", "location": "US"}, - timeout=1.125, - ) - get_query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={ - "maxResults": 0, - "location": "US", - }, - timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, - ) - connection.api_request.assert_has_calls( - [ - reload_call, - get_query_results_call, - # Timeout before we can reload with the final job state. - ] - ) - def test_result_w_page_size(self): # Arrange query_results_resource = { diff --git a/tests/unit/job/test_query_job_retry.py b/tests/unit/job/test_query_job_retry.py new file mode 100644 index 000000000..c8355b688 --- /dev/null +++ b/tests/unit/job/test_query_job_retry.py @@ -0,0 +1,229 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from unittest import mock + +import concurrent.futures +import freezegun +from google.api_core import exceptions +import google.api_core.retry +import pytest + +from google.cloud.bigquery.client import _MIN_GET_QUERY_RESULTS_TIMEOUT +from google.cloud.bigquery.job import QueryJob +from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT +from google.cloud.bigquery.table import RowIterator + +from ..helpers import make_connection +from .helpers import _make_client + + +PROJECT = "test-project" +JOB_ID = "test-job-id" +QUERY = "select count(*) from persons" + + +def _make_resource(started=False, ended=False, location="US"): + resource = { + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID, "location": location}, + "status": {"state": "PENDING"}, + "configuration": { + "query": {"query": QUERY}, + "job_type": "query", + }, + "statistics": {"creationTime": "1"}, + } + + if started: + resource["status"]["state"] = "RUNNING" + resource["statistics"]["startTime"] = "2" + + if ended: + resource["status"]["state"] = "DONE" + resource["statistics"]["endTime"] = "3" + + return resource + + +def test_result_w_custom_retry(global_time_lock): + query_resource = { + "jobComplete": False, + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID}, + } + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "2", + } + job_resource = _make_resource(started=True, location="asia-northeast1") + job_resource_done = _make_resource( + started=True, ended=True, location="asia-northeast1" + ) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + + connection = make_connection( + # Also, for each API request, raise an exception that we know can + # be retried. Because of this, for each iteration we do: + # jobs.get (x2) & jobs.getQueryResults (x2) + exceptions.NotFound("not normally retriable"), + job_resource, + exceptions.NotFound("not normally retriable"), + query_resource, + # Query still not done, repeat both. + exceptions.NotFound("not normally retriable"), + job_resource, + exceptions.NotFound("not normally retriable"), + query_resource, + exceptions.NotFound("not normally retriable"), + # Query still not done, repeat both. + job_resource_done, + exceptions.NotFound("not normally retriable"), + query_resource_done, + # Query finished! + ) + client = _make_client(PROJECT, connection=connection) + job = QueryJob.from_api_repr(job_resource, client) + + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry( + initial=0.001, + maximum=0.001, + multiplier=1.0, + deadline=0.1, + predicate=custom_predicate, + ) + + assert isinstance(job.result(retry=custom_retry), RowIterator) + query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/queries/{JOB_ID}", + query_params={"maxResults": 0, "location": "asia-northeast1"}, + timeout=mock.ANY, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{JOB_ID}", + query_params={"projection": "full", "location": "asia-northeast1"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) + + connection.api_request.assert_has_calls( + [ + reload_call, + reload_call, + query_results_call, + query_results_call, + reload_call, + reload_call, + query_results_call, + query_results_call, + reload_call, + reload_call, + query_results_call, + query_results_call, + ] + ) + + +def test_result_w_timeout_doesnt_raise(global_time_lock): + begun_resource = _make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + done_resource = begun_resource.copy() + done_resource["status"] = {"state": "DONE"} + connection = make_connection(begun_resource, query_resource, done_resource) + client = _make_client(project=PROJECT, connection=connection) + job = QueryJob(JOB_ID, QUERY, client) + job._properties["jobReference"]["location"] = "US" + job._properties["status"] = {"state": "RUNNING"} + + with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): + job.result( + timeout=1.125, + ) + + reload_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{JOB_ID}", + query_params={"projection": "full", "location": "US"}, + timeout=1.125, + ) + get_query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/queries/{JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=_MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + connection.api_request.assert_has_calls( + [ + reload_call, + get_query_results_call, + reload_call, + ] + ) + + +def test_result_w_timeout_raises_concurrent_futures_timeout(global_time_lock): + begun_resource = _make_resource() + begun_resource["jobReference"]["location"] = "US" + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + done_resource = begun_resource.copy() + done_resource["status"] = {"state": "DONE"} + connection = make_connection(begun_resource, query_resource, done_resource) + client = _make_client(project=PROJECT, connection=connection) + job = QueryJob(JOB_ID, QUERY, client) + job._properties["jobReference"]["location"] = "US" + job._properties["status"] = {"state": "RUNNING"} + + with freezegun.freeze_time( + "1970-01-01 00:00:00", auto_tick_seconds=1.0 + ), pytest.raises(concurrent.futures.TimeoutError): + job.result(timeout=1.125) + + reload_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{JOB_ID}", + query_params={"projection": "full", "location": "US"}, + timeout=1.125, + ) + get_query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/queries/{JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=_MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + connection.api_request.assert_has_calls( + [ + reload_call, + get_query_results_call, + ] + ) diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 417f911b8..1f543f033 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -15,7 +15,6 @@ from typing import Any, Dict, Optional from unittest import mock -import freezegun import google.api_core.exceptions from google.api_core import retry as retries import pytest @@ -450,110 +449,6 @@ def test_query_and_wait_uses_jobs_insert(): ) -def test_query_and_wait_retries_job(): - freezegun.freeze_time(auto_tick_seconds=100) - client = mock.create_autospec(Client) - client._call_api.__name__ = "_call_api" - client._call_api.__qualname__ = "Client._call_api" - client._call_api.__annotations__ = {} - client._call_api.__type_params__ = () - client._call_api.side_effect = ( - google.api_core.exceptions.BadGateway("retry me"), - google.api_core.exceptions.InternalServerError("job_retry me"), - google.api_core.exceptions.BadGateway("retry me"), - { - "jobReference": { - "projectId": "response-project", - "jobId": "abc", - "location": "response-location", - }, - "jobComplete": True, - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INT64", "mode": "NULLABLE"}, - ], - }, - "rows": [ - {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, - {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - ], - }, - ) - rows = _job_helpers.query_and_wait( - client, - query="SELECT 1", - location="request-location", - project="request-project", - job_config=None, - page_size=None, - max_results=None, - retry=retries.Retry( - lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), - multiplier=1.0, - ).with_deadline( - 200.0 - ), # Since auto_tick_seconds is 100, we should get at least 1 retry. - job_retry=retries.Retry( - lambda exc: isinstance(exc, google.api_core.exceptions.InternalServerError), - multiplier=1.0, - ).with_deadline(600.0), - ) - assert len(list(rows)) == 4 - - # For this code path, where the query has finished immediately, we should - # only be calling the jobs.query API and no other request path. - request_path = "/projects/request-project/queries" - for call in client._call_api.call_args_list: - _, kwargs = call - assert kwargs["method"] == "POST" - assert kwargs["path"] == request_path - - -@freezegun.freeze_time(auto_tick_seconds=100) -def test_query_and_wait_retries_job_times_out(): - client = mock.create_autospec(Client) - client._call_api.__name__ = "_call_api" - client._call_api.__qualname__ = "Client._call_api" - client._call_api.__annotations__ = {} - client._call_api.__type_params__ = () - client._call_api.side_effect = ( - google.api_core.exceptions.BadGateway("retry me"), - google.api_core.exceptions.InternalServerError("job_retry me"), - google.api_core.exceptions.BadGateway("retry me"), - google.api_core.exceptions.InternalServerError("job_retry me"), - ) - - with pytest.raises(google.api_core.exceptions.RetryError) as exc_info: - _job_helpers.query_and_wait( - client, - query="SELECT 1", - location="request-location", - project="request-project", - job_config=None, - page_size=None, - max_results=None, - retry=retries.Retry( - lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), - multiplier=1.0, - ).with_deadline( - 200.0 - ), # Since auto_tick_seconds is 100, we should get at least 1 retry. - job_retry=retries.Retry( - lambda exc: isinstance( - exc, google.api_core.exceptions.InternalServerError - ), - multiplier=1.0, - ).with_deadline(400.0), - ) - - assert isinstance( - exc_info.value.cause, google.api_core.exceptions.InternalServerError - ) - - def test_query_and_wait_sets_job_creation_mode(): client = mock.create_autospec(Client) client.default_job_creation_mode = "JOB_CREATION_OPTIONAL" diff --git a/tests/unit/test__job_helpers_retry.py b/tests/unit/test__job_helpers_retry.py new file mode 100644 index 000000000..3ea4b1aae --- /dev/null +++ b/tests/unit/test__job_helpers_retry.py @@ -0,0 +1,122 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import freezegun +import google.api_core.exceptions +from google.api_core import retry as retries +import pytest + +from google.cloud.bigquery import _job_helpers + +from . import helpers + + +def test_query_and_wait_retries_job(global_time_lock): + with freezegun.freeze_time(auto_tick_seconds=100): + conn = helpers.make_connection( + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + google.api_core.exceptions.BadGateway("retry me"), + { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": True, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ], + }, + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + }, + ) + client = helpers.make_client(project="client-project") + client._connection = conn + rows = _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + page_size=None, + max_results=None, + retry=retries.Retry( + lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), + multiplier=1.0, + ).with_deadline( + 200.0 + ), # Since auto_tick_seconds is 100, we should get at least 1 retry. + job_retry=retries.Retry( + lambda exc: isinstance( + exc, google.api_core.exceptions.InternalServerError + ), + multiplier=1.0, + ).with_deadline(600.0), + ) + assert len(list(rows)) == 4 + + # For this code path, where the query has finished immediately, we should + # only be calling the jobs.query API and no other request path. + request_path = "/projects/request-project/queries" + for call in client._connection.api_request.call_args_list: + _, kwargs = call + assert kwargs["method"] == "POST" + assert kwargs["path"] == request_path + + +def test_query_and_wait_retries_job_times_out(global_time_lock): + with freezegun.freeze_time(auto_tick_seconds=100): + conn = helpers.make_connection( + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + ) + client = helpers.make_client(project="client-project") + client._connection = conn + + with pytest.raises(google.api_core.exceptions.RetryError) as exc_info: + _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + page_size=None, + max_results=None, + retry=retries.Retry( + lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), + multiplier=1.0, + ).with_deadline( + 200.0 + ), # Since auto_tick_seconds is 100, we should get at least 1 retry. + job_retry=retries.Retry( + lambda exc: isinstance( + exc, google.api_core.exceptions.InternalServerError + ), + multiplier=1.0, + ).with_deadline(400.0), + ) + + assert isinstance( + exc_info.value.cause, google.api_core.exceptions.InternalServerError + ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c3cf33279..213f382dc 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -16,7 +16,6 @@ import collections import datetime import decimal -import email import gzip import http.client import io @@ -28,13 +27,10 @@ from unittest import mock import warnings -import freezegun import packaging import pytest import requests -import google.api - try: import opentelemetry @@ -58,8 +54,6 @@ import google.cloud._helpers from google.cloud import bigquery -from google.cloud.bigquery import job as bqjob -import google.cloud.bigquery._job_helpers from google.cloud.bigquery.dataset import DatasetReference, Dataset from google.cloud.bigquery.enums import UpdateMode, DatasetView from google.cloud.bigquery import exceptions @@ -313,31 +307,6 @@ def test__call_api_extra_headers(self): headers = kwargs["headers"] assert headers["x-goog-request-reason"] == "because-friday" - def test__call_api_applying_custom_retry_on_timeout(self): - from concurrent.futures import TimeoutError - from google.cloud.bigquery.retry import DEFAULT_RETRY - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - - api_request_patcher = mock.patch.object( - client._connection, - "api_request", - side_effect=[TimeoutError, "result"], - ) - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, TimeoutError) - ) - - with api_request_patcher as fake_api_request: - result = client._call_api(retry, foo="bar") - - self.assertEqual(result, "result") - self.assertEqual( - fake_api_request.call_args_list, - [mock.call(foo="bar"), mock.call(foo="bar")], # was retried once - ) - def test__call_api_span_creator_not_called(self): from concurrent.futures import TimeoutError from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -644,48 +613,6 @@ def test_get_service_account_email_w_alternate_project(self): ) self.assertEqual(service_account_email, email) - def test_get_service_account_email_w_custom_retry(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - api_path = "/projects/{}/serviceAccount".format(self.PROJECT) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - resource = { - "kind": "bigquery#getServiceAccountResponse", - "email": "bq-123@bigquery-encryption.iam.gserviceaccount.com", - } - api_request_patcher = mock.patch.object( - client._connection, - "api_request", - side_effect=[ValueError, resource], - ) - - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, ValueError) - ) - - with api_request_patcher as fake_api_request: - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - service_account_email = client.get_service_account_email( - retry=retry, timeout=7.5 - ) - - final_attributes.assert_called_once_with({"path": api_path}, client, None) - self.assertEqual( - service_account_email, "bq-123@bigquery-encryption.iam.gserviceaccount.com" - ) - self.assertEqual( - fake_api_request.call_args_list, - [ - mock.call(method="GET", path=api_path, timeout=7.5), - mock.call(method="GET", path=api_path, timeout=7.5), # was retried once - ], - ) - def test_dataset_with_specified_project(self): from google.cloud.bigquery.dataset import DatasetReference @@ -3848,176 +3775,6 @@ def test_load_table_from_uri_w_default_load_config(self): timeout=DEFAULT_TIMEOUT, ) - @staticmethod - def _mock_requests_response(status_code, headers, content=b""): - return mock.Mock( - content=content, - headers=headers, - status_code=status_code, - spec=["content", "headers", "status_code"], - ) - - def _mock_transport(self, status_code, headers, content=b""): - fake_transport = mock.Mock(spec=["request"]) - fake_response = self._mock_requests_response( - status_code, headers, content=content - ) - fake_transport.request.return_value = fake_response - return fake_transport - - def _initiate_resumable_upload_helper(self, num_retries=None, mtls=False): - from google.resumable_media.requests import ResumableUpload - from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE - from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE - from google.cloud.bigquery.client import _get_upload_headers - from google.cloud.bigquery.job import LoadJob - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.job import SourceFormat - - # Create mocks to be checked for doing transport. - resumable_url = "http://test.invalid?upload_id=hey-you" - response_headers = {"location": resumable_url} - fake_transport = self._mock_transport(http.client.OK, response_headers) - client = self._make_one(project=self.PROJECT, _http=fake_transport) - conn = client._connection = make_connection() - if mtls: - conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") - - # Create some mock arguments and call the method under test. - data = b"goodbye gudbi gootbee" - stream = io.BytesIO(data) - config = LoadJobConfig() - config.source_format = SourceFormat.CSV - job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) - metadata = job.to_api_repr() - upload, transport = client._initiate_resumable_upload( - stream, metadata, num_retries, None - ) - - # Check the returned values. - self.assertIsInstance(upload, ResumableUpload) - - host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" - upload_url = ( - f"{host_name}/upload/bigquery/v2/projects/{self.PROJECT}" - "/jobs?uploadType=resumable" - ) - self.assertEqual(upload.upload_url, upload_url) - expected_headers = _get_upload_headers(conn.user_agent) - self.assertEqual(upload._headers, expected_headers) - self.assertFalse(upload.finished) - self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE) - self.assertIs(upload._stream, stream) - self.assertIsNone(upload._total_bytes) - self.assertEqual(upload._content_type, _GENERIC_CONTENT_TYPE) - self.assertEqual(upload.resumable_url, resumable_url) - - retry_strategy = upload._retry_strategy - self.assertEqual(retry_strategy.max_sleep, 64.0) - if num_retries is None: - self.assertEqual(retry_strategy.max_cumulative_retry, 600.0) - self.assertIsNone(retry_strategy.max_retries) - else: - self.assertIsNone(retry_strategy.max_cumulative_retry) - self.assertEqual(retry_strategy.max_retries, num_retries) - self.assertIs(transport, fake_transport) - # Make sure we never read from the stream. - self.assertEqual(stream.tell(), 0) - - # Check the mocks. - request_headers = expected_headers.copy() - request_headers["x-upload-content-type"] = _GENERIC_CONTENT_TYPE - fake_transport.request.assert_called_once_with( - "POST", - upload_url, - data=json.dumps(metadata).encode("utf-8"), - headers=request_headers, - timeout=mock.ANY, - ) - - def test__initiate_resumable_upload(self): - self._initiate_resumable_upload_helper() - - def test__initiate_resumable_upload_mtls(self): - self._initiate_resumable_upload_helper(mtls=True) - - def test__initiate_resumable_upload_with_retry(self): - self._initiate_resumable_upload_helper(num_retries=11) - - def _do_multipart_upload_success_helper( - self, get_boundary, num_retries=None, project=None, mtls=False - ): - from google.cloud.bigquery.client import _get_upload_headers - from google.cloud.bigquery.job import LoadJob - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.job import SourceFormat - - fake_transport = self._mock_transport(http.client.OK, {}) - client = self._make_one(project=self.PROJECT, _http=fake_transport) - conn = client._connection = make_connection() - if mtls: - conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") - - if project is None: - project = self.PROJECT - - # Create some mock arguments. - data = b"Bzzzz-zap \x00\x01\xf4" - stream = io.BytesIO(data) - config = LoadJobConfig() - config.source_format = SourceFormat.CSV - job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) - metadata = job.to_api_repr() - size = len(data) - - response = client._do_multipart_upload( - stream, metadata, size, num_retries, None, project=project - ) - - # Check the mocks and the returned value. - self.assertIs(response, fake_transport.request.return_value) - self.assertEqual(stream.tell(), size) - get_boundary.assert_called_once_with() - - host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" - upload_url = ( - f"{host_name}/upload/bigquery/v2/projects/{project}" - "/jobs?uploadType=multipart" - ) - payload = ( - b"--==0==\r\n" - b"content-type: application/json; charset=UTF-8\r\n\r\n" - b"%(json_metadata)s" - b"\r\n" - b"--==0==\r\n" - b"content-type: */*\r\n\r\n" - b"%(data)s" - b"\r\n" - b"--==0==--" - ) % {b"json_metadata": json.dumps(metadata).encode("utf-8"), b"data": data} - - headers = _get_upload_headers(conn.user_agent) - headers["content-type"] = b'multipart/related; boundary="==0=="' - fake_transport.request.assert_called_once_with( - "POST", upload_url, data=payload, headers=headers, timeout=mock.ANY - ) - - @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary) - - @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload_mtls(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary, mtls=True) - - @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload_with_retry(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary, num_retries=8) - - @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload_with_custom_project(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary, project="custom-project") - def test_copy_table(self): from google.cloud.bigquery.job import CopyJob @@ -5543,143 +5300,6 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails_no_retries(self job_retry=None, ) - def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404(self): - """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 - - Sometimes after a Conflict, the fetch fails with a 404, but we know - because of the conflict that really the job does exist. Retry until we - get the job status (or timeout). - """ - job_id = "abc123" - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection( - # We're mocking QueryJob._begin, so this is only going to be - # jobs.get requests and responses. - google.api_core.exceptions.TooManyRequests("this is retriable by default"), - google.api_core.exceptions.NotFound("we lost your job"), - google.api_core.exceptions.NotFound("we lost your job again, sorry"), - { - "jobReference": { - "projectId": self.PROJECT, - "location": "TESTLOC", - "jobId": job_id, - } - }, - ) - - job_create_error = google.api_core.exceptions.Conflict("Job already exists.") - job_begin_patcher = mock.patch.object( - bqjob.QueryJob, "_begin", side_effect=job_create_error - ) - job_id_patcher = mock.patch.object( - google.cloud.bigquery._job_helpers, - "make_job_id", - return_value=job_id, - ) - - with job_begin_patcher, job_id_patcher: - # If get job request fails there does exist a job - # with this ID already, retry 404 until we get it (or fails for a - # non-retriable reason, see other tests). - result = client.query("SELECT 1;", job_id=None) - - jobs_get_path = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{job_id}", - query_params={ - "projection": "full", - }, - timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, - ) - conn.api_request.assert_has_calls( - # Double-check that it was jobs.get that was called for each of our - # mocked responses. - [jobs_get_path] - * 4, - ) - assert result.job_id == job_id - - def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404_and_query_job_insert( - self, - ): - """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 - - Sometimes after a Conflict, the fetch fails with a 404. If it keeps - failing with a 404, assume that the job actually doesn't exist. - """ - job_id_1 = "abc123" - job_id_2 = "xyz789" - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - # We're mocking QueryJob._begin, so that the connection should only get - # jobs.get requests. - job_create_error = google.api_core.exceptions.Conflict("Job already exists.") - job_begin_patcher = mock.patch.object( - bqjob.QueryJob, "_begin", side_effect=job_create_error - ) - conn = client._connection = make_connection( - google.api_core.exceptions.NotFound("we lost your job again, sorry"), - { - "jobReference": { - "projectId": self.PROJECT, - "location": "TESTLOC", - "jobId": job_id_2, - } - }, - ) - - # Choose a small deadline so the 404 retries give up. - retry = ( - google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY.with_deadline(1) - ) - job_id_patcher = mock.patch.object( - google.cloud.bigquery._job_helpers, - "make_job_id", - side_effect=[job_id_1, job_id_2], - ) - retry_patcher = mock.patch.object( - google.cloud.bigquery.retry, - "_DEFAULT_GET_JOB_CONFLICT_RETRY", - retry, - ) - - with freezegun.freeze_time( - "2025-01-01 00:00:00", - # 10x the retry deadline to guarantee a timeout. - auto_tick_seconds=10, - ), job_begin_patcher, job_id_patcher, retry_patcher: - # If get job request fails there does exist a job - # with this ID already, retry 404 until we get it (or fails for a - # non-retriable reason, see other tests). - result = client.query("SELECT 1;", job_id=None) - - jobs_get_path_1 = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{job_id_1}", - query_params={ - "projection": "full", - }, - timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, - ) - jobs_get_path_2 = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{job_id_2}", - query_params={ - "projection": "full", - }, - timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, - ) - conn.api_request.assert_has_calls( - # Double-check that it was jobs.get that was called for each of our - # mocked responses. - [jobs_get_path_1, jobs_get_path_2], - ) - assert result.job_id == job_id_2 - def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): from google.api_core.exceptions import Conflict from google.cloud.bigquery.job import QueryJob @@ -10032,213 +9652,6 @@ def test_load_table_from_json_unicode_emoji_data_case(self): assert sent_data_file.getvalue() == expected_bytes # Low-level tests - - @classmethod - def _make_resumable_upload_responses(cls, size): - """Make a series of responses for a successful resumable upload.""" - from google import resumable_media - - resumable_url = "http://test.invalid?upload_id=and-then-there-was-1" - initial_response = cls._make_response( - http.client.OK, "", {"location": resumable_url} - ) - data_response = cls._make_response( - resumable_media.PERMANENT_REDIRECT, - "", - {"range": "bytes=0-{:d}".format(size - 1)}, - ) - final_response = cls._make_response( - http.client.OK, - json.dumps({"size": size}), - {"Content-Type": "application/json"}, - ) - return [initial_response, data_response, final_response] - - @staticmethod - def _make_transport(responses=None): - import google.auth.transport.requests - - transport = mock.create_autospec( - google.auth.transport.requests.AuthorizedSession, instance=True - ) - transport.request.side_effect = responses - return transport - - def test__do_resumable_upload(self): - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - transport = self._make_transport( - self._make_resumable_upload_responses(file_obj_len) - ) - client = self._make_client(transport) - - result = client._do_resumable_upload( - file_obj, self.EXPECTED_CONFIGURATION, None, None - ) - - content = result.content.decode("utf-8") - assert json.loads(content) == {"size": file_obj_len} - - # Verify that configuration data was passed in with the initial - # request. - transport.request.assert_any_call( - "POST", - mock.ANY, - data=json.dumps(self.EXPECTED_CONFIGURATION).encode("utf-8"), - headers=mock.ANY, - timeout=mock.ANY, - ) - - def test__do_resumable_upload_custom_project(self): - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - transport = self._make_transport( - self._make_resumable_upload_responses(file_obj_len) - ) - client = self._make_client(transport) - - result = client._do_resumable_upload( - file_obj, - self.EXPECTED_CONFIGURATION, - None, - None, - project="custom-project", - ) - - content = result.content.decode("utf-8") - assert json.loads(content) == {"size": file_obj_len} - - # Verify that configuration data was passed in with the initial - # request. - transport.request.assert_any_call( - "POST", - mock.ANY, - data=json.dumps(self.EXPECTED_CONFIGURATION).encode("utf-8"), - headers=mock.ANY, - timeout=mock.ANY, - ) - - initiation_url = next( - ( - call[0][1] - for call in transport.request.call_args_list - if call[0][0] == "POST" and "uploadType=resumable" in call[0][1] - ), - None, - ) # pragma: NO COVER - - assert initiation_url is not None - assert "projects/custom-project" in initiation_url - - def test__do_resumable_upload_custom_timeout(self): - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - transport = self._make_transport( - self._make_resumable_upload_responses(file_obj_len) - ) - client = self._make_client(transport) - - client._do_resumable_upload( - file_obj, self.EXPECTED_CONFIGURATION, num_retries=0, timeout=3.14 - ) - - # The timeout should be applied to all underlying calls. - for call_args in transport.request.call_args_list: - assert call_args[1].get("timeout") == 3.14 - - def test__do_multipart_upload(self): - transport = self._make_transport([self._make_response(http.client.OK)]) - client = self._make_client(transport) - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - - client._do_multipart_upload( - file_obj, self.EXPECTED_CONFIGURATION, file_obj_len, None, None - ) - - # Verify that configuration data was passed in with the initial - # request. - request_args = transport.request.mock_calls[0][2] - request_data = request_args["data"].decode("utf-8") - request_headers = request_args["headers"] - - request_content = email.message_from_string( - "Content-Type: {}\r\n{}".format( - request_headers["content-type"].decode("utf-8"), request_data - ) - ) - - # There should be two payloads: the configuration and the binary daya. - configuration_data = request_content.get_payload(0).get_payload() - binary_data = request_content.get_payload(1).get_payload() - - assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION - assert binary_data.encode("utf-8") == file_obj.getvalue() - - def test__do_multipart_upload_wrong_size(self): - client = self._make_client() - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - - with pytest.raises(ValueError): - client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None, None) - - def test_schema_from_json_with_file_path(self): - from google.cloud.bigquery.schema import SchemaField - - file_content = """[ - { - "description": "quarter", - "mode": "REQUIRED", - "name": "qtr", - "type": "STRING" - }, - { - "description": "sales representative", - "mode": "NULLABLE", - "name": "rep", - "type": "STRING" - }, - { - "description": "total sales", - "mode": "NULLABLE", - "name": "sales", - "type": "FLOAT" - } - ]""" - - expected = [ - SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), - SchemaField( - "rep", - "STRING", - "NULLABLE", - description="sales representative", - ), - SchemaField( - "sales", - "FLOAT", - "NULLABLE", - description="total sales", - ), - ] - - client = self._make_client() - mock_file_path = "/mocked/file.json" - - open_patch = mock.patch( - "builtins.open", new=mock.mock_open(read_data=file_content) - ) - - with open_patch as _mock_file: - actual = client.schema_from_json(mock_file_path) - _mock_file.assert_called_once_with(mock_file_path) - # This assert is to make sure __exit__ is called in the context - # manager that opens the file in the function - _mock_file().__exit__.assert_called_once() - - assert expected == actual - def test_schema_from_json_with_file_object(self): from google.cloud.bigquery.schema import SchemaField diff --git a/tests/unit/test_client_bigframes.py b/tests/unit/test_client_bigframes.py index 0fcc31e40..0260da5e4 100644 --- a/tests/unit/test_client_bigframes.py +++ b/tests/unit/test_client_bigframes.py @@ -338,7 +338,7 @@ def test_query_and_wait_bigframes_with_jobs_insert_dry_run_no_callback(client): assert result.schema == [bigquery.SchemaField("_f0", "INTEGER")] -def test_query_and_wait_bigframes_with_query_retry_callbacks(client): +def test_query_and_wait_bigframes_with_query_retry_callbacks(client, global_time_lock): created = datetime.datetime( 2025, 8, 18, 10, 11, 12, 345000, tzinfo=datetime.timezone.utc ) diff --git a/tests/unit/test_client_resumable_media_upload.py b/tests/unit/test_client_resumable_media_upload.py new file mode 100644 index 000000000..642c18d15 --- /dev/null +++ b/tests/unit/test_client_resumable_media_upload.py @@ -0,0 +1,433 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock +import email +import http.client +import io +import json + +import pytest + +from google.cloud.bigquery.table import TableReference + +from .helpers import make_connection + + +PROJECT = "test-project" +TABLE_REF = TableReference.from_string(f"{PROJECT}.test_dataset.test_table") +EXPECTED_CONFIGURATION = { + "load": { + "destinationTable": { + "projectId": PROJECT, + "datasetId": "test_dataset", + "tableId": "test_table", + }, + "sourceFormat": "CSV", + } +} + + +@pytest.fixture(autouse=True) +def mock_sleep(monkeypatch): + sleep = mock.Mock() + monkeypatch.setattr("time.sleep", sleep) + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + + +def _make_client(*args, **kw): + from google.cloud.bigquery.client import Client + + kw["credentials"] = _make_credentials() + kw["project"] = PROJECT + return Client(*args, **kw) + + +def _make_file_obj(contents=b"some data"): + return io.BytesIO(contents) + + +def _make_response(status_code, content=b"", headers=None): + response = mock.Mock(spec=["status_code", "content", "request", "headers"]) + response.status_code = status_code + response.content = content + response.headers = headers or {} + response.request = mock.Mock(spec=["headers"]) + return response + + +def _make_resumable_upload_responses(num_bytes): + # In a real scenario, the upload URL is returned in a 'Location' + # header. + return [ + _make_response( + http.client.OK, + headers={"location": "http://test.invalid/upload-id"}, + ), + _make_response( + http.client.OK, content=json.dumps({"size": num_bytes}).encode("utf-8") + ), + ] + + +def _make_transport(responses=None): + import google.auth.transport.requests + + transport = mock.create_autospec( + google.auth.transport.requests.AuthorizedSession, instance=True + ) + transport.request.side_effect = responses + return transport + + +def _mock_requests_response(status_code, headers, content=b""): + return mock.Mock( + content=content, + headers=headers, + status_code=status_code, + spec=["content", "headers", "status_code"], + ) + + +def _mock_transport(status_code, headers, content=b""): + fake_transport = mock.Mock(spec=["request"]) + fake_response = _mock_requests_response(status_code, headers, content=content) + fake_transport.request.return_value = fake_response + return fake_transport + + +def _initiate_resumable_upload_helper(num_retries=None, mtls=False): + from google.resumable_media.requests import ResumableUpload + from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE + from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE + from google.cloud.bigquery.client import _get_upload_headers + from google.cloud.bigquery.job import LoadJob + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.job import SourceFormat + + # Create mocks to be checked for doing transport. + resumable_url = "http://test.invalid?upload_id=hey-you" + response_headers = {"location": resumable_url} + fake_transport = _mock_transport(http.client.OK, response_headers) + client = _make_client(_http=fake_transport) + conn = client._connection = make_connection() + if mtls: + conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") + + # Create some mock arguments and call the method under test. + data = b"goodbye gudbi gootbee" + stream = io.BytesIO(data) + config = LoadJobConfig() + config.source_format = SourceFormat.CSV + job = LoadJob(None, None, TABLE_REF, client, job_config=config) + metadata = job.to_api_repr() + upload, transport_out = client._initiate_resumable_upload( + stream, metadata, num_retries, None + ) + + # Check the returned values. + assert isinstance(upload, ResumableUpload) + + host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" + upload_url = ( + f"{host_name}/upload/bigquery/v2/projects/{PROJECT}/jobs?uploadType=resumable" + ) + assert upload.upload_url == upload_url + expected_headers = _get_upload_headers(conn.user_agent) + assert upload._headers == expected_headers + assert not upload.finished + assert upload._chunk_size == _DEFAULT_CHUNKSIZE + assert upload._stream is stream + assert upload._total_bytes is None + assert upload._content_type == _GENERIC_CONTENT_TYPE + assert upload.resumable_url == resumable_url + + retry_strategy = upload._retry_strategy + assert retry_strategy.max_sleep == 64.0 + if num_retries is None: + assert retry_strategy.max_cumulative_retry == 600.0 + assert retry_strategy.max_retries is None + else: + assert retry_strategy.max_cumulative_retry is None + assert retry_strategy.max_retries == num_retries + assert transport_out is fake_transport + # Make sure we never read from the stream. + assert stream.tell() == 0 + + # Check the mocks. + request_headers = expected_headers.copy() + request_headers["x-upload-content-type"] = _GENERIC_CONTENT_TYPE + fake_transport.request.assert_called_once_with( + "POST", + upload_url, + data=json.dumps(metadata).encode("utf-8"), + headers=request_headers, + timeout=mock.ANY, + ) + + +def test__initiate_resumable_upload(): + _initiate_resumable_upload_helper() + + +def test__initiate_resumable_upload_mtls(): + _initiate_resumable_upload_helper(mtls=True) + + +def test_initiate_resumable_upload_with_retry(): + _initiate_resumable_upload_helper(num_retries=11) + + +def _do_multipart_upload_success_helper( + get_boundary, num_retries=None, project=None, mtls=False +): + from google.cloud.bigquery.client import _get_upload_headers + from google.cloud.bigquery.job import LoadJob + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.job import SourceFormat + + fake_transport = _mock_transport(http.client.OK, {}) + client = _make_client(_http=fake_transport) + conn = client._connection = make_connection() + if mtls: + conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") + + if project is None: + project = PROJECT + + # Create some mock arguments. + data = b"Bzzzz-zap \x00\x01\xf4" + stream = io.BytesIO(data) + config = LoadJobConfig() + config.source_format = SourceFormat.CSV + job = LoadJob(None, None, TABLE_REF, client, job_config=config) + metadata = job.to_api_repr() + size = len(data) + + response = client._do_multipart_upload( + stream, metadata, size, num_retries, None, project=project + ) + + # Check the mocks and the returned value. + assert response is fake_transport.request.return_value + assert stream.tell() == size + get_boundary.assert_called_once_with() + + host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" + upload_url = ( + f"{host_name}/upload/bigquery/v2/projects/{project}/jobs?uploadType=multipart" + ) + payload = ( + b"--==0==\r\n" + b"content-type: application/json; charset=UTF-8\r\n\r\n" + b"%(json_metadata)s" + b"\r\n" + b"--==0==\r\n" + b"content-type: */*\r\n\r\n" + b"%(data)s" + b"\r\n" + b"--==0==--" + ) % {b"json_metadata": json.dumps(metadata).encode("utf-8"), b"data": data} + + headers = _get_upload_headers(conn.user_agent) + headers["content-type"] = b'multipart/related; boundary="==0=="' + fake_transport.request.assert_called_once_with( + "POST", upload_url, data=payload, headers=headers, timeout=mock.ANY + ) + + +@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") +def test__do_multipart_upload(get_boundary): + _do_multipart_upload_success_helper(get_boundary) + + +@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") +def test__do_multipart_upload_mtls(get_boundary): + _do_multipart_upload_success_helper(get_boundary, mtls=True) + + +@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") +def test_do_multipart_upload_with_retry(get_boundary): + _do_multipart_upload_success_helper(get_boundary, num_retries=8) + + +@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") +def test__do_multipart_upload_with_custom_project(get_boundary): + _do_multipart_upload_success_helper(get_boundary, project="custom-project") + + +def test__do_resumable_upload(): + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = _make_transport(_make_resumable_upload_responses(file_obj_len)) + client = _make_client(_http=transport) + + result = client._do_resumable_upload(file_obj, EXPECTED_CONFIGURATION, None, None) + + content = result.content.decode("utf-8") + assert json.loads(content) == {"size": file_obj_len} + + transport.request.assert_any_call( + "POST", + mock.ANY, + data=json.dumps(EXPECTED_CONFIGURATION).encode("utf-8"), + headers=mock.ANY, + timeout=mock.ANY, + ) + + +def test__do_resumable_upload_custom_project(): + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = _make_transport(_make_resumable_upload_responses(file_obj_len)) + client = _make_client(_http=transport) + + result = client._do_resumable_upload( + file_obj, + EXPECTED_CONFIGURATION, + None, + None, + project="custom-project", + ) + + content = result.content.decode("utf-8") + assert json.loads(content) == {"size": file_obj_len} + + transport.request.assert_any_call( + "POST", + mock.ANY, + data=json.dumps(EXPECTED_CONFIGURATION).encode("utf-8"), + headers=mock.ANY, + timeout=mock.ANY, + ) + + initiation_url = next( + ( + call[0][1] + for call in transport.request.call_args_list + if call[0][0] == "POST" and "uploadType=resumable" in call[0][1] + ), + None, + ) + assert initiation_url is not None + assert "projects/custom-project" in initiation_url + + +def test__do_resumable_upload_custom_timeout(): + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = _make_transport(_make_resumable_upload_responses(file_obj_len)) + client = _make_client(_http=transport) + + client._do_resumable_upload( + file_obj, EXPECTED_CONFIGURATION, num_retries=0, timeout=3.14 + ) + + for call_args in transport.request.call_args_list: + assert call_args[1].get("timeout") == 3.14 + + +def test__do_multipart_upload_request_body(): + transport = _make_transport([_make_response(http.client.OK)]) + client = _make_client(_http=transport) + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + + client._do_multipart_upload( + file_obj, EXPECTED_CONFIGURATION, file_obj_len, None, None + ) + + request_args = transport.request.mock_calls[0][2] + request_data = request_args["data"].decode("utf-8") + request_headers = request_args["headers"] + + request_content = email.message_from_string( + "Content-Type: {}\n{}".format( + request_headers["content-type"].decode("utf-8"), request_data + ) + ) + + configuration_data = request_content.get_payload(0).get_payload() + binary_data = request_content.get_payload(1).get_payload() + + assert json.loads(configuration_data) == EXPECTED_CONFIGURATION + assert binary_data.encode("utf-8") == file_obj.getvalue() + + +def test__do_multipart_upload_wrong_size(): + client = _make_client() + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + + with pytest.raises(ValueError): + client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None, None) + + +def test_schema_from_json_with_file_path(): + from google.cloud.bigquery.schema import SchemaField + + file_content = """ + [ + { + "description": "quarter", + "mode": "REQUIRED", + "name": "qtr", + "type": "STRING" + }, + { + "description": "sales representative", + "mode": "NULLABLE", + "name": "rep", + "type": "STRING" + }, + { + "description": "total sales", + "mode": "NULLABLE", + "name": "sales", + "type": "FLOAT" + } + ]""" + + expected = [ + SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), + SchemaField( + "rep", + "STRING", + "NULLABLE", + description="sales representative", + ), + SchemaField( + "sales", + "FLOAT", + "NULLABLE", + description="total sales", + ), + ] + + client = _make_client() + mock_file_path = "/mocked/file.json" + + open_patch = mock.patch("builtins.open", new=mock.mock_open(read_data=file_content)) + + with open_patch as _mock_file: + actual = client.schema_from_json(mock_file_path) + _mock_file.assert_called_once_with(mock_file_path) + _mock_file.return_value.read.assert_called_once() + + assert expected == actual diff --git a/tests/unit/test_client_retry.py b/tests/unit/test_client_retry.py new file mode 100644 index 000000000..6e49cc464 --- /dev/null +++ b/tests/unit/test_client_retry.py @@ -0,0 +1,279 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock + +import freezegun +import google.api_core.exceptions +from google.cloud.bigquery import job as bqjob +from google.cloud.bigquery.retry import DEFAULT_RETRY +from .helpers import make_connection + + +PROJECT = "test-project" + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + + +def _make_client(*args, **kw): + from google.cloud.bigquery.client import Client + + return Client(*args, **kw) + + +def test_get_service_account_email_w_custom_retry(global_time_lock): + api_path = f"/projects/{PROJECT}/serviceAccount" + creds = _make_credentials() + http = object() + client = _make_client(project=PROJECT, credentials=creds, _http=http) + + resource = { + "kind": "bigquery#getServiceAccountResponse", + "email": "bq-123@bigquery-encryption.iam.gserviceaccount.com", + } + api_request_patcher = mock.patch.object( + client._connection, + "api_request", + side_effect=[ValueError, resource], + ) + + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, ValueError) + ) + + with api_request_patcher as fake_api_request: + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + service_account_email = client.get_service_account_email( + retry=retry, timeout=7.5 + ) + + final_attributes.assert_called_once_with({"path": api_path}, client, None) + assert service_account_email == "bq-123@bigquery-encryption.iam.gserviceaccount.com" + assert fake_api_request.call_args_list == [ + mock.call(method="GET", path=api_path, timeout=7.5), + mock.call(method="GET", path=api_path, timeout=7.5), # was retried once + ] + + +def test_call_api_applying_custom_retry_on_timeout(global_time_lock): + from concurrent.futures import TimeoutError + + creds = _make_credentials() + client = _make_client(project=PROJECT, credentials=creds) + + api_request_patcher = mock.patch.object( + client._connection, + "api_request", + side_effect=[TimeoutError, "result"], + ) + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, TimeoutError) + ) + + with api_request_patcher as fake_api_request: + result = client._call_api(retry, foo="bar") + + assert result == "result" + assert fake_api_request.call_args_list == [ + mock.call(foo="bar"), + mock.call(foo="bar"), + ] + + +def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404( + global_time_lock, +): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404, but we know + because of the conflict that really the job does exist. Retry until we + get the job status (or timeout). + """ + job_id = "abc123" + creds = _make_credentials() + http = object() + client = _make_client(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection( + # We're mocking QueryJob._begin, so this is only going to be + # jobs.get requests and responses. + google.api_core.exceptions.TooManyRequests("this is retriable by default"), + google.api_core.exceptions.NotFound("we lost your job"), + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": PROJECT, + "location": "TESTLOC", + "jobId": job_id, + } + }, + ) + + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + return_value=job_id, + ) + + with job_begin_patcher, job_id_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). + result = client.query("SELECT 1;", job_id=None) + + jobs_get_path = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{job_id}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. + [jobs_get_path] + * 4, + ) + assert result.job_id == job_id + + +def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404_and_query_job_insert( + global_time_lock, +): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404. If it keeps + failing with a 404, assume that the job actually doesn't exist. + """ + job_id_1 = "abc123" + job_id_2 = "xyz789" + creds = _make_credentials() + http = object() + client = _make_client(project=PROJECT, credentials=creds, _http=http) + + # We're mocking QueryJob._begin, so that the connection should only get + # jobs.get requests. + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": PROJECT, + "location": "TESTLOC", + "jobId": job_id_2, + } + }, + ) + + # Choose a small deadline so the 404 retries give up. + retry = google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY.with_deadline(1) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + side_effect=[job_id_1, job_id_2], + ) + retry_patcher = mock.patch.object( + google.cloud.bigquery.retry, + "_DEFAULT_GET_JOB_CONFLICT_RETRY", + retry, + ) + + with freezegun.freeze_time( + "2025-01-01 00:00:00", + # 10x the retry deadline to guarantee a timeout. + auto_tick_seconds=10, + ), job_begin_patcher, job_id_patcher, retry_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). + result = client.query("SELECT 1;", job_id=None) + + jobs_get_path_1 = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{job_id_1}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + jobs_get_path_2 = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{job_id_2}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. + [jobs_get_path_1, jobs_get_path_2], + ) + assert result.job_id == job_id_2 + + +def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retry(global_time_lock): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + If we get a 409 conflict on jobs.insert, and we are using a random + job ID, we should retry by getting the job by ID. This test ensures that + if the get job by ID fails, we retry the whole sequence. + """ + from google.cloud.bigquery import job + + client = _make_client(project=PROJECT, credentials=_make_credentials()) + job_id = "some-random-job-id" + query_text = "SELECT 1" + job_config = job.QueryJobConfig() + job_config.use_legacy_sql = False + + job_resource = { + "jobReference": {"projectId": PROJECT, "jobId": job_id}, + "configuration": {"query": {"query": query_text}}, + "status": {"state": "DONE"}, + } + + conn = make_connection( + # First attempt at jobs.insert fails with a 409 + google.api_core.exceptions.Conflict("Job already exists."), + # First attempt at jobs.get fails with a 500 + google.api_core.exceptions.InternalServerError("get job failed"), + # Second attempt at jobs.insert succeeds + job_resource, + ) + client._connection = conn + + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + return_value=job_id, + ) + + with job_id_patcher: + query_job = client.query(query_text, job_config=job_config, job_id=None) + + assert query_job.job_id == job_id diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index 7144c640b..7343fed3d 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -80,7 +80,7 @@ ), ], ) -def test_retry_failed_jobs(sleep, reason, job_retry, result_retry): +def test_retry_failed_jobs(sleep, reason, job_retry, result_retry, global_time_lock): client = make_client() err = dict(reason=reason) conn = client._connection = make_connection( @@ -138,7 +138,7 @@ def test_retry_failed_jobs(sleep, reason, job_retry, result_retry): def test_retry_connection_error_with_default_retries_and_successful_first_job( - monkeypatch, client + monkeypatch, client, global_time_lock ): """ Make sure ConnectionError can be retried at `is_job_done` level, even if @@ -254,7 +254,7 @@ def make_job_id(*args, **kwargs): def test_query_retry_with_default_retry_and_ambiguous_errors_only_retries_with_failed_job( - client, monkeypatch + client, monkeypatch, global_time_lock ): """ Some errors like 'rateLimitExceeded' can be ambiguous. Make sure we only @@ -419,7 +419,7 @@ def make_job_id(*args, **kwargs): # - Pass None retry to `result`. @pytest.mark.parametrize("job_retry_on_query", ["Query", "Result"]) @mock.patch("time.sleep") -def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query): +def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query, global_time_lock): """ Test retry of job failures, as opposed to API-invocation failures. """ @@ -450,7 +450,7 @@ def api_request(method, path, query_params=None, data=None, **kw): @mock.patch("time.sleep") -def test_retry_failed_jobs_after_retry_failed(sleep, client): +def test_retry_failed_jobs_after_retry_failed(sleep, client, global_time_lock): """ If at first you don't succeed, maybe you will later. :) """ @@ -508,7 +508,7 @@ def api_request(method, path, query_params=None, data=None, **kw): assert job.job_id != orig_job_id -def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): +def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client, global_time_lock): with pytest.raises( TypeError, match=( @@ -520,7 +520,9 @@ def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry()) -def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): +def test_raises_on_job_retry_on_result_with_non_retryable_jobs( + client, global_time_lock +): client._connection = make_connection({}) with pytest.warns( @@ -542,7 +544,7 @@ def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): job.result(job_retry=google.api_core.retry.Retry()) -def test_query_and_wait_retries_job_for_DDL_queries(): +def test_query_and_wait_retries_job_for_DDL_queries(global_time_lock): """ Specific test for retrying DDL queries with "jobRateLimitExceeded" error: https://github.com/googleapis/python-bigquery/issues/1790 From 84fa75b3e52d41f3778c1654779f03598919168f Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 15:03:23 -0500 Subject: [PATCH 507/536] chore(main): release 3.36.0 (#2255) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 14 ++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 374448a5e..62352c344 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.36.0](https://github.com/googleapis/python-bigquery/compare/v3.35.1...v3.36.0) (2025-08-20) + + +### Features + +* Add created/started/ended properties to RowIterator. ([#2260](https://github.com/googleapis/python-bigquery/issues/2260)) ([0a95b24](https://github.com/googleapis/python-bigquery/commit/0a95b24192395cc3ccf801aa9bc318999873a2bf)) +* Retry query jobs if `jobBackendError` or `jobInternalError` are encountered ([#2256](https://github.com/googleapis/python-bigquery/issues/2256)) ([3deff1d](https://github.com/googleapis/python-bigquery/commit/3deff1d963980800e8b79fa3aaf5b712d4fd5062)) + + +### Documentation + +* Add a TROUBLESHOOTING.md file with tips for logging ([#2262](https://github.com/googleapis/python-bigquery/issues/2262)) ([b684832](https://github.com/googleapis/python-bigquery/commit/b68483227693ea68f6b12eacca2be1803cffb1d1)) +* Update README to break infinite redirect loop ([#2254](https://github.com/googleapis/python-bigquery/issues/2254)) ([8f03166](https://github.com/googleapis/python-bigquery/commit/8f031666114a826da2ad965f8ecd4727466cb480)) + ## [3.35.1](https://github.com/googleapis/python-bigquery/compare/v3.35.0...v3.35.1) (2025-07-21) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index d565bc46e..a8f4c8e14 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.35.1" +__version__ = "3.36.0" From ef2740a158199633b5543a7b6eb19587580792cd Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 26 Aug 2025 10:52:30 -0500 Subject: [PATCH 508/536] feat: updates to fastpath query execution (#2268) This PR updates query handling to allow base config properties like job timeout, reservation, and a preview max slots field to leverage the faster path (e.g. using jobs.query rather than jobs.insert). --- google/cloud/bigquery/_job_helpers.py | 3 ++ google/cloud/bigquery/job/base.py | 31 ++++++++++++++++++++ tests/unit/job/test_base.py | 41 +++++++++++++++++++++++++++ tests/unit/job/test_query_config.py | 5 ++++ tests/unit/test__job_helpers.py | 28 ++++++++++++++++++ 5 files changed, 108 insertions(+) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 6fd561f8c..27e90246f 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -658,6 +658,9 @@ def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool: "requestId", "createSession", "writeIncrementalResults", + "jobTimeoutMs", + "reservation", + "maxSlots", } unsupported_keys = request_keys - keys_allowlist diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index f007b9341..1344082be 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -224,6 +224,37 @@ def job_timeout_ms(self, value): else: self._properties.pop("jobTimeoutMs", None) + @property + def max_slots(self) -> Optional[int]: + """The maximum rate of slot consumption to allow for this job. + + If set, the number of slots used to execute the job will be throttled + to try and keep its slot consumption below the requested rate. + This feature is not generally available. + """ + + max_slots = self._properties.get("maxSlots") + if max_slots is not None: + if isinstance(max_slots, str): + return int(max_slots) + if isinstance(max_slots, int): + return max_slots + return None + + @max_slots.setter + def max_slots(self, value): + try: + value = _int_or_none(value) + except ValueError as err: + raise ValueError("Pass an int for max slots, e.g. 100").with_traceback( + err.__traceback__ + ) + + if value is not None: + self._properties["maxSlots"] = str(value) + else: + self._properties.pop("maxSlots", None) + @property def reservation(self): """str: Optional. The reservation that job would use. diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index f5861f645..420904820 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -1276,3 +1276,44 @@ def test_reservation_setter(self): job_config = self._make_one() job_config.reservation = "foo" self.assertEqual(job_config._properties["reservation"], "foo") + + def test_max_slots_miss(self): + job_config = self._make_one() + self.assertEqual(job_config.max_slots, None) + + def test_max_slots_set_and_clear(self): + job_config = self._make_one() + job_config.max_slots = 14 + self.assertEqual(job_config.max_slots, 14) + job_config.max_slots = None + self.assertEqual(job_config.max_slots, None) + + def test_max_slots_hit_str(self): + job_config = self._make_one() + job_config._properties["maxSlots"] = "4" + self.assertEqual(job_config.max_slots, 4) + + def test_max_slots_hit_int(self): + job_config = self._make_one() + job_config._properties["maxSlots"] = int(3) + self.assertEqual(job_config.max_slots, 3) + + def test_max_slots_hit_invalid(self): + job_config = self._make_one() + job_config._properties["maxSlots"] = object() + self.assertEqual(job_config.max_slots, None) + + def test_max_slots_update_in_place(self): + job_config = self._make_one() + job_config.max_slots = 45 # update in place + self.assertEqual(job_config.max_slots, 45) + + def test_max_slots_setter_invalid(self): + job_config = self._make_one() + with self.assertRaises(ValueError): + job_config.max_slots = "foo" + + def test_max_slots_setter(self): + job_config = self._make_one() + job_config.max_slots = 123 + self.assertEqual(job_config._properties["maxSlots"], "123") diff --git a/tests/unit/job/test_query_config.py b/tests/unit/job/test_query_config.py index e0878d067..a63a14b73 100644 --- a/tests/unit/job/test_query_config.py +++ b/tests/unit/job/test_query_config.py @@ -172,6 +172,11 @@ def test_incremental_results(self): config.write_incremental_results = True self.assertEqual(config.write_incremental_results, True) + def test_max_slots(self): + config = self._get_target_class()() + config.max_slots = 99 + self.assertEqual(config.max_slots, 99) + def test_create_session(self): config = self._get_target_class()() self.assertIsNone(config.create_session) diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 1f543f033..10cbefe13 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -200,6 +200,19 @@ def make_query_response( make_query_request({"writeIncrementalResults": True}), id="job_config-with-incremental-results", ), + pytest.param( + job_query.QueryJobConfig( + reservation="foo", + max_slots=100, + ), + make_query_request( + { + "maxSlots": "100", + "reservation": "foo", + } + ), + id="job_config-with-reservation-and-slots", + ), ), ) def test__to_query_request(job_config, expected): @@ -1048,6 +1061,21 @@ def test_make_job_id_w_job_id_overrides_prefix(): True, id="write_incremental_results", ), + pytest.param( + job_query.QueryJobConfig(job_timeout_ms=1000), + True, + id="job_timeout_ms", + ), + pytest.param( + job_query.QueryJobConfig(reservation="foo"), + True, + id="reservation", + ), + pytest.param( + job_query.QueryJobConfig(max_slots=20), + True, + id="max_slots", + ), ), ) def test_supported_by_jobs_query_from_queryjobconfig( From 43527af24e56994357205b482a86b805950d2d0f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 3 Sep 2025 18:27:17 +0200 Subject: [PATCH 509/536] chore(deps): update all dependencies (#2259) --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 10 +++++----- samples/magics/requirements.txt | 6 +++--- samples/notebooks/requirements.txt | 10 +++++----- samples/snippets/requirements.txt | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index e7a02eca5..f86e57e5c 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.35.1 +google-cloud-bigquery==3.36.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index fa54cc229..c2bd74bed 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,7 +1,7 @@ attrs==25.3.0 certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click===8.1.8; python_version == '3.9' click==8.2.1; python_version >= '3.10' click-plugins==1.1.1.2 @@ -13,7 +13,7 @@ geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 -google-cloud-bigquery==3.35.1 +google-cloud-bigquery==3.36.0 google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 @@ -24,7 +24,7 @@ idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.3.1 +pandas==2.3.2 proto-plus==1.26.1 pyarrow==21.0.0 pyasn1==0.6.1 @@ -34,11 +34,11 @@ pyparsing==3.2.3 python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 -requests==2.32.4 +requests==2.32.5 rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' six==1.17.0 -typing-extensions==4.14.1 +typing-extensions==4.15.0 typing-inspect==0.9.0 urllib3==2.5.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index e7230053c..7b4f84e8e 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.10.2 +bigquery_magics==0.10.3 db-dtypes==1.4.3 -google.cloud.bigquery==3.35.1 +google.cloud.bigquery==3.36.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 -pandas==2.3.1 +pandas==2.3.2 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 829f08f47..dc22903c7 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ -bigquery-magics==0.10.2 +bigquery-magics==0.10.3 db-dtypes==1.4.3 -google-cloud-bigquery==3.35.1 +google-cloud-bigquery==3.36.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' -ipython==9.4.0; python_version >= '3.10' +ipython==9.5.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.5; python_version >= '3.10' -pandas==2.3.1 +matplotlib==3.10.6; python_version >= '3.10' +pandas==2.3.2 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index afa62b6b8..23da68d60 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.35.1 +google-cloud-bigquery==3.36.0 From 435ecdb62d8402fea317763e48934fa510ce8026 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 3 Sep 2025 14:30:49 -0400 Subject: [PATCH 510/536] bug: updates `__eq__` comparison on TableConstraint (#2274) * bug: updates __eq__ comparison on TableConstraint * updates tests * moves test out of class to accommodate pytest params --- google/cloud/bigquery/table.py | 6 +- tests/unit/test_table.py | 144 ++++++++++++++++----------------- 2 files changed, 71 insertions(+), 79 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 219b31467..5efcb1958 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -3574,9 +3574,9 @@ def __init__( def __eq__(self, other): if not isinstance(other, TableConstraints) and other is not None: raise TypeError("The value provided is not a BigQuery TableConstraints.") - return ( - self.primary_key == other.primary_key if other.primary_key else None - ) and (self.foreign_keys == other.foreign_keys if other.foreign_keys else None) + return self.primary_key == ( + other.primary_key if other.primary_key else None + ) and self.foreign_keys == (other.foreign_keys if other.foreign_keys else None) @classmethod def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints": diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index eb2c8d9ec..af31d116b 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -6322,82 +6322,6 @@ def test_constructor_explicit_with_none(self): self.assertIsNone(table_constraint.primary_key) self.assertIsNone(table_constraint.foreign_keys) - def test__eq__primary_key_mismatch(self): - from google.cloud.bigquery.table import ( - PrimaryKey, - ForeignKey, - TableReference, - ColumnReference, - ) - - foriegn_keys = [ - ForeignKey( - name="my_fk_id", - referenced_table=TableReference.from_string( - "my-project.my-dataset.my-table" - ), - column_references=[ - ColumnReference(referencing_column="id", referenced_column="id"), - ], - ), - ] - - table_constraint = self._make_one( - primary_key=PrimaryKey(columns=["my_pk_id"]), - foreign_keys=foriegn_keys, - ) - other_table_constraint = self._make_one( - primary_key=PrimaryKey(columns=["my_other_pk_id"]), - foreign_keys=foriegn_keys, - ) - - self.assertNotEqual(table_constraint, other_table_constraint) - - def test__eq__foreign_keys_mismatch(self): - from google.cloud.bigquery.table import ( - PrimaryKey, - ForeignKey, - TableReference, - ColumnReference, - ) - - primary_key = PrimaryKey(columns=["my_pk_id"]) - - table_constraint = self._make_one( - primary_key=primary_key, - foreign_keys=[ - ForeignKey( - name="my_fk_id", - referenced_table=TableReference.from_string( - "my-project.my-dataset.my-table" - ), - column_references=[ - ColumnReference( - referencing_column="id", referenced_column="id" - ), - ], - ), - ], - ) - other_table_constraint = self._make_one( - primary_key=primary_key, - foreign_keys=[ - ForeignKey( - name="my_other_fk_id", - referenced_table=TableReference.from_string( - "my-project.my-dataset.my-other-table" - ), - column_references=[ - ColumnReference( - referencing_column="other_id", referenced_column="other_id" - ), - ], - ), - ], - ) - - self.assertNotEqual(table_constraint, other_table_constraint) - def test__eq__other_type(self): from google.cloud.bigquery.table import ( PrimaryKey, @@ -6615,6 +6539,74 @@ def test_to_api_repr_empty_constraints(self): self.assertEqual(instance.to_api_repr(), expected) +@pytest.mark.parametrize( + "self_pk_name,self_fk_name,other_pk_name,other_fk_name,expected_equal", + [ + (None, None, None, None, True), + ("pkey", None, "pkey", None, True), + ("pkey", "fkey", "pkey", "fkey", True), + (None, "fkey", None, "fkey", True), + ("pkey", None, "pkey_no_match", None, False), + ("pkey", "fkey", "pkey_no_match", "fkey_no_match", False), + (None, "fkey", None, "fkey_no_match", False), + ("pkey", "fkey", "pkey_no_match", "fkey", False), + ("pkey", "fkey", "pkey", "fkey_no_match", False), + ], +) +def test_table_constraint_eq_parametrized( + self_pk_name, self_fk_name, other_pk_name, other_fk_name, expected_equal +): + # Imports are placed here to ensure they are self-contained for this example. + # In a real test file, they would likely be at the top of the file. + from google.cloud.bigquery.table import ( + ColumnReference, + ForeignKey, + PrimaryKey, + TableReference, + TableConstraints, + ) + + # Helper function to create a PrimaryKey object or None + def _create_primary_key(name): + if name is None: + return None + return PrimaryKey(columns=[name]) + + # Helper function to create a list of ForeignKey objects or None + def _create_foreign_keys(name): + if name is None: + return None + # Using a generic referenced_table and column_references for simplicity + # The 'name' parameter ensures different ForeignKey objects for different names + return [ + ForeignKey( + name=name, + referenced_table=TableReference.from_string( + f"my-project.my-dataset.{name}_referenced_table" + ), + column_references=[ + ColumnReference( + referencing_column=f"{name}_ref_col", + referenced_column=f"{name}_pk_col", + ) + ], + ) + ] + + # Create the two TableConstraints instances for comparison + tc1 = TableConstraints( + primary_key=_create_primary_key(self_pk_name), + foreign_keys=_create_foreign_keys(self_fk_name), + ) + tc2 = TableConstraints( + primary_key=_create_primary_key(other_pk_name), + foreign_keys=_create_foreign_keys(other_fk_name), + ) + + # Assert the equality based on the expected outcome + assert (tc1 == tc2) == expected_equal + + class TestExternalCatalogTableOptions: PROJECT = "test-project" DATASET_ID = "test_dataset" From 8a13c12905ffcb3dbb6086a61df37556f0c2cd31 Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 4 Sep 2025 13:25:49 -0500 Subject: [PATCH 511/536] docs: clarify the api_method arg for client.query() (#2277) * docs: clarify the api_method arg for client.query() --- google/cloud/bigquery/client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 4ca2cb428..ea592852a 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3519,7 +3519,8 @@ def query( specified here becomes the default ``job_retry`` for ``result()``, where it can also be specified. api_method (Union[str, enums.QueryApiMethod]): - Method with which to start the query job. + Method with which to start the query job. By default, + the jobs.insert API is used for starting a query. See :class:`google.cloud.bigquery.enums.QueryApiMethod` for details on the difference between the query start methods. From 33ea29616c06a2e2a106a785d216e784737ae386 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Sat, 6 Sep 2025 10:16:44 -0700 Subject: [PATCH 512/536] fix: remove deepcopy while setting properties for _QueryResults (#2280) --- google/cloud/bigquery/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 7f70f6a2a..170ed2976 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -1400,7 +1400,7 @@ def _set_properties(self, api_response): api_response (Dict): Response returned from an API call """ self._properties.clear() - self._properties.update(copy.deepcopy(api_response)) + self._properties.update(api_response) def _query_param_from_api_repr(resource): From 6e88d7dbe42ebfc35986da665d656b49ac481db4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 8 Sep 2025 09:58:27 -0500 Subject: [PATCH 513/536] docs: clarify that the presence of `XyzJob.errors` doesn't necessarily mean that the job has not completed or was unsuccessful (#2278) Internal issue b/440349994 --- google/cloud/bigquery/job/base.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 1344082be..9b7ddb82d 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -693,7 +693,12 @@ def transaction_info(self) -> Optional[TransactionInfo]: @property def error_result(self): - """Error information about the job as a whole. + """Output only. Final error result of the job. + + If present, indicates that the job has completed and was unsuccessful. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.error_result Returns: Optional[Mapping]: the error information (None until set from the server). @@ -704,7 +709,13 @@ def error_result(self): @property def errors(self): - """Information about individual errors generated by the job. + """Output only. The first errors encountered during the running of the job. + + The final message includes the number of errors that caused the process to stop. + Errors here do not necessarily mean that the job has not completed or was unsuccessful. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.errors Returns: Optional[List[Mapping]]: @@ -716,7 +727,12 @@ def errors(self): @property def state(self): - """Status of the job. + """Output only. Running state of the job. + + Valid states include 'PENDING', 'RUNNING', and 'DONE'. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.state Returns: Optional[str]: From 4b0ef0cfcf7def138e43a22223abfcbefc330da2 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 9 Sep 2025 10:01:33 -0700 Subject: [PATCH 514/536] chore(main): release 3.37.0 (#2269) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 18 ++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62352c344..fe721dfde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.37.0](https://github.com/googleapis/python-bigquery/compare/v3.36.0...v3.37.0) (2025-09-08) + + +### Features + +* Updates to fastpath query execution ([#2268](https://github.com/googleapis/python-bigquery/issues/2268)) ([ef2740a](https://github.com/googleapis/python-bigquery/commit/ef2740a158199633b5543a7b6eb19587580792cd)) + + +### Bug Fixes + +* Remove deepcopy while setting properties for _QueryResults ([#2280](https://github.com/googleapis/python-bigquery/issues/2280)) ([33ea296](https://github.com/googleapis/python-bigquery/commit/33ea29616c06a2e2a106a785d216e784737ae386)) + + +### Documentation + +* Clarify that the presence of `XyzJob.errors` doesn't necessarily mean that the job has not completed or was unsuccessful ([#2278](https://github.com/googleapis/python-bigquery/issues/2278)) ([6e88d7d](https://github.com/googleapis/python-bigquery/commit/6e88d7dbe42ebfc35986da665d656b49ac481db4)) +* Clarify the api_method arg for client.query() ([#2277](https://github.com/googleapis/python-bigquery/issues/2277)) ([8a13c12](https://github.com/googleapis/python-bigquery/commit/8a13c12905ffcb3dbb6086a61df37556f0c2cd31)) + ## [3.36.0](https://github.com/googleapis/python-bigquery/compare/v3.35.1...v3.36.0) (2025-08-20) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index a8f4c8e14..aa24ae04e 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.36.0" +__version__ = "3.37.0" From 7b1b718123afd80c0f68212946e4179bcd6db67f Mon Sep 17 00:00:00 2001 From: shollyman Date: Sat, 13 Sep 2025 08:02:19 -0700 Subject: [PATCH 515/536] feat: add additional query stats (#2270) * feat: add additional query stats This PR adds support for incremental query stats. --- google/cloud/bigquery/job/__init__.py | 2 + google/cloud/bigquery/job/query.py | 67 +++++++++++++++++++++++++++ tests/unit/job/test_query.py | 17 +++++++ tests/unit/job/test_query_stats.py | 61 ++++++++++++++++++++++++ 4 files changed, 147 insertions(+) diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index f51311b0b..4cda65965 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -39,6 +39,7 @@ from google.cloud.bigquery.job.query import QueryPlanEntryStep from google.cloud.bigquery.job.query import ScriptOptions from google.cloud.bigquery.job.query import TimelineEntry +from google.cloud.bigquery.job.query import IncrementalResultStats from google.cloud.bigquery.enums import Compression from google.cloud.bigquery.enums import CreateDisposition from google.cloud.bigquery.enums import DestinationFormat @@ -84,4 +85,5 @@ "SourceFormat", "TransactionInfo", "WriteDisposition", + "IncrementalResultStats", ] diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index b377f979d..38b8a7148 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -197,6 +197,66 @@ def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats": return cls(*args) +class IncrementalResultStats: + """IncrementalResultStats provides information about incremental query execution.""" + + def __init__(self): + self._properties = {} + + @classmethod + def from_api_repr(cls, resource) -> "IncrementalResultStats": + """Factory: construct instance from the JSON repr. + + Args: + resource(Dict[str: object]): + IncrementalResultStats representation returned from API. + + Returns: + google.cloud.bigquery.job.IncrementalResultStats: + stats parsed from ``resource``. + """ + entry = cls() + entry._properties = resource + return entry + + @property + def disabled_reason(self): + """Optional[string]: Reason why incremental results were not + written by the query. + """ + return _helpers._str_or_none(self._properties.get("disabledReason")) + + @property + def result_set_last_replace_time(self): + """Optional[datetime]: The time at which the result table's contents + were completely replaced. May be absent if no results have been written + or the query has completed.""" + from google.cloud._helpers import _rfc3339_nanos_to_datetime + + value = self._properties.get("resultSetLastReplaceTime") + if value: + try: + return _rfc3339_nanos_to_datetime(value) + except ValueError: + pass + return None + + @property + def result_set_last_modify_time(self): + """Optional[datetime]: The time at which the result table's contents + were modified. May be absent if no results have been written or the + query has completed.""" + from google.cloud._helpers import _rfc3339_nanos_to_datetime + + value = self._properties.get("resultSetLastModifyTime") + if value: + try: + return _rfc3339_nanos_to_datetime(value) + except ValueError: + pass + return None + + class IndexUnusedReason(typing.NamedTuple): """Reason about why no search index was used in the search query (or sub-query). @@ -1339,6 +1399,13 @@ def bi_engine_stats(self) -> Optional[BiEngineStats]: else: return BiEngineStats.from_api_repr(stats) + @property + def incremental_result_stats(self) -> Optional[IncrementalResultStats]: + stats = self._job_statistics().get("incrementalResultStats") + if stats is None: + return None + return IncrementalResultStats.from_api_repr(stats) + def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index ef6429598..4a6771c46 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -838,6 +838,23 @@ def test_search_stats(self): assert isinstance(job.search_stats, SearchStats) assert job.search_stats.mode == "INDEX_USAGE_MODE_UNSPECIFIED" + def test_incremental_result_stats(self): + from google.cloud.bigquery.job.query import IncrementalResultStats + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + assert job.incremental_result_stats is None + + statistics = job._properties["statistics"] = {} + assert job.incremental_result_stats is None + + query_stats = statistics["query"] = {} + assert job.incremental_result_stats is None + + query_stats["incrementalResultStats"] = {"disabledReason": "BAZ"} + assert isinstance(job.incremental_result_stats, IncrementalResultStats) + assert job.incremental_result_stats.disabled_reason == "BAZ" + def test_reload_query_results_uses_transport_timeout(self): conn = make_connection({}) client = _make_client(self.PROJECT, connection=conn) diff --git a/tests/unit/job/test_query_stats.py b/tests/unit/job/test_query_stats.py index 61b278d43..c7c7a31e0 100644 --- a/tests/unit/job/test_query_stats.py +++ b/tests/unit/job/test_query_stats.py @@ -13,6 +13,7 @@ # limitations under the License. from .helpers import _Base +import datetime class TestBiEngineStats: @@ -520,3 +521,63 @@ def test_from_api_repr_normal(self): self.assertEqual(entry.pending_units, self.PENDING_UNITS) self.assertEqual(entry.completed_units, self.COMPLETED_UNITS) self.assertEqual(entry.slot_millis, self.SLOT_MILLIS) + + +class TestIncrementalResultStats: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import IncrementalResultStats + + return IncrementalResultStats + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + stats = self._make_one() + assert stats.disabled_reason is None + assert stats.result_set_last_replace_time is None + assert stats.result_set_last_modify_time is None + + def test_from_api_repr_partial_stats(self): + klass = self._get_target_class() + stats = klass.from_api_repr({"disabledReason": "FOO"}) + + assert isinstance(stats, klass) + assert stats.disabled_reason == "FOO" + assert stats.result_set_last_replace_time is None + assert stats.result_set_last_modify_time is None + + def test_from_api_repr_full_stats(self): + klass = self._get_target_class() + stats = klass.from_api_repr( + { + "disabledReason": "BAR", + "resultSetLastReplaceTime": "2025-01-02T03:04:05.06Z", + "resultSetLastModifyTime": "2025-02-02T02:02:02.02Z", + } + ) + + assert isinstance(stats, klass) + assert stats.disabled_reason == "BAR" + assert stats.result_set_last_replace_time == datetime.datetime( + 2025, 1, 2, 3, 4, 5, 60000, tzinfo=datetime.timezone.utc + ) + assert stats.result_set_last_modify_time == datetime.datetime( + 2025, 2, 2, 2, 2, 2, 20000, tzinfo=datetime.timezone.utc + ) + + def test_from_api_repr_invalid_stats(self): + klass = self._get_target_class() + stats = klass.from_api_repr( + { + "disabledReason": "BAR", + "resultSetLastReplaceTime": "xxx", + "resultSetLastModifyTime": "yyy", + } + ) + + assert isinstance(stats, klass) + assert stats.disabled_reason == "BAR" + assert stats.result_set_last_replace_time is None + assert stats.result_set_last_modify_time is None From c9aba64c1f7240f1ad2caa00d55a1a4f86bdc8a3 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 15 Sep 2025 14:21:21 +0200 Subject: [PATCH 516/536] chore(deps): update all dependencies (#2275) --- samples/desktopapp/requirements-test.txt | 2 +- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 10 +++++----- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 4 ++-- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 2 +- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 3bf52c85d..31b836790 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index f86e57e5c..21ccef2fd 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.36.0 +google-cloud-bigquery==3.37.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index d449b373b..6fb9ba310 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c2bd74bed..c8a93a35e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,6 +1,6 @@ attrs==25.3.0 certifi==2025.8.3 -cffi==1.17.1 +cffi==2.0.0 charset-normalizer==3.4.3 click===8.1.8; python_version == '3.9' click==8.2.1; python_version >= '3.10' @@ -13,8 +13,8 @@ geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 -google-cloud-bigquery==3.36.0 -google-cloud-bigquery-storage==2.32.0 +google-cloud-bigquery==3.37.0 +google-cloud-bigquery-storage==2.33.1 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 @@ -29,8 +29,8 @@ proto-plus==1.26.1 pyarrow==21.0.0 pyasn1==0.6.1 pyasn1-modules==0.4.2 -pycparser==2.22 -pyparsing==3.2.3 +pycparser==2.23 +pyparsing==3.2.4 python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 3bf52c85d..31b836790 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 7b4f84e8e..d10d53c24 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.3 db-dtypes==1.4.3 -google.cloud.bigquery==3.36.0 -google-cloud-bigquery-storage==2.32.0 +google.cloud.bigquery==3.37.0 +google-cloud-bigquery-storage==2.33.1 ipython===8.18.1 pandas==2.3.2 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 3bf52c85d..31b836790 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index dc22903c7..f65008baa 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,7 +1,7 @@ bigquery-magics==0.10.3 db-dtypes==1.4.3 -google-cloud-bigquery==3.36.0 -google-cloud-bigquery-storage==2.32.0 +google-cloud-bigquery==3.37.0 +google-cloud-bigquery-storage==2.33.1 ipython===8.18.1; python_version == '3.9' ipython==9.5.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index cef3450e1..901f1df1a 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 23da68d60..1fed246f3 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.36.0 +google-cloud-bigquery==3.37.0 From 7cad6cf2f95e28b46e529f99b5c4d3cc61603ca4 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 17 Sep 2025 13:23:31 -0700 Subject: [PATCH 517/536] chore(main): release 3.38.0 (#2289) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe721dfde..95db5735c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.38.0](https://github.com/googleapis/python-bigquery/compare/v3.37.0...v3.38.0) (2025-09-15) + + +### Features + +* Add additional query stats ([#2270](https://github.com/googleapis/python-bigquery/issues/2270)) ([7b1b718](https://github.com/googleapis/python-bigquery/commit/7b1b718123afd80c0f68212946e4179bcd6db67f)) + ## [3.37.0](https://github.com/googleapis/python-bigquery/compare/v3.36.0...v3.37.0) (2025-09-08) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index aa24ae04e..22550a8f1 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.37.0" +__version__ = "3.38.0" From 1f1f9d41e8a2c9016198d848ad3f1cbb88cf77b0 Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 19 Sep 2025 10:19:42 -0700 Subject: [PATCH 518/536] docs: remove experimental annotations from GA features (#2303) * docs: remove experimental annotations from GA features Corrects some documentation drift. --- google/cloud/bigquery/external_config.py | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index dc7a33e6a..7e76f93b5 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -637,11 +637,7 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": class HivePartitioningOptions(object): - """[Beta] Options that configure hive partitioning. - - .. note:: - **Experimental**. This feature is experimental and might change or - have limited support. + """Options that configure hive partitioning. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions @@ -808,13 +804,9 @@ def decimal_target_types(self, value: Optional[Iterable[str]]): @property def hive_partitioning(self): - """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ + """Optional[:class:`~.external_config.HivePartitioningOptions`]: When set, \ it configures hive partitioning support. - .. note:: - **Experimental**. This feature is experimental and might change or - have limited support. - See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options """ @@ -979,14 +971,8 @@ def timestamp_format(self, value: Optional[str]): @property def connection_id(self): - """Optional[str]: [Experimental] ID of a BigQuery Connection API + """Optional[str]: ID of a BigQuery Connection API resource. - - .. WARNING:: - - This feature is experimental. Pre-GA features may have limited - support, and changes to pre-GA features may not be compatible with - other pre-GA versions. """ return self._properties.get("connectionId") From 8bbd3d01026c493dfa5903b397d2b01c0e9bf43b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 24 Sep 2025 14:29:28 -0500 Subject: [PATCH 519/536] fix: remove ambiguous error codes from query retries (#2308) Context: internal issue b/445984807 comment 10. --- google/cloud/bigquery/retry.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 8f469f2d3..19012efd6 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -121,9 +121,6 @@ def _should_retry_get_job_conflict(exc): """ job_retry_reasons = ( - "rateLimitExceeded", - "backendError", - "internalError", "jobBackendError", "jobInternalError", "jobRateLimitExceeded", From 63d7737cb971c0c96d0b6d3505e30e4696a7bf2a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 30 Sep 2025 17:05:06 +0100 Subject: [PATCH 520/536] chore(deps): update all dependencies (#2300) --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 16 ++++++++-------- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements.txt | 6 +++--- samples/snippets/requirements.txt | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 21ccef2fd..56696f868 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.37.0 +google-cloud-bigquery==3.38.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c8a93a35e..9fdca241a 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -3,7 +3,7 @@ certifi==2025.8.3 cffi==2.0.0 charset-normalizer==3.4.3 click===8.1.8; python_version == '3.9' -click==8.2.1; python_version >= '3.10' +click==8.3.0; python_version >= '3.10' click-plugins==1.1.1.2 cligj==0.7.2 db-dtypes==1.4.3 @@ -12,32 +12,32 @@ geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 -google-auth==2.40.3 -google-cloud-bigquery==3.37.0 +google-auth==2.41.0 +google-cloud-bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.74.0 +grpcio==1.75.1 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.3.2 +pandas==2.3.3 proto-plus==1.26.1 pyarrow==21.0.0 pyasn1==0.6.1 pyasn1-modules==0.4.2 pycparser==2.23 -pyparsing==3.2.4 +pyparsing==3.2.5 python-dateutil==2.9.0.post0 pytz==2025.2 -PyYAML==6.0.2 +PyYAML==6.0.3 requests==2.32.5 rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' -Shapely==2.1.1; python_version >= '3.10' +Shapely==2.1.2; python_version >= '3.10' six==1.17.0 typing-extensions==4.15.0 typing-inspect==0.9.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index d10d53c24..331e910e2 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.3 db-dtypes==1.4.3 -google.cloud.bigquery==3.37.0 +google.cloud.bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 ipython===8.18.1 -pandas==2.3.2 +pandas==2.3.3 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index f65008baa..ef509734a 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ bigquery-magics==0.10.3 db-dtypes==1.4.3 -google-cloud-bigquery==3.37.0 +google-cloud-bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 ipython===8.18.1; python_version == '3.9' -ipython==9.5.0; python_version >= '3.10' +ipython==9.6.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.6; python_version >= '3.10' -pandas==2.3.2 +pandas==2.3.3 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 1fed246f3..441385536 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.37.0 +google-cloud-bigquery==3.38.0 From e118b029bbc89a5adbab83f39858c356c23665bf Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 30 Sep 2025 13:54:00 -0400 Subject: [PATCH 521/536] fix: honor custom `retry` in `job.result()` (#2302) * fix(job): honor custom retry in job.result() The `_AsyncJob.result()` method was not correctly passing the `retry` argument to the superclass's `result()` method when the `retry` object was the same as the default retry object. This caused the default retry settings to be ignored in some cases. This change modifies the `result()` method to always pass the `retry` argument to the superclass, ensuring that the provided retry settings are always honored. A new test case is added to verify that `job.result()` correctly handles both the default retry and a custom retry object. * Update tests/unit/test_job_retry.py * Update tests/unit/test_job_retry.py * blacken and lint * udpates retry handling and testing of retry handling * Update tests/unit/test_job_retry.py * Update tests/unit/test_job_retry.py * Update tests/unit/test_job_retry.py --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/job/base.py | 3 +- tests/unit/test_job_retry.py | 77 +++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 9b7ddb82d..7576fc9aa 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -1044,8 +1044,7 @@ def result( # type: ignore # (incompatible with supertype) if self.state is None: self._begin(retry=retry, timeout=timeout) - kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry} - return super(_AsyncJob, self).result(timeout=timeout, **kwargs) + return super(_AsyncJob, self).result(timeout=timeout, retry=retry) def cancelled(self): """Check if the job has been cancelled. diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index 7343fed3d..fa55e8f6a 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -615,3 +615,80 @@ def test_query_and_wait_retries_job_for_DDL_queries(global_time_lock): _, kwargs = calls[3] assert kwargs["method"] == "POST" assert kwargs["path"] == query_request_path + + +@pytest.mark.parametrize( + "result_retry_param", + [ + pytest.param( + {}, + id="default retry {}", + ), + pytest.param( + { + "retry": google.cloud.bigquery.retry.DEFAULT_RETRY.with_timeout( + timeout=10.0 + ) + }, + id="custom retry object with timeout 10.0", + ), + ], +) +def test_retry_load_job_result(result_retry_param, PROJECT, DS_ID): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job.load import LoadJob + import google.cloud.bigquery.retry + + client = make_client() + conn = client._connection = make_connection( + dict( + status=dict(state="RUNNING"), + jobReference={"jobId": "id_1"}, + ), + google.api_core.exceptions.ServiceUnavailable("retry me"), + dict( + status=dict(state="DONE"), + jobReference={"jobId": "id_1"}, + statistics={"load": {"outputRows": 1}}, + ), + ) + + table_ref = DatasetReference(project=PROJECT, dataset_id=DS_ID).table("new_table") + job = LoadJob("id_1", source_uris=None, destination=table_ref, client=client) + + with mock.patch.object( + client, "_call_api", wraps=client._call_api + ) as wrapped_call_api: + result = job.result(**result_retry_param) + + assert job.state == "DONE" + assert result.output_rows == 1 + + # Check that _call_api was called multiple times due to retry + assert wrapped_call_api.call_count > 1 + + # Verify the retry object used in the calls to _call_api + expected_retry = result_retry_param.get( + "retry", google.cloud.bigquery.retry.DEFAULT_RETRY + ) + + for call in wrapped_call_api.mock_calls: + name, args, kwargs = call + # The retry object is the first positional argument to _call_api + called_retry = args[0] + + # We only care about the calls made during the job.result() polling + if kwargs.get("method") == "GET" and "jobs/id_1" in kwargs.get("path", ""): + assert called_retry._predicate == expected_retry._predicate + assert called_retry._initial == expected_retry._initial + assert called_retry._maximum == expected_retry._maximum + assert called_retry._multiplier == expected_retry._multiplier + assert called_retry._deadline == expected_retry._deadline + if "retry" in result_retry_param: + # Specifically check the timeout for the custom retry case + assert called_retry._timeout == 10.0 + else: + assert called_retry._timeout == expected_retry._timeout + + # The number of api_request calls should still be 3 + assert conn.api_request.call_count == 3 From 9a6a1ab4396f43c5fab3fcc646824761182a9310 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 1 Oct 2025 21:59:07 +0100 Subject: [PATCH 522/536] chore(deps): update dependency google-auth to v2.41.1 (#2312) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 9fdca241a..0a5d18d6d 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 -google-auth==2.41.0 +google-auth==2.41.1 google-cloud-bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 google-cloud-core==2.4.3 From 3e116c2d9a0232f72c9cda9e1e0f5b1d767d1591 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 7 Oct 2025 00:16:21 +0100 Subject: [PATCH 523/536] chore(deps): update all dependencies (#2314) --- samples/geography/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 0a5d18d6d..8955e0cfd 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ -attrs==25.3.0 -certifi==2025.8.3 +attrs==25.4.0 +certifi==2025.10.5 cffi==2.0.0 charset-normalizer==3.4.3 click===8.1.8; python_version == '3.9' @@ -11,7 +11,7 @@ Fiona==1.10.1 geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' -google-api-core==2.25.1 +google-api-core==2.25.2 google-auth==2.41.1 google-cloud-bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 From 4251fee3c5113ccc14fdb11172548b5427caeb59 Mon Sep 17 00:00:00 2001 From: Wabio Date: Mon, 13 Oct 2025 18:57:19 +0200 Subject: [PATCH 524/536] build: update `pyproject.toml` to follow PEP 639 (#2309) * update pyproject.toml to follow PEP 639 * Update pyproject.toml PEP 639 Thanks for the feedback, I've removed the version number completely as requested. * Update pyproject.toml --------- Co-authored-by: Chalmer Lowe --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9c91a2fc8..1c6ec1f77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,8 @@ build-backend = "setuptools.build_meta" [project] name = "google-cloud-bigquery" authors = [{ name = "Google LLC", email = "googleapis-packages@google.com" }] -license = { text = "Apache 2.0" } +license = "Apache-2.0" +license-files = ["LICENSE"] requires-python = ">=3.9" description = "Google BigQuery API client library" readme = "README.rst" @@ -30,7 +31,6 @@ classifiers = [ # "Development Status :: 5 - Production/Stable" "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", From 7fbd8c22672905b7ab0069a4d6edfee44cca40d0 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 14 Oct 2025 07:23:10 -0400 Subject: [PATCH 525/536] chore(python): Add Python 3.14 to python post processor image (#2317) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(python): Add Python 3.14 to python post processor image Source-Link: https://github.com/googleapis/synthtool/commit/16790a32126759493ba20781e04edd165825ff82 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:543e209e7c1c1ffe720eb4db1a3f045a75099304fb19aa11a47dc717b8aae2a9 * Update samples/snippets/noxfile.py * Update samples/notebooks/noxfile.py * Update samples/magics/noxfile.py * Update samples/geography/noxfile.py * Update samples/desktopapp/noxfile.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .github/.OwlBot.lock.yaml | 4 +- .kokoro/samples/python3.14/common.cfg | 40 ++++++++++++++++++++ .kokoro/samples/python3.14/continuous.cfg | 6 +++ .kokoro/samples/python3.14/periodic-head.cfg | 11 ++++++ .kokoro/samples/python3.14/periodic.cfg | 6 +++ .kokoro/samples/python3.14/presubmit.cfg | 6 +++ samples/desktopapp/noxfile.py | 2 +- samples/geography/noxfile.py | 2 +- samples/magics/noxfile.py | 2 +- samples/notebooks/noxfile.py | 2 +- samples/snippets/noxfile.py | 2 +- 11 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 .kokoro/samples/python3.14/common.cfg create mode 100644 .kokoro/samples/python3.14/continuous.cfg create mode 100644 .kokoro/samples/python3.14/periodic-head.cfg create mode 100644 .kokoro/samples/python3.14/periodic.cfg create mode 100644 .kokoro/samples/python3.14/presubmit.cfg diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index cea9eb68f..4a311db02 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3b3a31be60853477bc39ed8d9bac162cac3ba083724cecaad54eb81d4e4dae9c -# created: 2025-04-16T22:40:03.123475241Z + digest: sha256:543e209e7c1c1ffe720eb4db1a3f045a75099304fb19aa11a47dc717b8aae2a9 +# created: 2025-10-09T14:48:42.914384887Z diff --git a/.kokoro/samples/python3.14/common.cfg b/.kokoro/samples/python3.14/common.cfg new file mode 100644 index 000000000..d2fcee553 --- /dev/null +++ b/.kokoro/samples/python3.14/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.14" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-314" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" diff --git a/.kokoro/samples/python3.14/continuous.cfg b/.kokoro/samples/python3.14/continuous.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.14/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.14/periodic-head.cfg b/.kokoro/samples/python3.14/periodic-head.cfg new file mode 100644 index 000000000..5aa01bab5 --- /dev/null +++ b/.kokoro/samples/python3.14/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.14/periodic.cfg b/.kokoro/samples/python3.14/periodic.cfg new file mode 100644 index 000000000..71cd1e597 --- /dev/null +++ b/.kokoro/samples/python3.14/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/.kokoro/samples/python3.14/presubmit.cfg b/.kokoro/samples/python3.14/presubmit.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.14/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/samples/desktopapp/noxfile.py b/samples/desktopapp/noxfile.py index a86590382..db2333e5a 100644 --- a/samples/desktopapp/noxfile.py +++ b/samples/desktopapp/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index a86590382..db2333e5a 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/magics/noxfile.py b/samples/magics/noxfile.py index a86590382..db2333e5a 100644 --- a/samples/magics/noxfile.py +++ b/samples/magics/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/notebooks/noxfile.py b/samples/notebooks/noxfile.py index a86590382..db2333e5a 100644 --- a/samples/notebooks/noxfile.py +++ b/samples/notebooks/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index a86590382..db2333e5a 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] From fa76e310a16ea6cba0071ff1d767ca1c71514da7 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 15 Oct 2025 03:31:50 -0700 Subject: [PATCH 526/536] feat: Add ExternalRuntimeOptions to BigQuery routine (#2311) * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. * Update google/cloud/bigquery/routine/routine.py * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. - Added additional tests to improve code coverage based on feedback. * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. - Added additional tests to improve code coverage based on feedback. - Addressed PyType errors by using helper functions for type conversion. * Update tests/unit/routine/test_external_runtime_options.py * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. - Added additional tests to improve code coverage based on feedback. - Addressed PyType errors by using helper functions for type conversion. - Addressed formatting nits from code review. --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: Chalmer Lowe Co-authored-by: Lingqing Gan --- google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/routine/__init__.py | 2 + google/cloud/bigquery/routine/routine.py | 185 ++++++++++++++++- .../routine/test_external_runtime_options.py | 191 ++++++++++++++++++ tests/unit/routine/test_routine.py | 42 ++++ 5 files changed, 421 insertions(+), 1 deletion(-) create mode 100644 tests/unit/routine/test_external_runtime_options.py diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index d39c71641..904bea3d4 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -98,6 +98,7 @@ from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType from google.cloud.bigquery.routine import RemoteFunctionOptions +from google.cloud.bigquery.routine import ExternalRuntimeOptions from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.schema import FieldElementType @@ -181,6 +182,7 @@ "RoutineArgument", "RoutineReference", "RemoteFunctionOptions", + "ExternalRuntimeOptions", # Shared helpers "SchemaField", "FieldElementType", diff --git a/google/cloud/bigquery/routine/__init__.py b/google/cloud/bigquery/routine/__init__.py index e576b0d49..025103957 100644 --- a/google/cloud/bigquery/routine/__init__.py +++ b/google/cloud/bigquery/routine/__init__.py @@ -21,6 +21,7 @@ from google.cloud.bigquery.routine.routine import RoutineReference from google.cloud.bigquery.routine.routine import RoutineType from google.cloud.bigquery.routine.routine import RemoteFunctionOptions +from google.cloud.bigquery.routine.routine import ExternalRuntimeOptions __all__ = ( @@ -30,4 +31,5 @@ "RoutineReference", "RoutineType", "RemoteFunctionOptions", + "ExternalRuntimeOptions", ) diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index e933fa137..c5aa8750e 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -15,7 +15,7 @@ # limitations under the License. """Define resources for the BigQuery Routines API.""" - +import typing from typing import Any, Dict, Optional, Union import google.cloud._helpers # type: ignore @@ -69,6 +69,7 @@ class Routine(object): "determinism_level": "determinismLevel", "remote_function_options": "remoteFunctionOptions", "data_governance_type": "dataGovernanceType", + "external_runtime_options": "externalRuntimeOptions", } def __init__(self, routine_ref, **kwargs) -> None: @@ -349,6 +350,37 @@ def data_governance_type(self, value): ) self._properties[self._PROPERTY_TO_API_FIELD["data_governance_type"]] = value + @property + def external_runtime_options(self): + """Optional[google.cloud.bigquery.routine.ExternalRuntimeOptions]: + Configures the external runtime options for a routine. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.routine.ExternalRuntimeOptions` or + :data:`None`. + """ + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["external_runtime_options"] + ) + if prop is not None: + return ExternalRuntimeOptions.from_api_repr(prop) + + @external_runtime_options.setter + def external_runtime_options(self, value): + api_repr = value + if isinstance(value, ExternalRuntimeOptions): + api_repr = value.to_api_repr() + elif value is not None: + raise ValueError( + "value must be google.cloud.bigquery.routine.ExternalRuntimeOptions " + "or None" + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["external_runtime_options"] + ] = api_repr + @classmethod def from_api_repr(cls, resource: dict) -> "Routine": """Factory: construct a routine given its API representation. @@ -736,3 +768,154 @@ def __repr__(self): for property_name in sorted(self._PROPERTY_TO_API_FIELD) ] return "RemoteFunctionOptions({})".format(", ".join(all_properties)) + + +class ExternalRuntimeOptions(object): + """Options for the runtime of the external system. + + Args: + container_memory (str): + Optional. Amount of memory provisioned for a Python UDF container + instance. Format: {number}{unit} where unit is one of "M", "G", "Mi" + and "Gi" (e.g. 1G, 512Mi). If not specified, the default value is + 512Mi. For more information, see `Configure container limits for + Python UDFs `_ + container_cpu (int): + Optional. Amount of CPU provisioned for a Python UDF container + instance. For more information, see `Configure container limits + for Python UDFs `_ + runtime_connection (str): + Optional. Fully qualified name of the connection whose service account + will be used to execute the code in the container. Format: + "projects/{projectId}/locations/{locationId}/connections/{connectionId}" + max_batching_rows (int): + Optional. Maximum number of rows in each batch sent to the external + runtime. If absent or if 0, BigQuery dynamically decides the number of + rows in a batch. + runtime_version (str): + Optional. Language runtime version. Example: python-3.11. + """ + + _PROPERTY_TO_API_FIELD = { + "container_memory": "containerMemory", + "container_cpu": "containerCpu", + "runtime_connection": "runtimeConnection", + "max_batching_rows": "maxBatchingRows", + "runtime_version": "runtimeVersion", + } + + def __init__( + self, + container_memory: Optional[str] = None, + container_cpu: Optional[int] = None, + runtime_connection: Optional[str] = None, + max_batching_rows: Optional[int] = None, + runtime_version: Optional[str] = None, + _properties: Optional[Dict] = None, + ) -> None: + if _properties is None: + _properties = {} + self._properties = _properties + + if container_memory is not None: + self.container_memory = container_memory + if container_cpu is not None: + self.container_cpu = container_cpu + if runtime_connection is not None: + self.runtime_connection = runtime_connection + if max_batching_rows is not None: + self.max_batching_rows = max_batching_rows + if runtime_version is not None: + self.runtime_version = runtime_version + + @property + def container_memory(self) -> Optional[str]: + """Optional. Amount of memory provisioned for a Python UDF container instance.""" + return _helpers._str_or_none(self._properties.get("containerMemory")) + + @container_memory.setter + def container_memory(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("container_memory must be a string or None.") + self._properties["containerMemory"] = value + + @property + def container_cpu(self) -> Optional[int]: + """Optional. Amount of CPU provisioned for a Python UDF container instance.""" + return _helpers._int_or_none(self._properties.get("containerCpu")) + + @container_cpu.setter + def container_cpu(self, value: Optional[int]): + if value is not None and not isinstance(value, int): + raise ValueError("container_cpu must be an integer or None.") + self._properties["containerCpu"] = value + + @property + def runtime_connection(self) -> Optional[str]: + """Optional. Fully qualified name of the connection.""" + return _helpers._str_or_none(self._properties.get("runtimeConnection")) + + @runtime_connection.setter + def runtime_connection(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("runtime_connection must be a string or None.") + self._properties["runtimeConnection"] = value + + @property + def max_batching_rows(self) -> Optional[int]: + """Optional. Maximum number of rows in each batch sent to the external runtime.""" + return typing.cast( + int, _helpers._int_or_none(self._properties.get("maxBatchingRows")) + ) + + @max_batching_rows.setter + def max_batching_rows(self, value: Optional[int]): + if value is not None and not isinstance(value, int): + raise ValueError("max_batching_rows must be an integer or None.") + self._properties["maxBatchingRows"] = _helpers._str_or_none(value) + + @property + def runtime_version(self) -> Optional[str]: + """Optional. Language runtime version.""" + return _helpers._str_or_none(self._properties.get("runtimeVersion")) + + @runtime_version.setter + def runtime_version(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("runtime_version must be a string or None.") + self._properties["runtimeVersion"] = value + + @classmethod + def from_api_repr(cls, resource: dict) -> "ExternalRuntimeOptions": + """Factory: construct external runtime options given its API representation. + Args: + resource (Dict[str, object]): Resource, as returned from the API. + Returns: + google.cloud.bigquery.routine.ExternalRuntimeOptions: + Python object, as parsed from ``resource``. + """ + ref = cls() + ref._properties = resource + return ref + + def to_api_repr(self) -> dict: + """Construct the API resource representation of this ExternalRuntimeOptions. + Returns: + Dict[str, object]: External runtime options represented as an API resource. + """ + return self._properties + + def __eq__(self, other): + if not isinstance(other, ExternalRuntimeOptions): + return NotImplemented + return self._properties == other._properties + + def __ne__(self, other): + return not self == other + + def __repr__(self): + all_properties = [ + "{}={}".format(property_name, repr(getattr(self, property_name))) + for property_name in sorted(self._PROPERTY_TO_API_FIELD) + ] + return "ExternalRuntimeOptions({})".format(", ".join(all_properties)) diff --git a/tests/unit/routine/test_external_runtime_options.py b/tests/unit/routine/test_external_runtime_options.py new file mode 100644 index 000000000..d4edaae9a --- /dev/null +++ b/tests/unit/routine/test_external_runtime_options.py @@ -0,0 +1,191 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + + +@pytest.fixture +def target_class(): + from google.cloud.bigquery.routine.routine import ExternalRuntimeOptions + + return ExternalRuntimeOptions + + +@pytest.fixture +def object_under_test(target_class): + return target_class() + + +def test_ctor(target_class): + container_memory = "1G" + container_cpu = 1 + runtime_connection = ( + "projects/my-project/locations/us-central1/connections/my-connection" + ) + max_batching_rows = 100 + runtime_version = "python-3.11" + + instance = target_class( + container_memory=container_memory, + container_cpu=container_cpu, + runtime_connection=runtime_connection, + max_batching_rows=max_batching_rows, + runtime_version=runtime_version, + ) + + assert instance.container_memory == container_memory + assert instance.container_cpu == container_cpu + assert instance.runtime_connection == runtime_connection + assert instance.max_batching_rows == max_batching_rows + assert instance.runtime_version == runtime_version + + +def test_container_memory(object_under_test): + container_memory = "512Mi" + object_under_test.container_memory = container_memory + assert object_under_test.container_memory == container_memory + + +def test_container_cpu(object_under_test): + container_cpu = 1 + object_under_test.container_cpu = container_cpu + assert object_under_test.container_cpu == container_cpu + + +def test_runtime_connection(object_under_test): + runtime_connection = ( + "projects/my-project/locations/us-central1/connections/my-connection" + ) + object_under_test.runtime_connection = runtime_connection + assert object_under_test.runtime_connection == runtime_connection + + +def test_max_batching_rows(object_under_test): + max_batching_rows = 100 + object_under_test.max_batching_rows = max_batching_rows + assert object_under_test.max_batching_rows == max_batching_rows + + +def test_runtime_version(object_under_test): + runtime_version = "python-3.11" + object_under_test.runtime_version = runtime_version + assert object_under_test.runtime_version == runtime_version + + +def test_ctor_w_properties(target_class): + properties = { + "containerMemory": "1G", + "containerCpu": 1, + } + instance = target_class(_properties=properties) + assert instance._properties == properties + + +def test_ne(target_class): + instance1 = target_class(container_memory="1G") + instance2 = target_class(container_memory="2G") + assert instance1 != instance2 + + +def test_ne_false(target_class): + instance1 = target_class(container_memory="1G") + instance2 = target_class(container_memory="1G") + assert not (instance1 != instance2) + + +def test_eq_not_implemented(object_under_test): + assert not (object_under_test == object()) + assert object_under_test != object() + + +def test_from_api_repr(target_class): + resource = { + "containerMemory": "1G", + "containerCpu": 1, + "runtimeConnection": "projects/my-project/locations/us-central1/connections/my-connection", + "maxBatchingRows": "100", + "runtimeVersion": "python-3.11", + } + instance = target_class.from_api_repr(resource) + + assert instance.container_memory == "1G" + assert instance.container_cpu == 1 + assert ( + instance.runtime_connection + == "projects/my-project/locations/us-central1/connections/my-connection" + ) + assert instance.max_batching_rows == 100 + assert instance.runtime_version == "python-3.11" + + +def test_to_api_repr(target_class): + instance = target_class( + container_memory="1G", + container_cpu=1, + runtime_connection="projects/my-project/locations/us-central1/connections/my-connection", + max_batching_rows=100, + runtime_version="python-3.11", + ) + resource = instance.to_api_repr() + + assert resource == { + "containerMemory": "1G", + "containerCpu": 1, + "runtimeConnection": "projects/my-project/locations/us-central1/connections/my-connection", + "maxBatchingRows": "100", + "runtimeVersion": "python-3.11", + } + + +def test_repr(target_class): + instance = target_class( + container_memory="1G", + container_cpu=1, + ) + expected_repr = ( + "ExternalRuntimeOptions(container_cpu=1, container_memory='1G', " + "max_batching_rows=None, runtime_connection=None, runtime_version=None)" + ) + assert repr(instance) == expected_repr + + +def test_invalid_container_memory(object_under_test): + with pytest.raises(ValueError, match="container_memory must be a string or None."): + object_under_test.container_memory = 123 + + +def test_invalid_container_cpu(object_under_test): + with pytest.raises(ValueError, match="container_cpu must be an integer or None."): + object_under_test.container_cpu = "1" + + +def test_invalid_runtime_connection(object_under_test): + with pytest.raises( + ValueError, match="runtime_connection must be a string or None." + ): + object_under_test.runtime_connection = 123 + + +def test_invalid_max_batching_rows(object_under_test): + with pytest.raises( + ValueError, match="max_batching_rows must be an integer or None." + ): + object_under_test.max_batching_rows = "100" + + +def test_invalid_runtime_version(object_under_test): + with pytest.raises(ValueError, match="runtime_version must be a string or None."): + object_under_test.runtime_version = 123 diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py index acd3bc40e..965c6b2eb 100644 --- a/tests/unit/routine/test_routine.py +++ b/tests/unit/routine/test_routine.py @@ -81,6 +81,13 @@ def test_ctor_w_properties(target_class): max_batching_rows=99, user_defined_context={"foo": "bar"}, ) + external_runtime_options = bigquery.ExternalRuntimeOptions( + container_memory="1G", + container_cpu=1, + runtime_connection="projects/p/locations/l/connections/c", + max_batching_rows=100, + runtime_version="python-3.11", + ) actual_routine = target_class( routine_id, @@ -92,6 +99,7 @@ def test_ctor_w_properties(target_class): description=description, determinism_level=determinism_level, remote_function_options=options, + external_runtime_options=external_runtime_options, ) ref = RoutineReference.from_string(routine_id) @@ -106,6 +114,7 @@ def test_ctor_w_properties(target_class): actual_routine.determinism_level == bigquery.DeterminismLevel.NOT_DETERMINISTIC ) assert actual_routine.remote_function_options == options + assert actual_routine.external_runtime_options == external_runtime_options def test_ctor_invalid_remote_function_options(target_class): @@ -119,6 +128,17 @@ def test_ctor_invalid_remote_function_options(target_class): ) +def test_ctor_invalid_external_runtime_options(target_class): + with pytest.raises( + ValueError, + match=".*must be google.cloud.bigquery.routine.ExternalRuntimeOptions.*", + ): + target_class( + "my-proj.my_dset.my_routine", + external_runtime_options=object(), + ) + + def test_from_api_repr(target_class): from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference @@ -155,6 +175,13 @@ def test_from_api_repr(target_class): }, }, "dataGovernanceType": "DATA_MASKING", + "externalRuntimeOptions": { + "containerMemory": "1G", + "containerCpu": 1, + "runtimeConnection": "projects/p/locations/l/connections/c", + "maxBatchingRows": 100, + "runtimeVersion": "python-3.11", + }, } actual_routine = target_class.from_api_repr(resource) @@ -194,6 +221,14 @@ def test_from_api_repr(target_class): assert actual_routine.remote_function_options.max_batching_rows == 50 assert actual_routine.remote_function_options.user_defined_context == {"foo": "bar"} assert actual_routine.data_governance_type == "DATA_MASKING" + assert actual_routine.external_runtime_options.container_memory == "1G" + assert actual_routine.external_runtime_options.container_cpu == 1 + assert ( + actual_routine.external_runtime_options.runtime_connection + == "projects/p/locations/l/connections/c" + ) + assert actual_routine.external_runtime_options.max_batching_rows == 100 + assert actual_routine.external_runtime_options.runtime_version == "python-3.11" def test_from_api_repr_tvf_function(target_class): @@ -297,6 +332,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert actual_routine.determinism_level is None assert actual_routine.remote_function_options is None assert actual_routine.data_governance_type is None + assert actual_routine.external_runtime_options is None def test_from_api_repr_w_unknown_fields(target_class): @@ -571,6 +607,12 @@ def test_set_remote_function_options_w_none(object_under_test): assert object_under_test._properties["remoteFunctionOptions"] is None +def test_set_external_runtime_options_w_none(object_under_test): + object_under_test.external_runtime_options = None + assert object_under_test.external_runtime_options is None + assert object_under_test._properties["externalRuntimeOptions"] is None + + def test_set_data_governance_type_w_none(object_under_test): object_under_test.data_governance_type = None assert object_under_test.data_governance_type is None From b11e09cb6ee32e451b37eda66bece2220b9ceaba Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Wed, 15 Oct 2025 14:30:12 -0700 Subject: [PATCH 527/536] fix: include `io.Base` in the `PathType` (#2323) --- google/cloud/bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index ea592852a..c50e7c2d7 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -139,7 +139,7 @@ if typing.TYPE_CHECKING: # pragma: NO COVER # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. - PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] + PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes], io.IOBase] _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 From 6065e14c448cb430189982dd70025fa0575777ca Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 20 Oct 2025 15:06:52 -0400 Subject: [PATCH 528/536] feat: adds support for Python runtime 3.14 (#2322) * feat: adds support for Python runtime 3.14 * adds step to install gdal * adds files required by pyarrow * adds repo required by pyarrow * corrects url to repo required by pyarrow * testing a theory with a conditional * testing a theory with a conditional version of ubuntu * testing a new approach to installing arrow * testing a new approach to dearmoring the key * back to the basics * trying a conditional again. * adds explanatory comment resets ubuntu version to latest * Apply suggestion from @chalmerlowe * Apply suggestion from @chalmerlowe * Apply suggestion from @chalmerlowe * Apply suggestion from @chalmerlowe --- .github/sync-repo-settings.yaml | 1 + .github/workflows/unittest.yml | 11 ++++++++--- CONTRIBUTING.rst | 4 +++- noxfile.py | 2 +- owlbot.py | 2 +- pyproject.toml | 3 +++ testing/constraints-3.14.txt | 2 ++ 7 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 testing/constraints-3.14.txt diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 1e61b4d65..ac91806eb 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -19,6 +19,7 @@ branchProtectionRules: - 'Samples - Python 3.11' - 'Samples - Python 3.12' - 'Samples - Python 3.13' + - 'Samples - Python 3.14' - pattern: v2 requiresLinearHistory: true requiresCodeOwnerReviews: true diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 24c9ddbaf..f6b92547e 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -5,11 +5,10 @@ on: name: unittest jobs: unit: - # Use `ubuntu-latest` runner. runs-on: ubuntu-latest strategy: matrix: - python: ['3.9', '3.11', '3.12', '3.13'] + python: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] steps: - name: Checkout uses: actions/checkout@v4 @@ -22,6 +21,12 @@ jobs: python -m pip install --upgrade setuptools pip wheel python -m pip install nox - name: Run unit tests + + # TODO (https://b.corp.google.com/issues/450370502) 3.14 is not yet supported by pyarrow. See + # https://github.com/googleapis/google-cloud-python/issues/14686 + # https://github.com/apache/arrow/issues/47438 + # Reinstate running tests with 3.14 once this bug is fixed + if: matrix.python != '3.14' env: COVERAGE_FILE: .coverage-${{ matrix.python }} run: | @@ -38,7 +43,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ['3.9', '3.13'] + python: ['3.9', '3.14'] steps: - name: Checkout uses: actions/checkout@v4 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index b2993768b..3f8653f4b 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows. + 3.9, 3.10, 3.11, 3.12, 3.13 and 3.14 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -226,12 +226,14 @@ We support: - `Python 3.11`_ - `Python 3.12`_ - `Python 3.13`_ +- `Python 3.14`_ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ .. _Python 3.11: https://docs.python.org/3.11/ .. _Python 3.12: https://docs.python.org/3.12/ .. _Python 3.13: https://docs.python.org/3.13/ +.. _Python 3.14: https://docs.python.org/3.14/ Supported versions can be found in our ``noxfile.py`` `config`_. diff --git a/noxfile.py b/noxfile.py index eb79c238d..2457382fb 100644 --- a/noxfile.py +++ b/noxfile.py @@ -39,7 +39,7 @@ DEFAULT_PYTHON_VERSION = "3.9" SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12", "3.13"] -UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12", "3.13"] +UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() diff --git a/owlbot.py b/owlbot.py index 80cf9d6e3..bd694180f 100644 --- a/owlbot.py +++ b/owlbot.py @@ -56,7 +56,7 @@ "pandas": "https://pandas.pydata.org/pandas-docs/stable/", }, system_test_python_versions=["3.9", "3.13"], - unit_test_python_versions=["3.9", "3.10", "3.11", "3.12", "3.13"], + unit_test_python_versions=["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"], default_python_version="3.9", ) diff --git a/pyproject.toml b/pyproject.toml index 1c6ec1f77..a0e356b34 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Operating System :: OS Independent", "Topic :: Internet", ] @@ -69,6 +70,7 @@ bqstorage = [ # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.47.0, < 2.0.0", "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", + "grpcio >= 1.75.1, < 2.0.0; python_version >= '3.14'", "pyarrow >= 4.0.0", ] pandas = [ @@ -76,6 +78,7 @@ pandas = [ "pandas-gbq >= 0.26.1", "grpcio >= 1.47.0, < 2.0.0", "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", + "grpcio >= 1.75.1, < 2.0.0; python_version >= '3.14'", "pyarrow >= 3.0.0", "db-dtypes >= 1.0.4, < 2.0.0", ] diff --git a/testing/constraints-3.14.txt b/testing/constraints-3.14.txt new file mode 100644 index 000000000..6bd20f5fb --- /dev/null +++ b/testing/constraints-3.14.txt @@ -0,0 +1,2 @@ +# Constraints for Python 3.14 +grpcio >= 1.75.1 From 68e915f5c6a6106a7d9ccd9f76c54cd7088a8870 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 7 Nov 2025 13:29:40 -0500 Subject: [PATCH 529/536] chore(librarian): onboard to librarian (#2326) Towards https://github.com/googleapis/librarian/issues/2456 Files removed which is no longer used - Owlbot config files, including owlbot.py - Sync repo settings config file - Release please config files --- .github/.OwlBot.lock.yaml | 17 ----- .github/.OwlBot.yaml | 22 ------ .github/auto-approve.yml | 3 - .github/release-please.yml | 14 ---- .github/release-trigger.yml | 2 - .github/sync-repo-settings.yaml | 32 -------- .librarian/state.yaml | 10 +++ owlbot.py | 126 -------------------------------- 8 files changed, 10 insertions(+), 216 deletions(-) delete mode 100644 .github/.OwlBot.lock.yaml delete mode 100644 .github/.OwlBot.yaml delete mode 100644 .github/auto-approve.yml delete mode 100644 .github/release-please.yml delete mode 100644 .github/release-trigger.yml delete mode 100644 .github/sync-repo-settings.yaml create mode 100644 .librarian/state.yaml delete mode 100644 owlbot.py diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml deleted file mode 100644 index 4a311db02..000000000 --- a/.github/.OwlBot.lock.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -docker: - image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:543e209e7c1c1ffe720eb4db1a3f045a75099304fb19aa11a47dc717b8aae2a9 -# created: 2025-10-09T14:48:42.914384887Z diff --git a/.github/.OwlBot.yaml b/.github/.OwlBot.yaml deleted file mode 100644 index 8b142686c..000000000 --- a/.github/.OwlBot.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -docker: - image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - -deep-remove-regex: - - /owl-bot-staging - -begin-after-commit-hash: f2de93abafa306b2ebadf1d10d947db8bcf2bf15 - diff --git a/.github/auto-approve.yml b/.github/auto-approve.yml deleted file mode 100644 index 311ebbb85..000000000 --- a/.github/auto-approve.yml +++ /dev/null @@ -1,3 +0,0 @@ -# https://github.com/googleapis/repo-automation-bots/tree/main/packages/auto-approve -processes: - - "OwlBotTemplateChanges" diff --git a/.github/release-please.yml b/.github/release-please.yml deleted file mode 100644 index 5161ab347..000000000 --- a/.github/release-please.yml +++ /dev/null @@ -1,14 +0,0 @@ -releaseType: python -handleGHRelease: true -# NOTE: this section is generated by synthtool.languages.python -# See https://github.com/googleapis/synthtool/blob/master/synthtool/languages/python.py -branches: -- branch: v2 - handleGHRelease: true - releaseType: python -- branch: v1 - handleGHRelease: true - releaseType: python -- branch: v0 - handleGHRelease: true - releaseType: python diff --git a/.github/release-trigger.yml b/.github/release-trigger.yml deleted file mode 100644 index b975c190d..000000000 --- a/.github/release-trigger.yml +++ /dev/null @@ -1,2 +0,0 @@ -enabled: true -multiScmName: python-bigquery diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml deleted file mode 100644 index ac91806eb..000000000 --- a/.github/sync-repo-settings.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings -mergeCommitAllowed: false -# Rules for main branch protection -branchProtectionRules: -# Identifies the protection rule pattern. Name of the branch to be protected. -# Defaults to `main` -- pattern: main - requiresLinearHistory: true - requiresCodeOwnerReviews: true - requiresStrictStatusChecks: true - requiredStatusCheckContexts: - - 'Kokoro' - - 'Kokoro system-3.13' - - 'Kokoro snippets-3.13' - - 'cla/google' - - 'Samples - Lint' - - 'Samples - Python 3.9' - - 'Samples - Python 3.10' - - 'Samples - Python 3.11' - - 'Samples - Python 3.12' - - 'Samples - Python 3.13' - - 'Samples - Python 3.14' -- pattern: v2 - requiresLinearHistory: true - requiresCodeOwnerReviews: true - requiresStrictStatusChecks: true - requiredStatusCheckContexts: - - 'Kokoro' - - 'cla/google' - - 'Samples - Lint' - - 'Samples - Python 3.9' - - 'Samples - Python 3.10' diff --git a/.librarian/state.yaml b/.librarian/state.yaml new file mode 100644 index 000000000..1834779bc --- /dev/null +++ b/.librarian/state.yaml @@ -0,0 +1,10 @@ +image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:39628f6e89c9cad27973b9a39a50f7052bec0435ee58c7027b4fa6b655943e31 +libraries: + - id: google-cloud-bigquery + version: 3.38.0 + apis: [] + source_roots: + - . + preserve_regex: [] + remove_regex: [] + tag_format: v{version} diff --git a/owlbot.py b/owlbot.py deleted file mode 100644 index bd694180f..000000000 --- a/owlbot.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""This script is used to synthesize generated parts of this library.""" -from pathlib import Path - -import synthtool as s -from synthtool import gcp -from synthtool.languages import python - -REPO_ROOT = Path(__file__).parent.absolute() - -default_version = "v2" - -for library in s.get_staging_dirs(default_version): - # Avoid breaking change due to change in field renames. - # https://github.com/googleapis/python-bigquery/issues/319 - s.replace( - library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", - r"type_ ", - "type ", - ) - # Patch docs issue - s.replace( - library / f"google/cloud/bigquery_{library.name}/types/model.py", - r"""\"predicted_\"""", - """`predicted_`""", - ) - s.move(library / f"google/cloud/bigquery_{library.name}/types") -s.remove_staging_dirs() - -common = gcp.CommonTemplates() - -# ---------------------------------------------------------------------------- -# Add templated files -# ---------------------------------------------------------------------------- -templated_files = common.py_library( - cov_level=100, - samples=True, - microgenerator=True, - split_system_tests=True, - intersphinx_dependencies={ - "dateutil": "https://dateutil.readthedocs.io/en/latest/", - "geopandas": "https://geopandas.org/", - "pandas": "https://pandas.pydata.org/pandas-docs/stable/", - }, - system_test_python_versions=["3.9", "3.13"], - unit_test_python_versions=["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"], - default_python_version="3.9", -) - -# BigQuery has a custom multiprocessing note -s.move( - templated_files, - excludes=[ - "noxfile.py", - "renovate.json", - "docs/multiprocessing.rst", - "docs/index.rst", - ".coveragerc", - ".github/CODEOWNERS", - # Include custom SNIPPETS_TESTS job for performance. - # https://github.com/googleapis/python-bigquery/issues/191 - ".kokoro/presubmit/presubmit.cfg", - ".kokoro/presubmit/system-3.8.cfg", - ".kokoro/continuous/prerelease-deps.cfg", - ".kokoro/samples/python3.7/**", - ".kokoro/samples/python3.8/**", - ".github/workflows/**", # exclude gh actions as credentials are needed for tests - "README.rst", - ], -) - -python.configure_previous_major_version_branches() - -s.replace( - ".kokoro/test-samples-impl.sh", - """# `virtualenv==20.26.6` is added for Python 3.7 compatibility -python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6""", - "python3.9 -m pip install --upgrade --quiet nox virtualenv", -) - -s.replace( - "CONTRIBUTING.rst", - r"\$ nox -s py-3.8", - r"$ nox -s py-3.9", -) - -s.replace( - "scripts/readme-gen/templates/install_deps.tmpl.rst", - r"Samples are compatible with Python 3.7", - r"Samples are compatible with Python 3.9", -) - - -# ---------------------------------------------------------------------------- -# Samples templates -# ---------------------------------------------------------------------------- - -python.py_samples() - -s.replace( - "samples/**/noxfile.py", - 'BLACK_VERSION = "black==22.3.0"', - 'BLACK_VERSION = "black==23.7.0"', -) -s.replace( - "samples/**/noxfile.py", - r'ALL_VERSIONS = \["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"\]', - 'ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"]', -) - -s.shell.run(["nox", "-s", "blacken"], hide_output=False) -for noxfile in REPO_ROOT.glob("samples/**/noxfile.py"): - s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) From 8016baa2b1797324c10208c30be991bd00f92a0b Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 21 Nov 2025 11:19:22 -0500 Subject: [PATCH 530/536] tests: temporarily pin pytest (#2334) Temporarily pin `pytest < 9` to resolve the following issue ``` for invalid_view_value in invalid_view_values: > with self.subTest(invalid_view_value=invalid_view_value): tests/unit/test_client.py:810: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /opt/hostedtoolcache/Python/3.11.14/x64/lib/python3.11/contextlib.py:144: in __exit__ next(self.gen) /opt/hostedtoolcache/Python/3.11.14/x64/lib/python3.11/contextlib.py:144: in __exit__ next(self.gen) .nox/unit-3-11/lib/python3.11/site-packages/_pytest/unittest.py:438: in addSubTest self.ihook.pytest_runtest_logreport(report=sub_report) .nox/unit-3-11/lib/python3.11/site-packages/pluggy/_hooks.py:512: in __call__ return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .nox/unit-3-11/lib/python3.11/site-packages/pluggy/_manager.py:120: in _hookexec return self._inner_hookexec(hook_name, methods, kwargs, firstresult) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .nox/unit-3-11/lib/python3.11/site-packages/xdist/remote.py:289: in pytest_runtest_logreport self.sendevent("testreport", data=data) .nox/unit-3-11/lib/python3.11/site-packages/xdist/remote.py:126: in sendevent self.channel.send((name, kwargs)) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:912: in send self.gateway._send(Message.CHANNEL_DATA, self.id, dumps_internal(item)) ^^^^^^^^^^^^^^^^^^^^ .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1629: in dumps_internal return _Serializer().save(obj) # type: ignore[return-value] ^^^^^^^^^^^^^^^^^^^^^^^ .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1647: in save self._save(obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1744: in save_tuple self._save(item) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1740: in save_dict self._write_setitem(key, value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1734: in _write_setitem self._save(value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1740: in save_dict self._write_setitem(key, value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1734: in _write_setitem self._save(value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1740: in save_dict self._write_setitem(key, value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1734: in _write_setitem self._save(value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1740: in save_dict self._write_setitem(key, value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1734: in _write_setitem self._save(value) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = obj = def _save(self, obj: object) -> None: tp = type(obj) try: dispatch = self._dispatch[tp] except KeyError: methodname = "save_" + tp.__name__ meth: Callable[[_Serializer, object], None] | None = getattr( self.__class__, methodname, None ) if meth is None: > raise DumpError(f"can't serialize {tp}") from None E execnet.gateway_base.DumpError: can't serialize ``` The upstream issue is tracked in https://github.com/pytest-dev/pytest-xdist/issues/1273 --- noxfile.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 2457382fb..194e7ce8f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -95,7 +95,8 @@ def default(session, install_extras=True): # Install all test dependencies, then install local packages in-place. session.install( - "pytest", + # TODO(https://github.com/pytest-dev/pytest-xdist/issues/1273): Remove once this bug is fixed + "pytest<9", "google-cloud-testutils", "pytest-cov", "pytest-xdist", From 0529726400b1df4ade75cc3c9a829632ccb72eb8 Mon Sep 17 00:00:00 2001 From: ohmayr Date: Fri, 21 Nov 2025 08:42:31 -0800 Subject: [PATCH 531/536] chore: update librarian sha (#2329) This PR updates the librarian sha to support v1.0.0 --- .librarian/state.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.librarian/state.yaml b/.librarian/state.yaml index 1834779bc..b5d3126e4 100644 --- a/.librarian/state.yaml +++ b/.librarian/state.yaml @@ -1,4 +1,4 @@ -image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:39628f6e89c9cad27973b9a39a50f7052bec0435ee58c7027b4fa6b655943e31 +image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620 libraries: - id: google-cloud-bigquery version: 3.38.0 From 91fed546bf0cf7f3feaa44453cc7dc2394fdbd92 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 26 Nov 2025 10:56:58 -0500 Subject: [PATCH 532/536] chore(python): removes a filter put in place due to a dependency issue with pyarrow (#2338) Due to an issue with `pyarrow`, a significant dependency for certain python-bigquery use cases, not being compatible with Python 3.14, we temporarily skipped the failing CI/CD check for 3.14 while awaiting the update to pyarrow. Pyarrow is now fully compatible, so that filter is being removed. **KNOWN ISSUES**: this will show that unittests for 3.14 are failing. This has nothing to do with this PR/these changes. It is being addressed in an alternate mod. It is due to a missing dependency related to handling IO for `geopandas` (namely it is missing `libgdal-dev`, etc which are normally installed with `pyogrio` + `geopandas`). Because `pyogrio` is currently not compatible with Python 3.14 the tests in 3.14 cannot complete. This should not prevent **this PR from being merged** to help solve the current issue, which is a blocker for getting our continuous tests to green. --- .github/workflows/unittest.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index f6b92547e..550724076 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -21,12 +21,6 @@ jobs: python -m pip install --upgrade setuptools pip wheel python -m pip install nox - name: Run unit tests - - # TODO (https://b.corp.google.com/issues/450370502) 3.14 is not yet supported by pyarrow. See - # https://github.com/googleapis/google-cloud-python/issues/14686 - # https://github.com/apache/arrow/issues/47438 - # Reinstate running tests with 3.14 once this bug is fixed - if: matrix.python != '3.14' env: COVERAGE_FILE: .coverage-${{ matrix.python }} run: | From fcaf397def1a8c7a05bae9f30f268dc696c4bba3 Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 15 Dec 2025 13:10:04 -0800 Subject: [PATCH 533/536] chore: update ownership/routing for repo (#2346) This PR effectively moves ownership for this repo to the python language team, and removes api-bigquery as the defacto code owner. --- .github/CODEOWNERS | 6 +++--- .github/blunderbuss.yml | 6 +++--- .repo-metadata.json | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 6763f258c..c7478150e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -4,8 +4,8 @@ # For syntax help see: # https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax -# The @googleapis/api-bigquery is the default owner for changes in this repo -* @googleapis/api-bigquery @googleapis/yoshi-python +# The @googleapis/python-core-client-libraries is the default owner for changes in this repo +* @googleapis/python-core-client-libraries @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes -/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners @googleapis/yoshi-python +/samples/ @googleapis/python-core-client-libraries @googleapis/python-samples-owners @googleapis/yoshi-python diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml index 5b7383dc7..ff168399d 100644 --- a/.github/blunderbuss.yml +++ b/.github/blunderbuss.yml @@ -4,14 +4,14 @@ # Note: This file is autogenerated. To make changes to the assignee # team, please update `codeowner_team` in `.repo-metadata.json`. assign_issues: - - googleapis/api-bigquery + - googleapis/python-core-client-libraries assign_issues_by: - labels: - "samples" to: - googleapis/python-samples-reviewers - - googleapis/api-bigquery + - googleapis/python-core-client-libraries assign_prs: - - googleapis/api-bigquery + - googleapis/python-core-client-libraries diff --git a/.repo-metadata.json b/.repo-metadata.json index d1be7ec4d..82a1684ca 100644 --- a/.repo-metadata.json +++ b/.repo-metadata.json @@ -12,7 +12,7 @@ "api_id": "bigquery.googleapis.com", "requires_billing": false, "default_version": "v2", - "codeowner_team": "@googleapis/api-bigquery", + "codeowner_team": "@googleapis/python-core-client-libraries", "api_shortname": "bigquery", "api_description": "is a fully managed, NoOps, low cost data analytics service.\nData can be streamed into BigQuery at millions of rows per second to enable real-time analysis.\nWith BigQuery you can easily deploy Petabyte-scale Databases." } From 8634630fca32ae38ce6b4ef27679bb7444be59c6 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 15 Dec 2025 18:20:36 -0500 Subject: [PATCH 534/536] chore: librarian release pull request: 20251212T151524Z (#2344) PR created by the Librarian CLI to initialize a release. Merging this PR will auto trigger a release. Librarian Version: v0.7.0 Language Image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620
google-cloud-bigquery: 3.39.0 ## [3.39.0](https://github.com/googleapis/python-bigquery/compare/v3.38.0...v3.39.0) (2025-12-12) ### Features * adds support for Python runtime 3.14 (#2322) ([6065e14c](https://github.com/googleapis/python-bigquery/commit/6065e14c)) * Add ExternalRuntimeOptions to BigQuery routine (#2311) ([fa76e310](https://github.com/googleapis/python-bigquery/commit/fa76e310)) ### Bug Fixes * remove ambiguous error codes from query retries (#2308) ([8bbd3d01](https://github.com/googleapis/python-bigquery/commit/8bbd3d01)) * include `io.Base` in the `PathType` (#2323) ([b11e09cb](https://github.com/googleapis/python-bigquery/commit/b11e09cb)) * honor custom `retry` in `job.result()` (#2302) ([e118b029](https://github.com/googleapis/python-bigquery/commit/e118b029)) ### Documentation * remove experimental annotations from GA features (#2303) ([1f1f9d41](https://github.com/googleapis/python-bigquery/commit/1f1f9d41))
Co-authored-by: Daniel Sanche --- .librarian/state.yaml | 3 ++- CHANGELOG.md | 20 ++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/.librarian/state.yaml b/.librarian/state.yaml index b5d3126e4..8d67105e3 100644 --- a/.librarian/state.yaml +++ b/.librarian/state.yaml @@ -1,7 +1,8 @@ image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620 libraries: - id: google-cloud-bigquery - version: 3.38.0 + version: 3.39.0 + last_generated_commit: "" apis: [] source_roots: - . diff --git a/CHANGELOG.md b/CHANGELOG.md index 95db5735c..4cf177cc5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,26 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.39.0](https://github.com/googleapis/google-cloud-python/compare/google-cloud-bigquery-v3.38.0...google-cloud-bigquery-v3.39.0) (2025-12-12) + + +### Documentation + +* remove experimental annotations from GA features (#2303) ([1f1f9d41e8a2c9016198d848ad3f1cbb88cf77b0](https://github.com/googleapis/google-cloud-python/commit/1f1f9d41e8a2c9016198d848ad3f1cbb88cf77b0)) + + +### Features + +* adds support for Python runtime 3.14 (#2322) ([6065e14c448cb430189982dd70025fa0575777ca](https://github.com/googleapis/google-cloud-python/commit/6065e14c448cb430189982dd70025fa0575777ca)) +* Add ExternalRuntimeOptions to BigQuery routine (#2311) ([fa76e310a16ea6cba0071ff1d767ca1c71514da7](https://github.com/googleapis/google-cloud-python/commit/fa76e310a16ea6cba0071ff1d767ca1c71514da7)) + + +### Bug Fixes + +* include `io.Base` in the `PathType` (#2323) ([b11e09cb6ee32e451b37eda66bece2220b9ceaba](https://github.com/googleapis/google-cloud-python/commit/b11e09cb6ee32e451b37eda66bece2220b9ceaba)) +* honor custom `retry` in `job.result()` (#2302) ([e118b029bbc89a5adbab83f39858c356c23665bf](https://github.com/googleapis/google-cloud-python/commit/e118b029bbc89a5adbab83f39858c356c23665bf)) +* remove ambiguous error codes from query retries (#2308) ([8bbd3d01026c493dfa5903b397d2b01c0e9bf43b](https://github.com/googleapis/google-cloud-python/commit/8bbd3d01026c493dfa5903b397d2b01c0e9bf43b)) + ## [3.38.0](https://github.com/googleapis/python-bigquery/compare/v3.37.0...v3.38.0) (2025-09-15) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 22550a8f1..1d5e35889 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.38.0" +__version__ = "3.39.0" From bfd9d8da91d0cb1a1e5d87007fa8a5462486dca9 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 17 Dec 2025 01:02:06 +0000 Subject: [PATCH 535/536] chore(deps): update dependency urllib3 to v2.6.0 [security] (#2342) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Change | [Age](https://docs.renovatebot.com/merge-confidence/) | [Confidence](https://docs.renovatebot.com/merge-confidence/) | |---|---|---|---| | [urllib3](https://redirect.github.com/urllib3/urllib3) ([changelog](https://redirect.github.com/urllib3/urllib3/blob/main/CHANGES.rst)) | `==2.5.0` -> `==2.6.0` | ![age](https://developer.mend.io/api/mc/badges/age/pypi/urllib3/2.6.0?slim=true) | ![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/urllib3/2.5.0/2.6.0?slim=true) | ### GitHub Vulnerability Alerts #### [CVE-2025-66418](https://redirect.github.com/urllib3/urllib3/security/advisories/GHSA-gm62-xv2j-4w53) ## Impact urllib3 supports chained HTTP encoding algorithms for response content according to RFC 9110 (e.g., `Content-Encoding: gzip, zstd`). However, the number of links in the decompression chain was unbounded allowing a malicious server to insert a virtually unlimited number of compression steps leading to high CPU usage and massive memory allocation for the decompressed data. ## Affected usages Applications and libraries using urllib3 version 2.5.0 and earlier for HTTP requests to untrusted sources unless they disable content decoding explicitly. ## Remediation Upgrade to at least urllib3 v2.6.0 in which the library limits the number of links to 5. If upgrading is not immediately possible, use [`preload_content=False`](https://urllib3.readthedocs.io/en/2.5.0/advanced-usage.html#streaming-and-i-o) and ensure that `resp.headers["content-encoding"]` contains a safe number of encodings before reading the response content. #### [CVE-2025-66471](https://redirect.github.com/urllib3/urllib3/security/advisories/GHSA-2xpw-w6gg-jr37) ### Impact urllib3's [streaming API](https://urllib3.readthedocs.io/en/2.5.0/advanced-usage.html#streaming-and-i-o) is designed for the efficient handling of large HTTP responses by reading the content in chunks, rather than loading the entire response body into memory at once. When streaming a compressed response, urllib3 can perform decoding or decompression based on the HTTP `Content-Encoding` header (e.g., `gzip`, `deflate`, `br`, or `zstd`). The library must read compressed data from the network and decompress it until the requested chunk size is met. Any resulting decompressed data that exceeds the requested amount is held in an internal buffer for the next read operation. The decompression logic could cause urllib3 to fully decode a small amount of highly compressed data in a single operation. This can result in excessive resource consumption (high CPU usage and massive memory allocation for the decompressed data; CWE-409) on the client side, even if the application only requested a small chunk of data. ### Affected usages Applications and libraries using urllib3 version 2.5.0 and earlier to stream large compressed responses or content from untrusted sources. `stream()`, `read(amt=256)`, `read1(amt=256)`, `read_chunked(amt=256)`, `readinto(b)` are examples of `urllib3.HTTPResponse` method calls using the affected logic unless decoding is disabled explicitly. ### Remediation Upgrade to at least urllib3 v2.6.0 in which the library avoids decompressing data that exceeds the requested amount. If your environment contains a package facilitating the Brotli encoding, upgrade to at least Brotli 1.2.0 or brotlicffi 1.2.0.0 too. These versions are enforced by the `urllib3[brotli]` extra in the patched versions of urllib3. ### Credits The issue was reported by @​Cycloctane. Supplemental information was provided by @​stamparm during a security audit performed by [7ASecurity](https://7asecurity.com/) and facilitated by [OSTIF](https://ostif.org/). --- ### Release Notes
urllib3/urllib3 (urllib3) ### [`v2.6.0`](https://redirect.github.com/urllib3/urllib3/blob/HEAD/CHANGES.rst#260-2025-12-05) [Compare Source](https://redirect.github.com/urllib3/urllib3/compare/2.5.0...2.6.0) \================== ## Security - Fixed a security issue where streaming API could improperly handle highly compressed HTTP content ("decompression bombs") leading to excessive resource consumption even when a small amount of data was requested. Reading small chunks of compressed data is safer and much more efficient now. (`GHSA-2xpw-w6gg-jr37 `\_\_) - Fixed a security issue where an attacker could compose an HTTP response with virtually unlimited links in the `Content-Encoding` header, potentially leading to a denial of service (DoS) attack by exhausting system resources during decoding. The number of allowed chained encodings is now limited to 5. (`GHSA-gm62-xv2j-4w53 `\_\_) .. caution:: - If urllib3 is not installed with the optional `urllib3[brotli]` extra, but your environment contains a Brotli/brotlicffi/brotlipy package anyway, make sure to upgrade it to at least Brotli 1.2.0 or brotlicffi 1.2.0.0 to benefit from the security fixes and avoid warnings. Prefer using `urllib3[brotli]` to install a compatible Brotli package automatically. - If you use custom decompressors, please make sure to update them to respect the changed API of `urllib3.response.ContentDecoder`. ## Features - Enabled retrieval, deletion, and membership testing in `HTTPHeaderDict` using bytes keys. (`#​3653 `\_\_) - Added host and port information to string representations of `HTTPConnection`. (`#​3666 `\_\_) - Added support for Python 3.14 free-threading builds explicitly. (`#​3696 `\_\_) ## Removals - Removed the `HTTPResponse.getheaders()` method in favor of `HTTPResponse.headers`. Removed the `HTTPResponse.getheader(name, default)` method in favor of `HTTPResponse.headers.get(name, default)`. (`#​3622 `\_\_) ## Bugfixes - Fixed redirect handling in `urllib3.PoolManager` when an integer is passed for the retries parameter. (`#​3649 `\_\_) - Fixed `HTTPConnectionPool` when used in Emscripten with no explicit port. (`#​3664 `\_\_) - Fixed handling of `SSLKEYLOGFILE` with expandable variables. (`#​3700 `\_\_) ## Misc - Changed the `zstd` extra to install `backports.zstd` instead of `zstandard` on Python 3.13 and before. (`#​3693 `\_\_) - Improved the performance of content decoding by optimizing `BytesQueueBuffer` class. (`#​3710 `\_\_) - Allowed building the urllib3 package with newer setuptools-scm v9.x. (`#​3652 `\_\_) - Ensured successful urllib3 builds by setting Hatchling requirement to >= 1.27.0. (`#​3638 `\_\_)
--- ### Configuration 📅 **Schedule**: Branch creation - "" (UTC), Automerge - At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR was generated by [Mend Renovate](https://mend.io/renovate/). View the [repository job log](https://developer.mend.io/github/googleapis/python-bigquery). Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 8955e0cfd..ec5c7f2af 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -41,4 +41,4 @@ Shapely==2.1.2; python_version >= '3.10' six==1.17.0 typing-extensions==4.15.0 typing-inspect==0.9.0 -urllib3==2.5.0 +urllib3==2.6.0 From 8d5785aea50b9f9e5b13bd4c91e8a08d6dac7778 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Fri, 19 Dec 2025 10:44:31 -0800 Subject: [PATCH 536/536] feat: support timestamp_precision in table schema (#2333) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- google/cloud/bigquery/enums.py | 15 ++++++++ google/cloud/bigquery/schema.py | 55 ++++++++++++++++++++++++----- tests/system/test_client.py | 23 +++++++++++++ tests/unit/test_schema.py | 61 ++++++++++++++++++++++++++++++++- 4 files changed, 144 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 1b1eb241a..dc67f9674 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -480,3 +480,18 @@ class SourceColumnMatch(str, enum.Enum): NAME = "NAME" """Matches by name. This reads the header row as column names and reorders columns to match the field names in the schema.""" + + +class TimestampPrecision(enum.Enum): + """Precision (maximum number of total digits in base 10) for seconds of + TIMESTAMP type.""" + + MICROSECOND = None + """ + Default, for TIMESTAMP type with microsecond precision. + """ + + PICOSECOND = 12 + """ + For TIMESTAMP type with picosecond precision. + """ diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 456730b00..1809df21f 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -196,6 +196,14 @@ class SchemaField(object): Only valid for top-level schema fields (not nested fields). If the type is FOREIGN, this field is required. + + timestamp_precision: Optional[enums.TimestampPrecision] + Precision (maximum number of total digits in base 10) for seconds + of TIMESTAMP type. + + Defaults to `enums.TimestampPrecision.MICROSECOND` (`None`) for + microsecond precision. Use `enums.TimestampPrecision.PICOSECOND` + (`12`) for picosecond precision. """ def __init__( @@ -213,6 +221,7 @@ def __init__( range_element_type: Union[FieldElementType, str, None] = None, rounding_mode: Union[enums.RoundingMode, str, None] = None, foreign_type_definition: Optional[str] = None, + timestamp_precision: Optional[enums.TimestampPrecision] = None, ): self._properties: Dict[str, Any] = { "name": name, @@ -237,6 +246,13 @@ def __init__( if isinstance(policy_tags, PolicyTagList) else None ) + if isinstance(timestamp_precision, enums.TimestampPrecision): + self._properties["timestampPrecision"] = timestamp_precision.value + elif timestamp_precision is not None: + raise ValueError( + "timestamp_precision must be class enums.TimestampPrecision " + f"or None, got {type(timestamp_precision)} instead." + ) if isinstance(range_element_type, str): self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): @@ -254,15 +270,22 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. Args: - api_repr (Mapping[str, str]): The serialized representation - of the SchemaField, such as what is output by - :meth:`to_api_repr`. + api_repr (dict): The serialized representation of the SchemaField, + such as what is output by :meth:`to_api_repr`. Returns: google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object. """ placeholder = cls("this_will_be_replaced", "PLACEHOLDER") + # The API would return a string despite we send an integer. To ensure + # success of resending received schema, we convert string to integer + # to ensure consistency. + try: + api_repr["timestampPrecision"] = int(api_repr["timestampPrecision"]) + except (TypeError, KeyError): + pass + # Note: we don't make a copy of api_repr because this can cause # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD # fields. See https://github.com/googleapis/python-bigquery/issues/6 @@ -374,6 +397,16 @@ def policy_tags(self): resource = self._properties.get("policyTags") return PolicyTagList.from_api_repr(resource) if resource is not None else None + @property + def timestamp_precision(self) -> enums.TimestampPrecision: + """Precision (maximum number of total digits in base 10) for seconds of + TIMESTAMP type. + + Returns: + enums.TimestampPrecision: value of TimestampPrecision. + """ + return enums.TimestampPrecision(self._properties.get("timestampPrecision")) + def to_api_repr(self) -> dict: """Return a dictionary representing this schema field. @@ -408,6 +441,8 @@ def _key(self): None if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) ) + timestamp_precision = self._properties.get("timestampPrecision") + return ( self.name, field_type, @@ -417,6 +452,7 @@ def _key(self): self.description, self.fields, policy_tags, + timestamp_precision, ) def to_standard_sql(self) -> standard_sql.StandardSqlField: @@ -467,10 +503,9 @@ def __hash__(self): return hash(self._key()) def __repr__(self): - key = self._key() - policy_tags = key[-1] + *initial_tags, policy_tags, timestamp_precision_tag = self._key() policy_tags_inst = None if policy_tags is None else PolicyTagList(policy_tags) - adjusted_key = key[:-1] + (policy_tags_inst,) + adjusted_key = (*initial_tags, policy_tags_inst, timestamp_precision_tag) return f"{self.__class__.__name__}{adjusted_key}" @@ -530,9 +565,11 @@ def _to_schema_fields(schema): if isinstance(schema, Sequence): # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields return [ - field - if isinstance(field, SchemaField) - else SchemaField.from_api_repr(field) + ( + field + if isinstance(field, SchemaField) + else SchemaField.from_api_repr(field) + ) for field in schema ] diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 6584ca03c..3d32a3634 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -74,6 +74,16 @@ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] +SCHEMA_PICOSECOND = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField( + "time_pico", + "TIMESTAMP", + mode="REQUIRED", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, + ), +] CLUSTERING_SCHEMA = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), @@ -631,6 +641,19 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self): self.assertEqual(time_partitioning.field, "transaction_time") self.assertEqual(table.clustering_fields, ["user_email", "store_code"]) + def test_create_table_w_picosecond_timestamp(self): + dataset = self.temp_dataset(_make_dataset_id("create_table")) + table_id = "test_table" + table_arg = Table(dataset.table(table_id), schema=SCHEMA_PICOSECOND) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + self.assertEqual(table.table_id, table_id) + self.assertEqual(table.schema, SCHEMA_PICOSECOND) + def test_delete_dataset_with_string(self): dataset_id = _make_dataset_id("delete_table_true_with_string") project = Config.CLIENT.project diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index c63a8312c..f61b22035 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -52,6 +52,9 @@ def test_constructor_defaults(self): self.assertIsNone(field.default_value_expression) self.assertEqual(field.rounding_mode, None) self.assertEqual(field.foreign_type_definition, None) + self.assertEqual( + field.timestamp_precision, enums.TimestampPrecision.MICROSECOND + ) def test_constructor_explicit(self): FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field" @@ -69,6 +72,7 @@ def test_constructor_explicit(self): default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION, rounding_mode=enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED, foreign_type_definition="INTEGER", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") @@ -87,6 +91,10 @@ def test_constructor_explicit(self): ) self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") self.assertEqual(field.foreign_type_definition, "INTEGER") + self.assertEqual( + field.timestamp_precision, + enums.TimestampPrecision.PICOSECOND, + ) def test_constructor_explicit_none(self): field = self._make_one("test", "STRING", description=None, policy_tags=None) @@ -189,6 +197,23 @@ def test_to_api_repr_with_subfield(self): }, ) + def test_to_api_repr_w_timestamp_precision(self): + field = self._make_one( + "foo", + "TIMESTAMP", + "NULLABLE", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, + ) + self.assertEqual( + field.to_api_repr(), + { + "mode": "NULLABLE", + "name": "foo", + "type": "TIMESTAMP", + "timestampPrecision": 12, + }, + ) + def test_from_api_repr(self): field = self._get_target_class().from_api_repr( { @@ -198,6 +223,7 @@ def test_from_api_repr(self): "name": "foo", "type": "record", "roundingMode": "ROUNDING_MODE_UNSPECIFIED", + "timestampPrecision": 12, } ) self.assertEqual(field.name, "foo") @@ -210,6 +236,10 @@ def test_from_api_repr(self): self.assertEqual(field.fields[0].mode, "NULLABLE") self.assertEqual(field.range_element_type, None) self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") + self.assertEqual( + field.timestamp_precision, + enums.TimestampPrecision.PICOSECOND, + ) def test_from_api_repr_policy(self): field = self._get_target_class().from_api_repr( @@ -264,6 +294,17 @@ def test_from_api_repr_defaults(self): self.assertNotIn("policyTags", field._properties) self.assertNotIn("rangeElementType", field._properties) + def test_from_api_repr_timestamp_precision_str(self): + # The backend would return timestampPrecision field as a string, even + # if we send over an integer. This test verifies we manually converted + # it into integer to ensure resending could succeed. + field = self._get_target_class().from_api_repr( + { + "timestampPrecision": "12", + } + ) + self.assertEqual(field._properties["timestampPrecision"], 12) + def test_name_property(self): name = "lemon-ness" schema_field = self._make_one(name, "INTEGER") @@ -323,6 +364,22 @@ def test_foreign_type_definition_property_str(self): schema_field._properties["foreignTypeDefinition"] = FOREIGN_TYPE_DEFINITION self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) + def test_timestamp_precision_unsupported_type(self): + with pytest.raises(ValueError) as e: + self._make_one("test", "TIMESTAMP", timestamp_precision=12) + + assert "timestamp_precision must be class enums.TimestampPrecision" in str( + e.value + ) + + def test_timestamp_precision_property(self): + TIMESTAMP_PRECISION = enums.TimestampPrecision.PICOSECOND + schema_field = self._make_one("test", "TIMESTAMP") + schema_field._properties[ + "timestampPrecision" + ] = enums.TimestampPrecision.PICOSECOND.value + self.assertEqual(schema_field.timestamp_precision, TIMESTAMP_PRECISION) + def test_to_standard_sql_simple_type(self): examples = ( # a few legacy types @@ -637,7 +694,9 @@ def test___hash__not_equals(self): def test___repr__(self): field1 = self._make_one("field1", "STRING") - expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None)" + expected = ( + "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None, None)" + ) self.assertEqual(repr(field1), expected) def test___repr__evaluable_no_policy_tags(self):